diff --git a/.agents/skills/systematic-qa/SKILL.md b/.agents/skills/systematic-qa/SKILL.md new file mode 100644 index 000000000..1c5ef4794 --- /dev/null +++ b/.agents/skills/systematic-qa/SKILL.md @@ -0,0 +1,65 @@ +--- +name: systematic-qa +description: Executes full-project QA like a real user by discovering the repository verification contract, running build, lint, test, and startup commands, exercising core workflows end-to-end, creating realistic fixtures when needed, fixing root-cause regressions, and rerunning the full gate. Use when validating a branch, release candidate, migration, refactor, or risky commit. Do not use for static code review only, one-off unit test edits, or architecture brainstorming without execution. +--- + +# Systematic Project QA + +## Procedures + +**Step 1: Discover the Repository QA Contract** + +1. Read root instructions, repository docs, and CI/build files before running commands. +2. Execute `python3 scripts/discover-project-contract.py --root .` to surface candidate install, verify, build, test, lint, and start commands. +3. Prefer repository-defined umbrella commands such as `make verify`, `just verify`, or CI entrypoints over language-default commands. +4. Read `references/project-signals.md` when command ownership is ambiguous or when multiple ecosystems are present. +5. Identify the changed surface and the regression-critical surface before choosing scenarios. +6. Choose a QA artifact location using repository conventions. If the repository has no QA artifact convention, store scratch artifacts under `/tmp/codex-qa-`. + +**Step 2: Define the QA Scope** + +1. Build a short execution matrix covering baseline verification, changed workflows, and unchanged business-critical workflows. +2. Read `references/checklist.md` and ensure every required category has a planned validation. +3. 
Prefer public entry points such as CLI commands, HTTP endpoints, browser flows, worker jobs, and documented setup commands over internal test helpers. +4. Create the smallest realistic fixture or fake project needed to exercise the workflow when the repository does not already include one. +5. Treat mocks as a local unit-test boundary only. Do not use mocks or stubs as final proof that a user flow works. + +**Step 3: Establish the Baseline** + +1. Install dependencies with the repository-preferred command before testing runtime flows. +2. Run the canonical verification gate once before scenario testing to establish baseline health. +3. If the baseline fails, read the first failing output carefully and determine whether it is pre-existing or introduced by current work before moving on. +4. Start services in the closest supported production-like mode and confirm readiness through observable signals such as health checks, startup logs, or successful handshakes. + +**Step 4: Execute User-Like Flows** + +1. Drive workflows through the same interfaces a real operator or user would use. +2. Capture the exact command, input, and observable result for each scenario. +3. Validate changed features first, then validate at least one regression-critical flow outside the changed surface. +4. Exercise live integrations when credentials and local prerequisites exist. When they do not, validate every reachable local boundary and record the blocked live step explicitly. +5. Re-run the scenario from a clean state when the first attempt leaves the environment ambiguous. + +**Step 5: Diagnose and Fix Regressions** + +1. Reproduce each failure consistently before proposing a fix. +2. Activate companion debugging and test-hygiene skills when available, especially root-cause debugging and anti-workaround guidance. +3. Add or update the narrowest regression test that proves the bug when the repository supports automated coverage for that surface. +4. 
Fix production code or real configuration at the source of the failure. Do not weaken tests to match broken behavior. +5. Re-run the narrow reproduction, the impacted scenario, and the baseline gate after each fix. +6. Use `assets/issue-template.md` when the user wants persisted issue files or when the repository already has a QA issue convention. + +**Step 6: Verify the Final State** + +1. Re-run the full repository verification gate from scratch after the last code change. +2. Re-run the most important user-like scenarios after the full gate passes. +3. Summarize the evidence using `assets/verification-report-template.md`. +4. Report blocked scenarios, missing credentials, or environment gaps with the exact command or prerequisite that stopped execution. +5. Do not claim completion without fresh verification evidence from the current state of the repository. + +## Error Handling + +- If command discovery returns multiple plausible gates, prefer the broadest repository-defined command and explain the tie-breaker. +- If no canonical verify command exists, read `references/project-signals.md`, choose the broadest safe install, lint, test, and build commands for the detected ecosystem, and state that assumption explicitly. +- If a required live dependency is unavailable, validate every local boundary that does not require the missing dependency and report the blocked live validation separately. +- If a workflow requires data or services absent from the repository, create the smallest realistic fixture outside the main source tree unless the repository has its own fixture convention. +- If a failure appears unrelated to the requested change, prove that with a clean reproduction before excluding it from the QA scope. 
diff --git a/.agents/skills/systematic-qa/assets/issue-template.md b/.agents/skills/systematic-qa/assets/issue-template.md new file mode 100644 index 000000000..6bc1fd00e --- /dev/null +++ b/.agents/skills/systematic-qa/assets/issue-template.md @@ -0,0 +1,32 @@ +# Issue : + +## Summary + + + +## Reproduction + +```bash + +``` + +Observed before the fix: + +- + +## Expected + + + +## Root cause + + + +## Fix + + + +## Verification + +- +- diff --git a/.agents/skills/systematic-qa/assets/verification-report-template.md b/.agents/skills/systematic-qa/assets/verification-report-template.md new file mode 100644 index 000000000..90ca5bfca --- /dev/null +++ b/.agents/skills/systematic-qa/assets/verification-report-template.md @@ -0,0 +1,10 @@ +VERIFICATION REPORT +------------------- +Claim: +Command: `` +Executed: +Exit code: <0 or non-zero> +Output summary: +Warnings: +Errors: +Verdict: PASS or FAIL diff --git a/.agents/skills/systematic-qa/references/checklist.md b/.agents/skills/systematic-qa/references/checklist.md new file mode 100644 index 000000000..956ccb81e --- /dev/null +++ b/.agents/skills/systematic-qa/references/checklist.md @@ -0,0 +1,36 @@ +# Systematic Project QA Checklist + +Mark every item as complete before claiming the QA pass is done. 
+ +## Contract Discovery + +- [ ] Root instructions and repository docs were read +- [ ] The canonical verify gate was identified or an explicit fallback was chosen +- [ ] The changed surface and regression-critical surface were identified + +## Baseline + +- [ ] Dependencies were installed with the repository-preferred command +- [ ] The baseline verification gate was run before scenario testing +- [ ] Any pre-existing failures were isolated with evidence + +## User-Like Validation + +- [ ] Changed workflows were exercised through public interfaces +- [ ] At least one unchanged regression-critical workflow was exercised +- [ ] Runtime readiness was confirmed with observable signals +- [ ] Fixtures or fake projects were realistic and minimal + +## Regression Handling + +- [ ] Every failure was reproduced before fixing +- [ ] Root cause was identified before implementation +- [ ] Regression coverage was added or updated when the repository supported it +- [ ] The narrow repro and impacted flows were rerun after each fix + +## Final Verification + +- [ ] The full verification gate was rerun after the last code change +- [ ] The most important user-like flows were rerun after the final gate +- [ ] A verification report was produced from fresh evidence +- [ ] Blocked scenarios or missing prerequisites were disclosed explicitly diff --git a/.agents/skills/systematic-qa/references/project-signals.md b/.agents/skills/systematic-qa/references/project-signals.md new file mode 100644 index 000000000..bf90f0c15 --- /dev/null +++ b/.agents/skills/systematic-qa/references/project-signals.md @@ -0,0 +1,57 @@ +# Project Signal Guide + +Use this guide when repository instructions do not already define the canonical QA contract. + +## Priority Order + +1. Root instructions such as `AGENTS.md`, `CLAUDE.md`, or repository-specific agent docs +2. Dedicated umbrella commands in `Makefile`, `Justfile`, task runners, or CI wrapper scripts +3. CI workflows under `.github/workflows/` +4. 
Ecosystem-native manifests such as `package.json`, `go.mod`, `pyproject.toml`, or `Cargo.toml` +5. Language-default commands as a last resort + +## Common Signals + +### Makefile or Justfile + +Treat `verify`, `check`, `ci`, `test`, `lint`, `build`, `start`, `run`, and `dev` as high-confidence targets. + +### package.json + +Prefer explicit scripts in this order: + +1. `verify`, `check`, `ci` +2. `test`, `test:ci`, `test:e2e`, `test:integration` +3. `lint`, `typecheck` +4. `build` +5. `start`, `dev`, `serve`, `preview` + +### Go modules + +If no umbrella command exists, treat `go test ./...`, `go build ./...`, and repository formatting/lint commands as the minimum baseline. Prefer repository wrappers over direct Go commands when both exist. + +### Python projects + +Look for `pytest`, `tox`, `nox`, `ruff`, `mypy`, `python -m build`, and any scripts declared in `pyproject.toml`. + +### Rust projects + +Treat `cargo test`, `cargo build`, `cargo fmt --check`, and `cargo clippy --all-targets --all-features -- -D warnings` as strong defaults when the repository does not define wrappers. + +### Mixed Repositories + +When multiple ecosystems exist, identify the product entrypoint first. Do not assume every manifest is part of the same runtime surface. + +## Scenario Selection Rules + +Always cover: + +1. A baseline verification gate +2. The workflows directly touched by the change +3. At least one adjacent regression-critical workflow +4. Startup or readiness if the change can affect bootstrapping +5. A realistic fixture path if the feature consumes external projects, repos, files, or APIs + +## Evidence Rules + +Capture exact commands, inputs, outputs, and artifact paths. Prefer observable outcomes over interpretation. 
#!/usr/bin/env python3
"""Discover candidate QA commands (install, verify, lint, test, build, start).

The script inspects well-known files directly under a repository root
(Makefile/Justfile, package.json, go.mod, Cargo.toml, pyproject.toml and
.github/workflows) and prints a JSON document with the signals it found and
the candidate commands per category. Every command is only a *candidate*:
nothing is executed, and discovery never fails because one manifest is
malformed -- a note is recorded in the output instead.
"""

import argparse
import json
import re
from pathlib import Path

try:
    import tomllib
except ModuleNotFoundError:  # pragma: no cover - Python < 3.11
    tomllib = None


# Target names searched in Makefiles/Justfiles, in preference order per category.
MAKEFILE_TARGETS = {
    "install": ["install", "deps", "setup", "bootstrap"],
    "verify": ["verify", "check", "ci"],
    "lint": ["lint", "fmt", "format"],
    "test": ["test", "unit", "integration", "e2e"],
    "build": ["build", "compile"],
    "start": ["start", "run", "dev", "serve"],
}

# Script names searched in package.json, in preference order per category.
PACKAGE_JSON_TARGETS = {
    "install": [],
    "verify": ["verify", "check", "ci"],
    "lint": ["lint", "lint:ci", "typecheck", "format:check"],
    "test": ["test", "test:ci", "test:unit", "test:integration", "test:e2e"],
    "build": ["build"],
    "start": ["start", "dev", "serve", "preview"],
}

# File names tried for each runner; the first existing one wins so that
# case-insensitive filesystems do not report the same file twice.
MAKEFILE_NAMES = ("Makefile", "makefile", "GNUmakefile")
JUSTFILE_NAMES = ("Justfile", "justfile", ".justfile")

# (lockfile, package manager executable, install command), in detection order.
NODE_PACKAGE_MANAGERS = (
    ("package-lock.json", "npm", "npm ci"),
    ("pnpm-lock.yaml", "pnpm", "pnpm install --frozen-lockfile"),
    ("yarn.lock", "yarn", "yarn install --frozen-lockfile"),
)

# A rule/recipe header: a single plain target name followed by ":" and then
# whitespace or end of line. Assignments such as "VAR := x" do not match.
TARGET_PATTERN = re.compile(r"^([A-Za-z0-9_.-]+):(?:\s|$)")


def read_text(path: Path) -> str:
    """Return the file content decoded as UTF-8.

    Undecodable bytes are replaced rather than raising, because discovery
    only needs to recognise ASCII target/script names.
    """
    return path.read_text(encoding="utf-8", errors="replace")


def add_command(result: dict, category: str, command: str) -> None:
    """Append ``command`` to ``result["commands"][category]`` unless already present."""
    commands = result["commands"][category]
    if command not in commands:
        commands.append(command)


def add_signal(result: dict, signal: str) -> None:
    """Record ``signal`` (a detected file or directory name) once."""
    if signal not in result["signals"]:
        result["signals"].append(signal)


def add_note(result: dict, note: str) -> None:
    """Record a human-readable caveat about the discovery run once."""
    notes = result.setdefault("notes", [])
    if note not in notes:
        notes.append(note)


def find_first_file(root: Path, names) -> "Path | None":
    """Return the first ``root / name`` that is a regular file, or ``None``."""
    for name in names:
        candidate = root / name
        if candidate.is_file():
            return candidate
    return None


def parse_makefile(path: Path, runner: str, result: dict) -> None:
    """Collect ``<runner> <target>`` commands from a Makefile or Justfile.

    Only targets listed in ``MAKEFILE_TARGETS`` are reported. Targets whose
    name starts with "." (for example ``.PHONY``) are ignored.
    """
    add_signal(result, path.name)
    targets = set()
    for line in read_text(path).splitlines():
        match = TARGET_PATTERN.match(line)
        if match and not match.group(1).startswith("."):
            targets.add(match.group(1))

    for category, preferred in MAKEFILE_TARGETS.items():
        for target in preferred:
            if target in targets:
                add_command(result, category, f"{runner} {target}")


def _node_script_command(manager: str, script: str) -> str:
    """Build the command that runs ``script`` with the given package manager."""
    # Keep npm's conventional shortcuts; "<manager> run <script>" is valid for
    # every script under pnpm and yarn.
    if manager == "npm" and script in ("test", "start"):
        return f"npm {script}"
    return f"{manager} run {script}"


def parse_package_json(path: Path, result: dict) -> None:
    """Collect install and script commands from ``package.json``.

    The package manager is inferred from the lockfile next to the manifest
    (npm when none is found) and is used for both the install command and the
    script commands, so the two never disagree. A malformed manifest is
    reported as a note instead of aborting discovery.
    """
    add_signal(result, path.name)

    manager, install_command = "npm", "npm install"
    for lockfile, candidate, command in NODE_PACKAGE_MANAGERS:
        if (path.parent / lockfile).exists():
            manager, install_command = candidate, command
            break
    add_command(result, "install", install_command)

    try:
        payload = json.loads(read_text(path))
    except json.JSONDecodeError as exc:
        add_note(result, f"{path.name} could not be parsed as JSON: {exc}")
        return

    scripts = payload.get("scripts", {}) if isinstance(payload, dict) else {}
    if not isinstance(scripts, dict):
        return

    for category, preferred in PACKAGE_JSON_TARGETS.items():
        for target in preferred:
            if target in scripts:
                add_command(result, category, _node_script_command(manager, target))


def parse_go_mod(path: Path, result: dict) -> None:
    """Add the default Go module commands; the file content is not inspected."""
    add_signal(result, path.name)
    add_command(result, "install", "go mod download")
    add_command(result, "test", "go test ./...")
    add_command(result, "build", "go build ./...")


def parse_cargo_toml(path: Path, result: dict) -> None:
    """Add the default Cargo commands; the file content is not inspected."""
    add_signal(result, path.name)
    add_command(result, "install", "cargo fetch")
    add_command(result, "verify", "cargo test && cargo build")
    add_command(result, "lint", "cargo fmt --check")
    add_command(result, "lint", "cargo clippy --all-targets --all-features -- -D warnings")
    add_command(result, "test", "cargo test")
    add_command(result, "build", "cargo build")


def parse_pyproject(path: Path, result: dict) -> None:
    """Collect Python commands from ``pyproject.toml`` and sibling lockfiles.

    ``pytest`` is always offered as the test candidate. Lint and build
    candidates depend on the ``[tool.*]`` and ``[build-system]`` tables, which
    can only be read when ``tomllib`` is available and the file is valid TOML;
    otherwise a note is recorded and those candidates are skipped.
    """
    add_signal(result, path.name)

    data = {}
    if tomllib is None:
        add_note(result, f"{path.name} was not parsed because tomllib is unavailable (Python < 3.11).")
    else:
        try:
            data = tomllib.loads(read_text(path))
        except tomllib.TOMLDecodeError as exc:
            add_note(result, f"{path.name} could not be parsed as TOML: {exc}")

    if (path.parent / "poetry.lock").exists():
        add_command(result, "install", "poetry install")
    elif (path.parent / "uv.lock").exists():
        add_command(result, "install", "uv sync")
    elif (path.parent / "requirements.txt").exists():
        add_command(result, "install", "python3 -m pip install -r requirements.txt")

    # "tool" is conventionally a table, but guard against any other TOML value.
    tool = data.get("tool")
    if not isinstance(tool, dict):
        tool = {}

    add_command(result, "test", "pytest")

    if "ruff" in tool:
        add_command(result, "lint", "ruff check .")
    if "black" in tool:
        add_command(result, "lint", "black --check .")
    if "mypy" in tool:
        add_command(result, "lint", "mypy .")

    if "build-system" in data:
        add_command(result, "build", "python3 -m build")


def collect_ci_signal(root: Path, result: dict) -> None:
    """Record ``.github/workflows`` as a signal when it contains at least one file."""
    workflows = root / ".github" / "workflows"
    if not workflows.is_dir():
        return
    if any(entry.is_file() for entry in workflows.iterdir()):
        add_signal(result, ".github/workflows")


def build_result(root: Path) -> dict:
    """Inspect ``root`` and return the discovery document.

    The returned dict has the keys ``root`` (resolved path as a string),
    ``signals`` (detected file names), ``commands`` (candidate commands per
    category) and ``notes`` (guidance and parse caveats).
    """
    result = {
        "root": str(root.resolve()),
        "signals": [],
        "commands": {
            "install": [],
            "verify": [],
            "lint": [],
            "test": [],
            "build": [],
            "start": [],
        },
        "notes": [
            "Prefer repository-defined umbrella commands over ecosystem defaults.",
            "Treat every discovered command as a candidate until repository instructions or CI confirm ownership.",
        ],
    }

    makefile = find_first_file(root, MAKEFILE_NAMES)
    if makefile is not None:
        parse_makefile(makefile, "make", result)
    justfile = find_first_file(root, JUSTFILE_NAMES)
    if justfile is not None:
        parse_makefile(justfile, "just", result)
    if (root / "package.json").is_file():
        parse_package_json(root / "package.json", result)
    if (root / "go.mod").is_file():
        parse_go_mod(root / "go.mod", result)
    if (root / "Cargo.toml").is_file():
        parse_cargo_toml(root / "Cargo.toml", result)
    if (root / "pyproject.toml").is_file():
        parse_pyproject(root / "pyproject.toml", result)

    collect_ci_signal(root, result)
    return result


def main() -> None:
    """Parse CLI arguments and print the discovery document as JSON."""
    parser = argparse.ArgumentParser(description="Discover candidate QA commands for a repository.")
    parser.add_argument("--root", default=".", help="Repository root to inspect.")
    args = parser.parse_args()
    root = Path(args.root).resolve()
    if not root.is_dir():
        # An empty contract for a mistyped path would look like "nothing to run".
        parser.error(f"--root is not a directory: {root}")
    result = build_result(root)
    print(json.dumps(result, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()
**Agent SDK** -- programmatic agent building in Python/TypeScript with subagent orchestration, hooks, and tool control - -The most impactful patterns for AGH are: hook-based policy enforcement, MCP-driven tool federation, skill-as-instruction files, multi-agent delegation, and classifier-based permission gating. - ---- - -## Extension Catalog - -### MCP Servers (Resources: MCP) - -| Name | Category | Description | AGH Mapping | -|------|----------|-------------|-------------| -| **GitHub MCP** | DevOps | Full repo management: PRs, issues, code search, branches, commits via API | Resource: MCP server; Capability: agent.driver integration for PR-driven workflows | -| **Filesystem MCP** | Core | Read/write/organize local files with configurable access boundaries | Resource: MCP server (AGH already has file access; useful as a sandboxed alternative) | -| **PostgreSQL MCP** | Database | Natural language database queries and schema inspection | Resource: MCP server; Capability: memory.backend alternative | -| **Playwright MCP** | Testing | Browser automation, E2E testing, screenshot capture, UI interaction | Resource: MCP server; Action: session-level test execution | -| **Memory MCP** | Persistence | Persistent knowledge graph across sessions | Capability: memory.backend; maps directly to AGH's memory layer | -| **Notion MCP** | Productivity | Read/write Notion pages, databases, blocks | Resource: MCP server for knowledge management | -| **Figma MCP** | Design | Read Figma frames/components, design-to-code pipeline | Resource: MCP server; Capability: prompt.provider (design context) | -| **Brave Search MCP** | Research | Privacy-first web search with source citation | Resource: MCP server; Action: observe queries | -| **Supabase MCP** | Backend | Database, auth, edge functions, storage integration | Resource: MCP server | -| **Sequential Thinking MCP** | Reasoning | Enhanced problem-solving via structured thinking steps | Capability: message.transform (reasoning 
augmentation) | -| **Sentry MCP** | Monitoring | Real-time error fetching, debugging, report creation | Resource: MCP server; Capability: observe.exporter | -| **Linear MCP** | Project Mgmt | Create/update/query issues, sprint management | Resource: MCP server | -| **Slack MCP** | Communication | Send messages, search history, manage channels | Resource: MCP server; Action: notification fan-out | -| **Jira MCP** | Project Mgmt | JQL search, status transitions, comments, ticket creation | Resource: MCP server | -| **Neon MCP** | Database | Serverless Postgres with branching, migrations, query tuning | Resource: MCP server; Capability: memory.backend | - -### Hooks (Capabilities: permission.gate, content.validate, message.transform) - -| Hook / Pattern | Category | Description | AGH Mapping | -|----------------|----------|-------------|-------------| -| **Dangerous command blocker** | Security | PreToolUse on Bash: block `rm -rf`, `DROP TABLE`, force-push commands | Capability: permission.gate | -| **Sensitive file protector** | Security | PreToolUse on Edit/Write: block changes to `.env`, `package-lock.json`, `.git/` | Capability: permission.gate | -| **Auto-formatter** | Quality | PostToolUse on Edit: run Prettier/Black/gofmt after every file edit | Capability: content.validate (post-action) | -| **Auto-test runner** | Quality | PostToolUse on Edit: run test suite on modified files for instant regression feedback | Capability: content.validate | -| **Auto-commit agent work** | DevOps | PostToolUse on Edit: create micro-commits to track agent changes | Action: session event recording | -| **Prompt logger** | Observability | UserPromptSubmit: log every prompt with timestamp to audit file | Capability: observe.exporter | -| **Context injector** | Augmentation | UserPromptSubmit: inject project context, environment info, or relevant docs before processing | Capability: prompt.provider | -| **Tool input modifier** | Transform | PreToolUse (v2.0.10+): transparently 
modify tool inputs (add dry-run flags, redact secrets) | Capability: message.transform | -| **Permission classifier** | Gating | Transcript classifier (Sonnet 4.6) evaluates tool calls against natural-language rules | Capability: permission.gate (AI-based) | -| **PermissionDenied retry** | Recovery | PermissionDenied hook: retry with modified parameters or defer decision | Capability: permission.gate | -| **PostCompact context preserver** | Memory | PostCompact: ensure critical context survives summarization | Capability: memory.backend | -| **Setup initialization** | Lifecycle | Setup hook: run maintenance scripts, environment checks on session init | Action: session lifecycle | - -### Skills & Commands (Resources: skills) - -| Skill / Command | Category | Description | AGH Mapping | -|-----------------|----------|-------------|-------------| -| **Frontend Design (Anthropic)** | UI | Design system + philosophy injection; bold aesthetics, typography, animations (277K installs) | Resource: skill; Capability: prompt.provider | -| **Taste** | UI | Collection improving AI frontend code quality (6.9K stars) | Resource: skill | -| **Apple HIG Designer** | UI | Interfaces following Apple Human Interface Guidelines | Resource: skill; Capability: prompt.provider | -| **Shannon (AI Pen Testing)** | Security | Autonomous pen testing, 96% exploit success rate, 50+ vulnerability types | Resource: skill; Capability: content.validate | -| **VibeSec** | Security | Secure code patterns and vulnerability prevention (496 stars) | Resource: skill | -| **Skill-Threat-Modeling** | Security | STRIDE threat modeling and security review workflows | Resource: skill | -| **Code Review** | Quality | Structured review: security, performance, style violations | Resource: skill; Capability: content.validate | -| **Test Planner/Executor** | Testing | Risk-based test scenario creation (E2E, integration, unit) + execution | Resource: skill | -| **Commit Helper** | DevOps | Conventional commits, 
co-author tags, force-push prevention | Resource: skill; hook integration | -| **Project Bootstrap** | Scaffolding | New project scaffolding with preferred stack, linting, CI config | Resource: skill | -| **cc-devops-skills** | DevOps | Comprehensive DevOps skill set: deploy, infrastructure, monitoring | Resource: skill | -| **Ship command** | Workflow | Review diff, run tests, commit, push -- all in one `/ship` | Resource: skill (compound workflow) | -| **Valyu (Research)** | Research | Web search + 36 specialized data sources (SEC, PubMed, FRED, etc.) | Resource: MCP + skill; Capability: prompt.provider | - -### Plugins (Resources: bundled packages) - -| Plugin | Category | Description | AGH Mapping | -|--------|----------|-------------|-------------| -| **Security Guidance (Anthropic)** | Security | Scans file edits for vulnerabilities before execution; blocks + explains | Resource: hook + skill bundle; Capability: content.validate, permission.gate | -| **Local-Review** | Quality | 5 parallel review agents, scores issues, only flags 80+ severity | Resource: agent orchestration; Action: multi-session coordination | -| **Superpowers** | Workflow | Structured lifecycle planning + skills for brainstorming, TDD, debugging, review | Resource: skill bundle | -| **Shipyard** | DevOps | Lifecycle mgmt + IaC validation (Terraform, Ansible, Docker, K8s, CloudFormation) + auditor agent | Resource: skill + agent bundle | -| **Claude-Mem** | Memory | Action capture, compression, context injection via SQLite + Chroma vector search | Capability: memory.backend; Resource: MCP | -| **Ralph Wiggum** | Testing | Visual testing by driving Xcode simulator for Swift apps | Resource: skill + MCP | -| **Figma Plugin** | Design | Read Figma files, generate code from frames/components | Resource: MCP + skill bundle | -| **Language Servers (12)** | IDE | Real-time code intelligence for specific programming languages | Capability: prompt.provider (context enrichment) | -| **Feature-dev 
(Anthropic)** | Workflow | Guided feature development workflow | Resource: skill | -| **Commit-commands (Anthropic)** | DevOps | Standardized commit workflows | Resource: skill | - -### Agent SDK Patterns (Actions: Host API) - -| Pattern | Category | Description | AGH Mapping | -|---------|----------|-------------|-------------| -| **Subagent delegation** | Orchestration | Spawn specialized child agents with own context window and tool access | Action: session spawning via Host API | -| **Explore-Plan-Act** | Workflow | Sequential three-phase loop with escalating permissions | Action: session state machine (maps to AGH session lifecycle) | -| **Operator/Orchestrator** | Coordination | Central agent decomposes tasks, delegates to specialized sub-agents, synthesizes results | Action: multi-session coordination via Host API | -| **Split-and-Merge** | Parallelism | Multiple agents in isolated git worktrees working in parallel, merge results | Action: parallel session management | -| **Custom agents via Markdown** | Configuration | `.claude/agents/*.md` with YAML frontmatter defining name, tools, model, system prompt | Resource: agent definition (maps directly to AGH agent config) | -| **Research pipeline** | Workflow | Explore subagents gather info, then act on aggregated results | Action: session chaining | -| **Tool allowlist/blocklist** | Security | `allowed_tools` / `disallowed_tools` for fine-grained tool access per agent | Capability: permission.gate | -| **Context compaction** | Memory | Auto-summarize when context limit approaches, preserve critical info | Capability: memory.backend; Action: observe (context health metrics) | - -### CLAUDE.md / Configuration Patterns (Resources: skills; Capability: prompt.provider) - -| Pattern | Category | Description | AGH Mapping | -|---------|----------|-------------|-------------| -| **Hierarchical config files** | Config | Root CLAUDE.md + subdirectory overrides; auto-loaded based on working context | Resource: skill 
loading by workspace path | -| **Path-scoped rules** | Config | YAML frontmatter restricts rule activation to matching directories | Capability: prompt.provider (context-aware) | -| **Compaction instructions** | Memory | "When compacting, always preserve X" directives in CLAUDE.md | Capability: memory.backend (consolidation rules) | -| **Auto-memory (MEMORY.md)** | Memory | Agent auto-detects patterns and writes own notes (v2.1.32+) | Capability: memory.backend (maps to AGH dream/consolidation) | -| **Hook-enforced rules** | Governance | Critical rules as hooks (100% enforcement) vs. CLAUDE.md instructions (~70% compliance) | Capability: permission.gate vs. prompt.provider | -| **Custom command files** | Workflow | `.claude/commands/*.md` becoming slash commands with shell execution | Resource: skill with action execution | - ---- - -## Detailed Analysis of High-Impact Extensions - -### 1. Hook-Based Policy Enforcement (PreToolUse) - -**What it does:** Intercepts every tool call before execution. Inspects the tool name, arguments, and context. Can approve, deny (exit code 2), or modify the call. The most powerful control mechanism in Claude Code. - -**Why it matters:** CLAUDE.md instructions achieve ~70% compliance. Hooks achieve 100%. For security-critical rules (no force push, no production data deletion, no secrets in commits), this gap is unacceptable. - -**AGH mapping:** This maps directly to AGH's `permission.gate` capability. 
AGH should implement a PreToolUse hook system where: -- Hooks are registered per-agent or per-workspace -- Each hook receives the tool call as structured input (tool name, arguments, session context) -- Hooks return allow/deny/modify decisions -- Hooks can be shell scripts, Go plugins, or HTTP endpoints -- Multiple hooks chain with configurable precedence - -**Key insight from Claude Code:** The three-tier handler system (Command hooks for simple checks, Prompt hooks for semantic evaluation, Agent hooks for deep analysis) is a powerful graduated model. AGH could adopt this with shell-based hooks for speed and agent-based hooks for complex policy decisions. - -### 2. MCP Server Federation - -**What it does:** Connects Claude Code to 3,000+ external tools via a standardized protocol. Each MCP server is a subprocess exposing tools, resources, and prompts over JSON-RPC. Claude Code discovers tools on-demand via Tool Search (lazy loading), reducing context consumption by ~95%. - -**Why it matters:** No single agent can have all tools built in. MCP makes the tool surface area effectively infinite while keeping the runtime lean. - -**AGH mapping:** AGH already supports MCP as a resource type. Key lessons from Claude Code's implementation: -- **Lazy tool discovery** is essential at scale (10+ servers). AGH should implement tool search / on-demand schema loading rather than dumping all tool definitions into agent context. -- **Three scope levels** (user/local/project) map to AGH's global/workspace scoping. Add a `.mcp.json` project-level config for team-shared MCP servers. -- **Skills + MCP composition**: Claude Code skills can orchestrate MCP tools into workflows. AGH's skill system should support MCP tool references in skill definitions. - -### 3. Skills as Instruction Files - -**What it does:** A `SKILL.md` file with YAML frontmatter (name, description, trigger conditions) + markdown body (instructions Claude follows). No compilation, no build step. 
Skills load on-demand via slash commands or auto-detection based on task context. - -**Why it matters:** This is the lowest-friction extension mechanism. Anyone who can write markdown can create a skill. It democratizes agent customization. - -**AGH mapping:** AGH's skill system should adopt this pattern: -- Skills are markdown files with frontmatter metadata -- Stored in `~/.agh/skills/` (global) or `.agh/skills/` (workspace) -- Auto-discovered and lazy-loaded based on task context -- Can reference other skills, MCP tools, and hooks -- Budget-capped (1% of context window, ~8K chars fallback) to prevent context bloat -- Keep skills under 500 words / 2K tokens for optimal performance - -### 4. Multi-Agent Orchestration (Subagents) - -**What it does:** The operator pattern decomposes complex tasks and delegates to specialized sub-agents, each with their own context window, tool access, and instructions. Sub-agents can run in parallel in isolated git worktrees. - -**Why it matters:** Single-agent context windows are finite. Complex tasks (refactor + test + review + deploy) benefit from specialized agents that don't pollute each other's context. - -**AGH mapping:** This is core to AGH's architecture. Key patterns to adopt: -- **Custom agents via markdown** (`.claude/agents/*.md`): AGH already has agent definitions in TOML config. Extend to support workspace-level agent definitions in markdown for quick customization. -- **Split-and-merge in worktrees**: AGH should support spawning sessions in isolated worktrees with automatic branch management and merge coordination. -- **Explore-Plan-Act lifecycle**: Map to AGH's session state machine. Three phases with escalating tool permissions. - -### 5. Classifier-Based Permission Gating - -**What it does:** A fast AI classifier (running on a smaller model) evaluates each tool call against natural-language rules before execution. Two-stage: fast single-token filter, then chain-of-thought only if flagged. 
Rules are written in prose, not regex. - -**Why it matters:** Traditional permission systems use regex or glob patterns. Prose rules ("don't modify infrastructure files unless the user explicitly asked for infrastructure changes") capture intent that patterns cannot. - -**AGH mapping:** This is a sophisticated `permission.gate` capability: -- Use a smaller/faster model as a classifier for tool call evaluation -- Rules defined in natural language in config -- Two-stage evaluation for performance (fast filter + deep reasoning) -- Configurable per-agent and per-workspace -- Precedence: deny rules > allow exceptions > explicit user intent - -### 6. Plugin Marketplace Model - -**What it does:** Plugins bundle skills + hooks + MCP servers + commands into installable packages. Official marketplace (`claude-plugins-official`) with 101 plugins, plus community marketplaces. Install via `/plugin install name@registry`. - -**Why it matters:** Individual skills and hooks are useful but fragmented. Plugins provide complete, tested workflows. The marketplace model enables distribution and discovery. - -**AGH mapping:** AGH should plan for a plugin/extension registry: -- Extensions bundle: agent definitions, skills, hooks, MCP server configs -- Registry format: Git repos with standardized manifest files -- Install via CLI: `agh plugin install name@registry` -- Scope control: user-level vs. workspace-level installation -- Enterprise: managed registries with approval workflows - -### 7. Auto-Memory and Dream Consolidation - -**What it does:** Claude Code v2.1.32 auto-generates MEMORY.md by observing user patterns, preferences, and project conventions. This is separate from CLAUDE.md (human-written project docs). The Claude-Mem plugin adds SQLite + Chroma vector search for hybrid memory retrieval. - -**Why it matters:** Memory that builds itself from observation is more complete and current than manually maintained docs. Vector search enables semantic retrieval of relevant context. 
- -**AGH mapping:** This maps directly to AGH's memory and dream consolidation layers: -- Auto-memory: AGH's observe layer already captures events. The consolidation/dream system should synthesize these into persistent memory entries. -- Dual-scope: global memory (user preferences) + workspace memory (project conventions) -- AGH already has this. -- Hybrid retrieval: keyword + vector search over consolidated memories. -- Compaction rules: configurable instructions for what to preserve during context compaction. - ---- - -## Key Takeaways for AGH Extension Ideas - -### High-Priority Extensions to Build - -1. **Hook pipeline with PreToolUse/PostToolUse** -- The single highest-impact extension mechanism. Three handler tiers (command/prompt/agent) provide graduated complexity. Essential for permission.gate and content.validate capabilities. - -2. **Lazy MCP tool discovery** -- As AGH connects to more MCP servers, eager tool loading will bloat agent context. Implement on-demand tool search and schema fetching. - -3. **Skill files with auto-discovery** -- Markdown-based skill definitions with YAML frontmatter. Lowest friction for users. Budget-capped context injection. - -4. **Permission classifier** -- AI-based tool call evaluation using natural-language rules. More expressive than regex patterns. Essential for autonomous agent operation. - -5. **Plugin bundling format** -- Define a standard for packaging skills + hooks + MCP configs + agent definitions as installable extensions. - -### Medium-Priority Extensions - -6. **Subagent orchestration with worktree isolation** -- Spawn parallel agents in isolated git worktrees. Operator pattern for complex multi-phase tasks. - -7. **Auto-memory from observation** -- Agent-generated memory entries from event stream analysis, distinct from human-configured project docs. - -8. **Hierarchical config with path scoping** -- Config files that activate only when the agent works in matching directories. - -9. 
**PostCompact hooks** -- Ensure critical context survives memory consolidation. - -10. **CI/CD integration actions** -- GitHub Actions / GitLab CI integration for automated code review, security audit, release notes. - -### Design Principles Learned - -- **Deterministic enforcement via hooks, not instructions.** Instructions are probabilistic (~70%). Hooks are deterministic (100%). Use hooks for must-enforce rules, instructions for should-follow guidance. -- **Lazy loading is essential at scale.** Claude Code's Tool Search pattern (95% context reduction) is critical when connecting 10+ MCP servers. -- **Prose rules beat regex for intent.** Permission rules written as natural language capture nuance that glob patterns cannot. -- **Skills should be small.** Under 500 words / 2K tokens. Focused on one workflow. Include examples for better accuracy. -- **Three scope levels** (user/workspace/project-shared) cover all organizational needs. -- **Plugins are the distribution unit.** Individual skills/hooks are building blocks; plugins are the installable product. 
- -### AGH Extension Model Mapping Summary - -| Claude Code Concept | AGH Dimension | AGH Component | -|---------------------|---------------|---------------| -| MCP Server | Resource | MCP (already supported) | -| Skill / Command | Resource | Skills (already supported) | -| Hook (PreToolUse) | Capability | permission.gate, content.validate | -| Hook (PostToolUse) | Capability | content.validate, observe.exporter | -| Hook (UserPromptSubmit) | Capability | prompt.provider, message.transform | -| Hook (tool input modifier) | Capability | message.transform | -| Permission classifier | Capability | permission.gate (AI-based) | -| Auto-memory / MEMORY.md | Capability | memory.backend (dream consolidation) | -| Agent definition (.md) | Resource | Agent (extend TOML config with markdown) | -| Plugin bundle | Resource | New: composite extension package | -| Subagent delegation | Action | Host API: session spawning | -| Operator pattern | Action | Host API: multi-session coordination | -| Split-and-merge | Action | Host API: parallel session management | -| Context compaction | Action | Observe: context health metrics + memory | -| Tool Search | Action | Host API: lazy MCP tool discovery | - ---- - -## Sources - -- [Hooks reference - Claude Code Docs](https://code.claude.com/docs/en/hooks) -- [Extend Claude with skills - Claude Code Docs](https://code.claude.com/docs/en/skills) -- [Connect Claude Code to tools via MCP - Claude Code Docs](https://code.claude.com/docs/en/mcp) -- [Configure permissions - Claude Code Docs](https://code.claude.com/docs/en/permissions) -- [Agent SDK overview - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/overview) -- [Best Practices for Claude Code - Claude Code Docs](https://code.claude.com/docs/en/best-practices) -- [Discover and install prebuilt plugins through marketplaces - Claude Code Docs](https://code.claude.com/docs/en/discover-plugins) -- [Claude Code auto mode - 
Anthropic](https://www.anthropic.com/engineering/claude-code-auto-mode) -- [Using CLAUDE.MD files - Claude Blog](https://claude.com/blog/using-claude-md-files) -- [Building agents with the Claude Agent SDK - Claude Blog](https://claude.com/blog/building-agents-with-the-claude-agent-sdk) -- [awesome-claude-code - GitHub (hesreallyhim)](https://github.com/hesreallyhim/awesome-claude-code) -- [awesome-mcp-servers - GitHub (wong2)](https://github.com/wong2/awesome-mcp-servers) -- [awesome-claude-code-toolkit - GitHub (rohitg00)](https://github.com/rohitg00/awesome-claude-code-toolkit) -- [awesome-agent-skills - GitHub (VoltAgent)](https://github.com/VoltAgent/awesome-agent-skills) -- [claude-plugins-official - GitHub (anthropics)](https://github.com/anthropics/claude-plugins-official) -- [claude-code-hooks-mastery - GitHub (disler)](https://github.com/disler/claude-code-hooks-mastery) -- [Claude Code Hooks Reference: All 12 Events - Pixelmojo](https://www.pixelmojo.io/blogs/claude-code-hooks-production-quality-ci-cd-patterns) -- [Claude Code hooks: A practical guide - eesel AI](https://www.eesel.ai/blog/hooks-in-claude-code) -- [Claude Code Hooks: A Practical Guide - DataCamp](https://www.datacamp.com/tutorial/claude-code-hooks) -- [Claude Code Hook Examples - Steve Kinney](https://stevekinney.com/courses/ai-development/claude-code-hook-examples) -- [CLAUDE.md best practices - DEV Community](https://dev.to/cleverhoods/claudemd-best-practices-from-basic-to-adaptive-9lm) -- [Claude Code Skills vs MCP Servers - DEV Community](https://dev.to/williamwangai/claude-code-skills-vs-mcp-servers-what-to-use-how-to-install-and-the-best-ones-in-2026-548k) -- [Best Claude Code Skills & Plugins 2026 - DEV Community](https://dev.to/raxxostudios/best-claude-code-skills-plugins-2026-guide-4ak4) -- [10 Must-Have Skills for Claude 2026 - Medium](https://medium.com/@unicodeveloper/10-must-have-skills-for-claude-and-any-coding-agent-in-2026-b5451b013051) -- [Claude Code 2.0.13 Plugin 
Marketplace - Medium](https://alirezarezvani.medium.com/claude-code-2-0-13-be2c0a723856) -- [The Complete Guide to Building Agents with the Claude Agent SDK - Nader Dabit](https://nader.substack.com/p/the-complete-guide-to-building-agents) -- [Top 10 MCP Servers for Claude Code - Apidog](https://apidog.com/blog/top-10-mcp-servers-for-claude-code/) -- [10 Must-Have MCP Servers for Claude Code - Medium](https://roobia.medium.com/the-10-must-have-mcp-servers-for-claude-code-2025-developer-edition-43dc3c15c887) -- [Piebald-AI/claude-code-system-prompts - GitHub](https://github.com/Piebald-AI/claude-code-system-prompts) diff --git a/.compozy/tasks/ext-ideas/research/analysis_ecosystem.md b/.compozy/tasks/ext-ideas/research/analysis_ecosystem.md deleted file mode 100644 index 81a2eeee0..000000000 --- a/.compozy/tasks/ext-ideas/research/analysis_ecosystem.md +++ /dev/null @@ -1,463 +0,0 @@ -# AI Agent Extension Ecosystem Research - -## Overview - -This document captures research into the AI agent extension ecosystem as of April 2026, with a focus on concrete extension ideas that could be adapted for AGH's three-dimensional extension model (Resources, Capabilities, Actions). The research covers OpenFang (a Rust-based agent OS), the MCP server ecosystem, extension patterns from major AI coding tools, emerging protocols (A2A), agent memory systems, workflow orchestration, permission/sandbox patterns, and developer feature requests. - ---- - -## 1. OpenFang: The Closest Comparable System - -OpenFang is an open-source Agent Operating System built in Rust -- the closest architectural analog to AGH. It compiles to a single ~32MB binary (137K LOC, 14 Rust crates) and runs agents as background daemons. 
- -### 1.1 Built-in Tools (53 tools in openfang-runtime) - -OpenFang ships 53 tools in its `openfang-runtime` crate, spanning several categories: - -| Category | Tools | Description | -|---|---|---| -| Web | web_search, browser_automation, web_fetch | Search engines, headless browser control, URL fetching | -| File | file_read, file_write, file_list, file_delete | Workspace-confined file operations with path traversal prevention | -| Code/Process | process_start, code_execute | Subprocess spawning with allowlist validation, env-clearing, timeout enforcement | -| Media | image_generation, tts (text-to-speech) | Image creation via AI models, voice synthesis | -| Data | knowledge_graph, data_analyze | Graph-based knowledge storage, structured data analysis | -| Infrastructure | docker_run, docker_build | Container management for isolated execution | -| Communication | email_send, notification_push | Outbound messaging capabilities | - -All tool code runs inside a WASM sandbox with dual metering (fuel + epoch interruption). File operations are workspace-confined. Subprocesses are env-cleared and timeout-enforced. - -### 1.2 Hands System (7 Bundled Agent Packages) - -"Hands" are OpenFang's core innovation -- self-contained autonomous capability packages that combine configuration, expert knowledge, operational procedures, and tool access into a single deployable unit. 
- -Each Hand bundles: -- `HAND.toml` manifest -- System prompt with multi-phase operational playbook -- `SKILL.md` expert knowledge -- Configurable settings -- Dashboard metrics - -| Hand | Domain | What It Does | -|---|---|---| -| Clip | Content | Transforms long-form video into short clips with captions, thumbnails, voice-overs | -| Lead | Sales | Discovers, enriches, scores, deduplicates qualified leads on schedule with ICP profiling | -| Collector | Intelligence | Monitors targets and gathers competitive intelligence | -| Predictor | Forecasting | Makes predictions with Brier score tracking for calibration | -| Researcher | Productivity | Cross-references sources, fact-checks (CRAAP evaluation), generates cited reports | -| Twitter | Communication | Manages X/Twitter accounts autonomously | -| Browser | Automation | Web automation for scraping, form-filling, and interaction | - -**AGH mapping**: Hands map directly to AGH's Resources (agents + skills bundled together). AGH could implement a similar concept where an "agent package" bundles an agent definition, skills, hooks, and MCP servers into a single deployable unit. - -### 1.3 Channel Adapters (40 Adapters) - -OpenFang connects to 40 messaging platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Teams, Google Chat, Feishu, DingTalk, Mastodon, Bluesky, LinkedIn, Reddit, IRC, WebChat, and 24 more. - -Each adapter supports per-channel model overrides, DM/group policies, rate limiting, and output formatting. New adapters implement the `ChannelAdapter` trait. - -**AGH mapping**: These map to AGH's Capabilities dimension. A `channel.adapter` capability type would allow AGH to expose agent sessions across messaging platforms. The adapter pattern (trait/interface implementation) aligns with AGH's interface-based extension model. - -### 1.4 Skills System (60 Bundled Skills) - -OpenFang ships 60 bundled skills compiled into the binary, using the `SKILL.md` format (YAML frontmatter + Markdown body). 
Categories include CI/CD, Ansible, Prometheus, Nginx, Kubernetes, Terraform, Helm, Docker, sysadmin, shell-scripting, Linux networking. - -Three skill types exist: -1. **Prompt-only skills (SKILL.md)** -- inject expert domain knowledge into system prompt -2. **Python skills** -- run as subprocesses, communicate via JSON over stdin/stdout -3. **Rust/WASM skills** -- compiled to WASM, run in sandboxed environment with fuel metering - -Each skill has a `skill.toml` manifest with metadata, runtime config, tool declarations, and capability requirements. - -**AGH mapping**: AGH already has a skills system. OpenFang's `skill.toml` manifest pattern (declaring required capabilities like `NetConnect`) is worth adopting. The three-tier skill type system (prompt-only, subprocess, WASM) is a good model for AGH's skill extensibility. - ---- - -## 2. MCP Server Ecosystem - -As of March 2026, there are 5,000+ community MCP servers, with 440 curated in the best-of-mcp-servers list (930K total GitHub stars across 34 categories). 
- -### 2.1 Most Popular MCP Servers by Category - -| Category | Server | Stars/Installs | What It Does | AGH Mapping | -|---|---|---|---|---| -| **Documentation** | Context7 | 11K views, 690 installs (FastMCP #1) | Injects fresh, version-specific docs into prompts | Resource (MCP) + Capability (prompt.provider) | -| **Browser** | Playwright MCP | 30K stars, ~6K views | Structured browser automation via accessibility snapshots | Resource (MCP) + Capability (agent.driver tool) | -| **Git/GitHub** | GitHub MCP | Most-starred MCP server | PR management, issue triaging, code review automation | Resource (MCP) | -| **Database** | PostgreSQL MCP | High adoption | Natural language to SQL, schema introspection | Resource (MCP) | -| **Database** | Supabase MCP | Growing | Postgres + edge functions + schema management | Resource (MCP) | -| **Filesystem** | Filesystem MCP | Official reference | Secure file read/write/search within allowed directories | Resource (MCP) | -| **Memory** | Memory MCP | Official reference | Persistent knowledge graph across sessions | Resource (MCP) + Capability (memory.backend) | -| **Reasoning** | Sequential Thinking | Popular | Structured step-by-step reasoning | Capability (message.transform) | -| **Search** | Firecrawl MCP | Growing | Web scraping with JS rendering, anti-bot, clean markdown output | Resource (MCP) | -| **Cloud** | AWS MCP | 8.7K stars | Integration with AWS services and resources | Resource (MCP) | -| **Automation** | Zapier MCP | Growing | Connects to thousands of apps via Zapier workflows | Resource (MCP) | -| **Automation** | Pipedream MCP | Growing | 2,500 APIs, 8,000+ prebuilt tools | Resource (MCP) | -| **Cloud** | Cloudflare MCP | Growing | Workers/KV/R2/D1 management | Resource (MCP) | -| **Data** | MindsDB MCP | 39K stars | Unified data platform across databases | Resource (MCP) | -| **Search/RAG** | Pinecone MCP | Growing | Vector similarity search for RAG | Resource (MCP) + Capability (memory.backend) | - -### 2.2 
MCP Apps (January 2026) - -Anthropic launched MCP Apps -- interactive UIs that render dashboards, forms, and charts directly inside Claude. Launch partners: Amplitude, Asana, Box, Clay, Hex, Salesforce. - -**AGH mapping**: AGH could support MCP Apps as a UI extension point, where MCP servers can provide rendered components in the web UI. - -### 2.3 Recommended Starting Stack for Developers - -1. Context7 (documentation injection) -2. Playwright (browser automation) -3. GitHub (PR/issue management) -4. PostgreSQL or Supabase (database) -5. Memory (persistent knowledge graph) - -**AGH mapping**: AGH should ship with built-in MCP server support and potentially bundle or recommend these servers as defaults for developer-focused use cases. - ---- - -## 3. AI Coding Agent Extension Patterns - -### 3.1 Extension Architectures Across Tools - -| Tool | Extension Mechanism | Key Pattern | AGH Relevance | -|---|---|---|---| -| **Claude Code** | Skills (SKILL.md), hooks, custom slash commands | Markdown-based skills with YAML frontmatter; 12 lifecycle hook events; project/user scope | Directly applicable -- AGH already uses similar patterns | -| **Cursor** | Rules files, MCP, Composer | `.cursor/rules` for project context; multi-agent Composer (8 parallel agents) | Rules files map to AGH config; parallel agents map to session management | -| **Cline** | MCP Marketplace, subagents, CLI 2.0 | Client-side architecture; 5M+ installs; dedicated MCP marketplace for discovery | MCP marketplace concept; subagent spawning | -| **Roo Code** | Custom Modes, Mode Gallery | Specialized AI personas with scoped tool permissions per mode | Maps to AGH agent definitions with capability restrictions | -| **Continue.dev** | `.continuerc.json`, local indexing, @docs | Lancet protocol for local vector indexing; semantic codebase search | Maps to AGH memory.backend + prompt.provider | -| **Aider** | Git-native, BYOM | Terminal-first; git-aware diffs; repository map | Maps to AGH's CLI-first 
approach | -| **Goose** | MCP extensions, recipes, custom distributions | YAML recipe workflows; Extension Manager UI; custom distros | Recipes map to AGH skills; distros map to agent configs | -| **Windsurf** | Cascade, rules | Cascade learns project patterns; greenfield-optimized | Pattern learning maps to AGH memory system | - -### 3.2 Claude Code's Extension Model (Most Relevant to AGH) - -Claude Code's model is the most directly relevant since AGH manages Claude Code as a subprocess: - -**Skills**: Markdown files in `.claude/skills/<skill-name>/SKILL.md` with supporting scripts, templates, examples. Two invocation controls: -- `disable-model-invocation: true` -- only human can invoke (for side-effect workflows) -- `user-invocable: false` -- only model can invoke (background knowledge) - -**Hooks (12 lifecycle events)**: -- PreToolUse, PostToolUse, PostToolUseFailure -- SessionStart, SessionEnd, Stop -- SubagentStart, SubagentStop -- UserPromptSubmit, Notification -- PreCompact, PermissionRequest - -"Hooks guarantee behavior; prompts suggest it." This is a critical design principle. - -**AGH mapping**: AGH's hooks system should mirror these 12 events. The separation between "hooks guarantee" and "prompts suggest" maps perfectly to AGH's distinction between deterministic hooks and AI-driven skills. - -### 3.3 Goose's Extension Model - -Goose (29K+ stars, Apache 2.0, now under Linux Foundation's AAIF) provides: -- **MCP-native extensions**: Any MCP server becomes a Goose extension automatically -- **Recipes**: Reusable YAML workflow definitions packaging goals, required extensions, structured inputs, and sub-recipes -- **Custom Distributions**: Preconfigured provider + extension + branding bundles -- **Extension Manager UI**: Desktop app for browsing, toggling, configuring extensions -- **ACP integration**: Connects to VS Code, Cursor, Windsurf, JetBrains via Agent Client Protocol - -**AGH mapping**: Goose's recipe system maps to AGH's skills. 
Custom distributions map to workspace-level agent configurations. The Extension Manager UI concept could inform AGH's web UI design. - -### 3.4 Roo Code's Custom Modes - -Roo Code's differentiating feature is Custom Modes -- specialized AI personas with: -- Tailored system instructions per mode -- Scoped tool permissions (e.g., security reviewer can read but not write) -- Community Mode Gallery for sharing configurations -- 5 built-in modes: Code, Architect, Ask, Debug, Custom - -**AGH mapping**: This maps to AGH agent definitions with per-agent capability restrictions. AGH could implement a "mode" concept as a lightweight agent configuration overlay. - ---- - -## 4. Emerging Protocols and Patterns - -### 4.1 A2A (Agent-to-Agent Protocol) - -Google's A2A protocol (April 2025, now v0.3 as of July 2025) enables communication between opaque agent systems. Now under the Linux Foundation with 150+ supporting organizations. - -**Core concepts**: -- **Agent Cards**: JSON manifests at `/.well-known/agent.json` listing name, endpoint, skills, auth -- **Task lifecycle**: pending -> in-progress -> completed/failed, with SSE streaming -- **Transport**: HTTP, SSE, JSON-RPC (v0.3 adds gRPC) -- **Complementary to MCP**: MCP = agent-to-tool, A2A = agent-to-agent - -**AGH mapping**: A2A maps directly to AGH's agent network protocol (Phase 3). AGH sessions could publish Agent Cards, accept tasks from external agents via A2A, and delegate subtasks to remote agents. This is a natural fit for AGH's HTTP/SSE API. 
- -| A2A Concept | AGH Mapping | -|---|---| -| Agent Card | Agent definition + session metadata exposed via HTTP | -| Task submission | New session creation or message to existing session | -| Task streaming | SSE event stream (already implemented) | -| Capability discovery | Agent catalog + skills listing | - -### 4.2 Agent Memory Systems - -The memory landscape in 2026 spans three architectural categories: - -| Category | Examples | Description | AGH Mapping | -|---|---|---|---| -| Extended attention | Infini-attention, recursive LMs | Scale the context window itself | Out of scope (model-level) | -| Memory-augmented transformers | Hybrid models | Learned memory modules in model | Out of scope (model-level) | -| External persistent memory | Mem0, Letta, LangChain Memory | Store/retrieve/manage memory outside model | Capability (memory.backend) | - -**Key frameworks**: - -| Framework | Approach | Key Feature | AGH Relevance | -|---|---|---|---| -| **Mem0** | Dedicated memory layer | Vector memory (semantic similarity) + graph memory (relationships) | High -- memory.backend implementation | -| **Letta** | Long-context agent architecture | Core memory blocks (persistent labeled context), archival memory (DB-backed), memory editing tools | High -- maps to AGH's dual-scope memory | -| **LangChain Memory** | Modular memory types | Conversation buffer, summary, entity, knowledge graph | Medium -- patterns for memory.backend | -| **ReMe** | Open-source memory kit | Multiple vector store backends, "remember me, refine me" | Medium -- reference implementation | - -**Advanced patterns emerging in 2026**: -- **Conflict resolution**: When user preferences change, compress old memory into temporal reflection summaries rather than deleting -- **Multi-agent shared memory**: Strict access controls to prevent race conditions and cross-agent contamination -- **Graph memory in production**: For complex entity relationships (medical, enterprise hierarchies, technical systems) -- 
**Memory cost**: 1M-token context window costs ~15x more per turn than equivalent persistent memory retrieval - -**AGH mapping**: AGH's existing dual-scope memory (global + workspace) with dream consolidation is well-positioned. Extensions should add: -- Vector-backed memory.backend (semantic search) -- Graph-backed memory.backend (relationship tracking) -- Memory conflict resolution (temporal reflection summaries) -- Cross-session memory sharing with access controls - -### 4.3 Workflow Orchestration Patterns - -Five dominant patterns have emerged: - -| Pattern | Description | Use Case | AGH Mapping | -|---|---|---|---| -| Sequential pipeline | Step-by-step, each stage builds on previous | Progressive refinement tasks | Session chaining via hooks | -| Hierarchical multi-agent | Manager-subordinate delegation | Complex multi-department tasks | Session spawning + parent-child relationships | -| Decentralized swarm | Peer agents collaborate without central control | Resilient, flexible problem-solving | A2A-connected sessions | -| Group chat | Shared conversation thread, chat manager facilitates | Consensus-building (limit to 3 agents) | Multi-agent session with turn management | -| DAG-based | Directed acyclic graphs define task dependencies | Complex pipelines with parallel steps | Workflow engine as new capability | - -**AGH mapping**: AGH's session model could be extended with a `workflow.engine` capability that supports DAG-based task orchestration across sessions. Parent sessions could spawn child sessions with defined dependencies. - ---- - -## 5. 
Permission, Sandbox, and Human-in-the-Loop Patterns - -### 5.1 Three Levels of Human Oversight - -| Level | Description | When to Use | -|---|---|---| -| Human-out-of-the-loop | Agent acts fully autonomously | Low-risk, well-defined tasks | -| Human-in-the-loop | Agent pauses for approval on specific actions | High-risk or destructive actions | -| Human-on-the-loop | Supervisor monitors overall flow, intervenes on anomalies | Medium-risk continuous operations | - -### 5.2 Permission Patterns - -| Pattern | Description | AGH Mapping | -|---|---|---| -| Per-tool permission policies | Read vs. write access per tool | Capability (permission.gate) | -| Environment-scoped permissions | Allow destructive ops in staging only | Config-level permission rules | -| Approval vs. suspension | Gatekeeping (yes/no) vs. clarification (need more info) | Action (Host API) with two response types | -| Planning/execution separation | Planner proposes under broad permissions, executor acts under strict permissions | Two-phase session with different agent configs | -| Tool trust spectrum | Classify tools from harmless (search) to destructive (delete) | permission.gate with risk-level classification | -| Centralized governance UI | Dashboard for managing who/what/where/when | Web UI extension | - -### 5.3 Sandbox Strategies - -| Strategy | Description | AGH Mapping | -|---|---|---| -| WASM sandbox | Dual-metered execution (fuel + epoch) | Capability (could wrap tool execution) | -| MicroVMs | Firecracker/gVisor for full isolation | Heavy-weight, for untrusted code | -| Short-lived credentials | Temporary tokens scoped per task | Hook (PreToolUse) for credential injection | -| Zero-trust networking | All connections explicitly allowed | Config-level network policies | -| Workspace confinement | File operations restricted to workspace | Already in AGH's workspace model | - -**AGH mapping**: AGH's permission.gate capability should implement the tool trust spectrum. 
The planning/execution separation pattern maps to AGH's ability to configure different agent definitions for different phases of a workflow. - ---- - -## 6. Observability and Tracing - -### 6.1 OpenTelemetry as the Standard - -OpenTelemetry has emerged as the universal standard for AI agent observability. Key developments: - -| Project | What It Does | AGH Mapping | -|---|---|---| -| Traceloop OTel MCP Server | AI agents query distributed traces for automated debugging | Resource (MCP) | -| FastMCP native OTel | Zero-config tracing for tool/prompt/resource operations | Capability (observe.exporter) | -| AG2 OTel Tracing | Structured hierarchical traces with GenAI semantic conventions | Capability (observe.exporter) reference | -| Grafana Cloud + OpenLIT | Pre-built dashboards for MCP observability | Reference architecture | - -**Key metrics to track**: Per-tool latency, error rates, call volume anomalies, end-to-end traces connecting agent reasoning to tool execution. - -**Proposed MCP protocol change**: Add standardized OTel trace spans directly into the MCP protocol, with trace context propagation via HTTP headers (SSE/Streamable HTTP) or explicit parameters (stdio). - -**AGH mapping**: AGH's observe.exporter capability should export OpenTelemetry-compatible traces. The GenAI semantic conventions (model name, provider, token usage, cost, temperature, tool call arguments/results) should be adopted for AGH's event recording. - ---- - -## 7. 
What Developers Most Want - -Based on GitHub issues, Reddit discussions, and developer surveys: - -### 7.1 Top Feature Requests - -| Request | Frequency | Description | AGH Mapping | -|---|---|---|---| -| **Better large codebase handling** | Very high | Index whole repos, semantic search across files | Capability (prompt.provider) with codebase indexing | -| **Issue-to-PR automation** | High | Assign GitHub issue, agent implements + tests + deploys | Action (Host API) + workflow orchestration | -| **Multi-file agentic workflows** | High | Parallel agents working on different codebase areas | Session management with concurrent agents | -| **Bring Your Own Model (BYOM)** | High | Connect any LLM provider via API keys | Capability (agent.driver) with provider abstraction | -| **Fine-grained permissions** | High | Approval gates before destructive actions, per-task autonomy levels | Capability (permission.gate) | -| **MCP tool discovery** | Medium-high | Browse, install, configure MCP servers easily | Resource (MCP) with registry/marketplace | -| **Reusable workflows/recipes** | Medium | Save and share task automation patterns | Resource (skills) with workflow support | -| **Cost tracking and budgets** | Medium | Token usage monitoring, per-session cost limits | Capability (observe.exporter) + config | -| **Audit trails** | Medium | Complete record of every agent action for compliance | Already in AGH's observe system | -| **Local/offline model support** | Medium | Run with Ollama, Docker Model Runner | Capability (agent.driver) | -| **Custom agent personas** | Medium | Different "modes" for different tasks (code, review, plan) | Resource (agents) with mode overlays | -| **CI/CD integration** | Medium | Agents triggered by CI events, results fed back | Hook + Action (Host API) | - -### 7.2 Anti-Patterns to Avoid - -- Using AI for architecture decisions (better for implementation) -- Infinite agent loops without cost/iteration limits -- Agents that rewrite entire files 
instead of surgical diffs -- Hardcoded model dependencies (vendor lock-in) -- Trust-all-tools security model - ---- - -## 8. Consolidated Extension Ideas for AGH - -### 8.1 High-Priority Extensions (Strong ecosystem demand, clear AGH mapping) - -| Extension | Type | Dimension | Description | -|---|---|---|---| -| **OTel Observe Exporter** | Capability | observe.exporter | Export AGH events as OpenTelemetry traces with GenAI semantic conventions | -| **Vector Memory Backend** | Capability | memory.backend | Semantic similarity search over agent memory using embeddings | -| **Graph Memory Backend** | Capability | memory.backend | Relationship-aware memory using knowledge graphs | -| **A2A Protocol Gateway** | Capability | agent.driver (extension) | Accept/delegate tasks via Google's Agent-to-Agent protocol | -| **Permission Gate: Risk Classifier** | Capability | permission.gate | Classify tool calls by risk level, require approval for destructive actions | -| **Codebase Indexer** | Capability | prompt.provider | Index workspace files for semantic search, inject relevant context | -| **GitHub MCP Bundle** | Resource | MCP | Pre-configured GitHub MCP server for PR/issue/code management | -| **Workflow Engine** | Capability | (new) | DAG-based task orchestration across sessions | -| **Agent Package (Hand-style)** | Resource | agents + skills + hooks | Bundled autonomous capability packages | -| **Channel Adapter Framework** | Capability | (new) | Expose sessions via messaging platforms (Slack, Discord, Telegram) | - -### 8.2 Medium-Priority Extensions (Growing demand, useful differentiation) - -| Extension | Type | Dimension | Description | -|---|---|---|---| -| **Cost/Budget Tracker** | Capability | observe.exporter | Track token usage, enforce per-session cost limits | -| **Content Validator: PII** | Capability | content.validate | Detect and mask personally identifiable information | -| **Content Validator: Secret Scanner** | Capability | content.validate | Prevent 
secrets/credentials from leaking into agent context | -| **Prompt Injection Scanner** | Capability | content.validate | Detect prompt injection attempts in skill/tool inputs | -| **Custom Distribution Builder** | Action | Host API | Package agent configs + skills + MCP into shareable bundles | -| **MCP Server Registry** | Action | Host API | Browse, install, configure MCP servers from a catalog | -| **Webhook/Event Bridge** | Resource | hooks | Trigger sessions from external events (CI/CD, webhooks, cron) | -| **Planning/Execution Splitter** | Capability | message.transform | Separate planning phase (broad tools) from execution (restricted tools) | - -### 8.3 Lower-Priority / Exploratory Extensions - -| Extension | Type | Dimension | Description | -|---|---|---|---| -| **Multi-Agent Group Chat** | Action | Host API | Multiple agents in shared conversation with turn management | -| **Agent Card Publisher** | Action | Host API | Publish `.well-known/agent.json` for A2A discovery | -| **Mode Gallery** | Resource | skills | Community marketplace for agent mode/persona configurations | -| **Recipe/Workflow YAML** | Resource | skills | Goose-style reusable workflow definitions | -| **Memory Conflict Resolver** | Capability | memory.backend | Temporal reflection summaries when knowledge changes | -| **Browser Automation Tool** | Capability | agent.driver (tool) | Playwright-based browser control for agents | -| **Local Model Provider** | Capability | agent.driver | Connect to Ollama/local models as agent backends | - ---- - -## 9. Key Takeaways - -### 9.1 The ecosystem is converging on three standards - -1. **MCP** (Model Context Protocol) for agent-to-tool communication -- 5,000+ servers, universal adoption -2. **A2A** (Agent-to-Agent) for agent-to-agent communication -- 150+ organizations, Linux Foundation governance -3. **OpenTelemetry** for agent observability -- emerging as the universal tracing standard for AI agents - -AGH should support all three natively. 
- -### 9.2 Skills/extensions are becoming the primary differentiator - -Every major tool (Claude Code, Goose, OpenFang, Cline, Roo Code) has a skills/extension system. The winning pattern is: -- **Markdown-based** skill definitions (low barrier to authorship) -- **YAML manifests** for metadata and capability declarations -- **Three tiers**: prompt-only (cheapest), subprocess (flexible), sandboxed (secure) -- **Community marketplace** for discovery and sharing - -AGH's existing skills system is well-aligned. Priority: add manifest-based capability declarations and a registry. - -### 9.3 Memory is the next competitive frontier - -Persistent memory across sessions is transitioning from experimental to production-critical. The key patterns are: -- **Dual-scope** (global + workspace) -- AGH already has this -- **Vector + graph** hybrid -- AGH should add both backends -- **Dream/consolidation** -- AGH already has this (rare advantage) -- **Cost optimization** -- persistent memory retrieval is 15x cheaper than large context windows - -### 9.4 Permission and safety are table stakes - -Every production agent system implements: -- Per-tool, per-action permission policies -- Planning/execution separation -- Human-in-the-loop for destructive actions -- Audit trails -- Sandbox isolation - -AGH's permission.gate capability should implement risk-based classification with configurable approval thresholds. - -### 9.5 Hooks/middleware are the deterministic control layer - -The universal pattern across all frameworks: "Hooks guarantee behavior; prompts suggest it." Claude Code's 12 hook events (PreToolUse, PostToolUse, SessionStart, etc.) represent the industry standard. AGH's hook system should match or exceed this coverage. - -### 9.6 Workflow orchestration is emerging but not yet standardized - -DAG-based workflows, hierarchical agent delegation, and pipeline patterns are common but each framework implements them differently. 
AGH has an opportunity to provide a clean, Go-native workflow engine that leverages its session model. - -### 9.7 Channel adapters are a differentiator for non-IDE use cases - -OpenFang's 40 channel adapters demonstrate demand for agent access beyond CLI/IDE. Slack, Discord, and Telegram are the highest-demand channels. AGH could start with 3-5 high-value adapters. - ---- - -## Sources - -- [OpenFang -- The Agent Operating System](https://www.openfang.sh/) -- [OpenFang GitHub](https://github.com/RightNow-AI/openfang) -- [OpenFang Skill Development Docs](https://www.openfang.sh/docs/skill-development) -- [OpenFang Channel Adapters Docs](https://www.openfang.sh/docs/channel-adapters) -- [awesome-mcp-servers (GitHub)](https://github.com/wong2/awesome-mcp-servers) -- [best-of-mcp-servers (GitHub)](https://github.com/tolkonepiu/best-of-mcp-servers) -- [MCP Awesome Directory (1200+ servers)](https://mcp-awesome.com/) -- [Top 10 Most Popular MCP Servers -- FastMCP](https://fastmcp.me/blog/top-10-most-popular-mcp-servers) -- [Top 15 MCP Servers -- DEV Community](https://dev.to/jangwook_kim_e31e7291ad98/top-15-mcp-servers-every-developer-should-install-in-2026-n1h) -- [Agent2Agent Protocol (A2A) -- Google Blog](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/) -- [A2A Protocol Specification](https://a2a-protocol.org/latest/specification/) -- [A2A GitHub](https://github.com/a2aproject/A2A) -- [A2A Protocol Upgrade -- Google Cloud Blog](https://cloud.google.com/blog/products/ai-machine-learning/agent2agent-protocol-is-getting-an-upgrade) -- [Linux Foundation A2A Project](https://www.linuxfoundation.org/press/linux-foundation-launches-the-agent2agent-protocol-project-to-enable-secure-intelligent-communication-between-ai-agents) -- [AI Agent Memory Frameworks 2026 -- MachineLearningMastery](https://machinelearningmastery.com/the-6-best-ai-agent-memory-frameworks-you-should-try-in-2026/) -- [Memory for AI Agents -- The New 
Stack](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/) -- [State of AI Agent Memory 2026 -- Mem0](https://mem0.ai/blog/state-of-ai-agent-memory-2026) -- [Architecture of Memory Systems in AI Agents -- Analytics Vidhya](https://www.analyticsvidhya.com/blog/2026/04/memory-systems-in-ai-agents/) -- [Goose AI Agent -- GitHub](https://github.com/block/goose) -- [Goose Documentation](https://goose-docs.ai/) -- [Goose AI Review 2026](https://aitoolanalysis.com/goose-ai-review/) -- [Cline vs Roo Code vs Continue 2026 -- DevToolReviews](https://www.devtoolreviews.com/reviews/cline-vs-roo-code-vs-continue) -- [Roo Code GitHub](https://github.com/RooCodeInc/Roo-Code) -- [Claude Code Skills Documentation](https://code.claude.com/docs/en/skills) -- [Claude Code Hooks -- Dotzlaw Consulting](https://www.dotzlaw.com/insights/claude-hooks/) -- [Claude Agent SDK Hooks Lifecycle](https://pkg.go.dev/github.com/dotcommander/agent-sdk-go/examples/hooks-lifecycle) -- [OpenTelemetry MCP Server -- Traceloop](https://github.com/traceloop/opentelemetry-mcp-server) -- [MCP Observability with OTel -- SigNoz](https://signoz.io/blog/mcp-observability-with-otel/) -- [Distributed Tracing for Agentic Workflows -- Red Hat](https://developers.redhat.com/articles/2026/04/06/distributed-tracing-agentic-workflows-opentelemetry) -- [How to Sandbox AI Agents 2026 -- Northflank](https://northflank.com/blog/how-to-sandbox-ai-agents) -- [Human-in-the-Loop for AI Agents -- Permit.io](https://www.permit.io/blog/human-in-the-loop-for-ai-agents-best-practices-frameworks-use-cases-and-demo) -- [AI Agent Security Guide 2026 -- MintMCP](https://www.mintmcp.com/blog/ai-agent-security) -- [2026 Guide to Agentic Workflow Architectures -- StackAI](https://www.stackai.com/blog/the-2026-guide-to-agentic-workflow-architectures) -- [Best AI Coding Agents 2026 -- Faros](https://www.faros.ai/blog/best-ai-coding-agents-2026) -- [Best AI for Coding Reddit 
2026](https://www.aitooldiscovery.com/guides/best-ai-for-coding-reddit) -- [10 Things Developers Want from Agentic IDEs -- RedMonk](https://redmonk.com/kholterhoff/2025/12/22/10-things-developers-want-from-their-agentic-ides-in-2025/) diff --git a/.compozy/tasks/ext-ideas/research/analysis_hermes.md b/.compozy/tasks/ext-ideas/research/analysis_hermes.md deleted file mode 100644 index c2cb93ffd..000000000 --- a/.compozy/tasks/ext-ideas/research/analysis_hermes.md +++ /dev/null @@ -1,299 +0,0 @@ -# Hermes Agent (hermes-agent) -- Extension & Plugin Research for AGH - -## Overview - -Hermes Agent is an open-source, self-improving AI agent framework built by Nous Research, released February 2026, written in Python. It has ~23k GitHub stars, 142 contributors, and ships as a single CLI binary with a multi-platform messaging gateway. Hermes is the closest comparable project to AGH in spirit: a daemon-like agent harness with persistent memory, session management, tool orchestration, and a rich extension model. - -Key architectural parallels to AGH: -- **Single-binary daemon** with CLI and gateway modes -- **SQLite-backed persistence** with FTS5 for session search -- **Plugin/extension model** spanning tools, hooks, memory backends, and skills -- **MCP integration** as both client and server -- **Subagent delegation** for parallel workstreams -- **Multi-channel communication** (Telegram, Discord, Slack, WhatsApp, Signal, CLI) - -Hermes is three months old and already has 80+ community extensions. This analysis extracts concrete extension ideas for AGH's three-dimensional model: Resources, Capabilities, and Actions. - ---- - -## Table of Extensions and Tools - -### Built-in Tools (47 registered tools across 20 toolsets) - -| Name / Toolset | Category | Description | AGH Mapping | -|---|---|---|---| -| `terminal` | Execution | Six backends: local, Docker, SSH, Daytona, Singularity, Modal. Background process management (list, poll, wait, log, kill, write). 
PTY mode for interactive CLIs. | **Capability: agent.driver** -- AGH's ACP driver spawns subprocesses; terminal backends map to driver variants. Add Docker/SSH/serverless driver backends. | -| `web` | Search/Extract | Web search, page extraction, URL safety checking, website policy compliance | **Resource: tool** -- Web search as a built-in tool exposed via ACP. Could be an MCP server integration. | -| `browser` | Automation | Full browser automation via CDP (navigate, click, type, screenshot). Backends: Browserbase cloud, Browser Use cloud, local Chrome, local Chromium. | **Capability: agent.driver** or **Resource: MCP** -- Browser automation as an MCP server or specialized driver. | -| `file` | Filesystem | File read/write/edit with persistent context | **Resource: tool** -- Already covered by ACP agent file tools. | -| `vision` | Multimodal | Image analysis via vision-capable models. Clipboard paste support. | **Capability: content.validate** or **message.transform** -- Vision as a content processing capability. | -| `image_gen` | Creative | Text-to-image via FAL.ai FLUX 2 Pro with auto-upscaling | **Resource: MCP** -- Image generation as an MCP server. | -| `tts` | Voice | Text-to-speech with 5 backends (Edge TTS, NeuTTS, ElevenLabs, etc.). Markdown stripping for natural speech. | **Resource: MCP** -- TTS as an MCP tool server. | -| `transcription` | Voice | STT via faster-whisper (local), Groq, or OpenAI. Hallucination filtering (26 known phrases). | **Resource: MCP** -- Transcription as an MCP tool server. | -| `cronjob` | Scheduling | Built-in cron scheduler with natural language. Jobs attach skills, deliver results to any platform. Pause/resume/edit. | **Resource: hook** + **Action: session** -- Cron as a hook trigger that creates scheduled sessions. High value for AGH. | -| `delegation` | Orchestration | Spawn isolated subagents (up to 3 concurrent) with own conversation, terminal, and restricted toolsets. Zero-context-cost via RPC. 
| **Action: session** -- Subagent delegation maps directly to AGH session spawning. Critical capability. | -| `code_execution` | Execution | Sandboxed Python execution with RPC access to all Hermes tools. 300s timeout, 50 tool calls max, 50KB stdout cap. | **Capability: agent.driver** -- Code execution sandbox as a driver variant or tool. | -| `memory` | Persistence | Dual-file memory (MEMORY.md + USER.md) injected into system prompt. 8 pluggable backends. | **Capability: memory.backend** -- Direct mapping. AGH already has this dimension. | -| `session_search` | Recall | SQLite FTS5 full-text search over all past sessions with LLM summarization | **Action: observe** -- Session search as an observe/query capability. | -| `skills` | Knowledge | On-demand knowledge documents with progressive disclosure. Auto-creation from experience. | **Resource: skill** -- Direct mapping. AGH already has skills. | -| `todo` | Planning | Task/todo management within agent sessions | **Resource: tool** -- Simple tool, low priority. | -| `moa` | Routing | Multi-model orchestration/routing (Mixture of Agents) | **Capability: prompt.provider** -- Model routing as a prompt/provider capability. | -| `homeassistant` | IoT | Smart home control: list entities, control devices, watch state changes. Auto-enabled via HASS_TOKEN. | **Resource: MCP** -- Home Assistant as an MCP server integration. | -| `rl` | Training | RL training pipeline with Atropos (trajectory API), Tinker (training service), and custom environments. GRPO with LoRA. | **Capability: observe.exporter** -- Training data export. Unique to Hermes; not directly applicable to AGH v1. | -| `voice_mode` | Interface | Push-to-talk terminal, voice messages in messengers, Discord VC join/listen/speak | **Resource: hook** -- Voice as a communication channel hook. | -| `clarify` | UX | Ask user for clarification when instructions are ambiguous | **Capability: permission.gate** -- Clarification as a gating mechanism. 
| -| `send_message` | Communication | Send messages across all connected platforms (Telegram, Discord, Slack, etc.) | **Resource: MCP** -- Messaging as an MCP server (Hermes already does this as MCP server mode). | - -### Plugin System - -| Plugin / Feature | Category | Description | AGH Mapping | -|---|---|---|---| -| `pre_llm_call` hook | Lifecycle | Fires before each LLM call. Can inject context into ephemeral system prompt. Used by memory plugins to inject recalled context. | **Resource: hook** -- Pre-processing hook. Maps to AGH's hook system. Critical for memory injection. | -| `post_llm_call` hook | Lifecycle | Fires after each LLM response. Used by memory plugins to retain conversation turns. | **Resource: hook** -- Post-processing hook. Maps to AGH's hook system. | -| `pre_tool_call` hook | Lifecycle | Fires before tool execution. Can intercept/modify tool calls. | **Resource: hook** -- Tool interception hook. Maps to AGH's hook system. | -| `post_tool_call` hook | Lifecycle | Fires after tool execution. Can process/modify tool results. | **Resource: hook** -- Tool result processing hook. | -| `on_session_start` hook | Lifecycle | Fires when a session begins. Used for initialization, context loading. | **Resource: hook** -- Session lifecycle hook. AGH already has session state machine events. | -| `on_session_end` hook | Lifecycle | Fires when a session ends. Used for cleanup, memory extraction. | **Resource: hook** -- Session lifecycle hook. | -| CLI subcommand registration | Extension | Plugins can register new CLI subcommands via the plugin context API. | **Resource: hook** (CLI extension) -- AGH could allow extensions to register CLI commands. | -| Request-scoped API hooks | Extension | Hooks receive correlation IDs for request tracing. | **Capability: observe.exporter** -- Observability enhancement. | -| Env var prompting on install | UX | Plugins prompt for required env vars during installation. 
| **Resource: hook** (install lifecycle) -- Plugin installation UX. | -| Plugin discovery (3 sources) | Architecture | `~/.hermes/plugins/` (user), `.hermes/plugins/` (project), pip entry points | **Architecture** -- AGH could support user-dir, project-dir, and Go plugin discovery. | - -### Memory Providers (8 pluggable backends) - -| Provider | Category | Description | AGH Mapping | -|---|---|---|---| -| **Built-in** (MEMORY.md + USER.md) | Local | Two curated markdown files injected into system prompt. Agent-editable. | **Capability: memory.backend** -- Default backend. AGH's current memory system. | -| **Honcho** | Cloud/Self-hosted | Dialectic reasoning and deep user modeling. Builds model of how user thinks, not just what they said. AGPL v3.0. | **Capability: memory.backend** -- Advanced user modeling backend. High value concept. | -| **Hindsight** | Local/Cloud | Best recall accuracy (91.4% on LongMemEval). Async prefetch + retain. Semantic, graph, temporal retrieval. | **Capability: memory.backend** -- High-accuracy retrieval backend. | -| **Holographic** | Local SQLite | HRR (Holographic Reduced Representations). Sub-millisecond retrieval. Zero deps. Trust scoring with decay. | **Capability: memory.backend** -- Lightweight local backend. Interesting for AGH's SQLite approach. | -| **RetainDB** | Cloud (paid) | Hybrid search (Vector + BM25 + Reranking). 7 memory types. Delta compression. | **Capability: memory.backend** -- Cloud backend option. | -| **Mem0** | Cloud | Fastest setup, free tier. Simple extraction. | **Capability: memory.backend** -- Easy onboarding backend. | -| **ByteRover** | Local Markdown | Human-readable, inspectable memory stored as Markdown files. | **Capability: memory.backend** -- Debug-friendly backend. | -| **OpenViking** | Local | Tiered memory loading (L0/L1/L2) for token efficiency. | **Capability: memory.backend** -- Tiered loading is a smart optimization. 
| - -### Skills System - -| Skill Category | Examples | Description | AGH Mapping | -|---|---|---|---| -| Apple/macOS | iMessage, Reminders, Notes, FindMy | macOS-specific automation. Platform-gated (only loads on macOS). | **Resource: skill** -- Platform-conditional skills. AGH could gate skills by OS/platform. | -| Agent Orchestration | Multi-agent workflows, coding agent spawning | Skills for delegating to and coordinating with other agents. | **Resource: skill** + **Action: session** -- Multi-agent coordination skills. | -| Data Science | Jupyter, data analysis, visualization | Interactive exploration and notebook-based workflows. | **Resource: skill** -- Domain knowledge skills. | -| Creative | ASCII art, hand-drawn diagrams, visual design | Creative output skills. | **Resource: skill** -- Domain knowledge skills. | -| DevOps | Infrastructure automation | CI/CD, deployment, infrastructure skills. | **Resource: skill** -- Domain knowledge skills. | -| Media | YouTube transcripts, GIF search, music gen, audio viz | Media processing and generation. | **Resource: skill** -- Domain knowledge skills. | -| MLOps | Model hub, GPU cloud, eval benchmarks, quantization | ML workflow automation. | **Resource: skill** -- Domain knowledge skills. | -| Smart Home | Light/switch/sensor control | Home automation skills. | **Resource: skill** -- Domain knowledge skills. | -| Social Platforms | Posting, reading, monitoring | Social media automation. | **Resource: skill** -- Domain knowledge skills. | - -### Community Extensions (Selected from 80+) - -| Extension | Author | Status | Description | AGH Mapping | -|---|---|---|---|---| -| **hermes-workspace** | outsourc-e | Production | Web-based GUI: chat, terminal, memory browser, skills manager, inspector | **Resource: MCP** / Web UI -- AGH already has web UI via HTTP/SSE. Workspace concept maps to AGH's web layer. | -| **mission-control** | builderz-labs | Production | Agent fleet orchestration dashboard. 
Dispatch tasks, track costs, coordinate multi-agent workflows. 3.7k stars. | **Action: session** + **observe** -- Fleet management is a natural AGH extension for multi-session orchestration. | -| **hermes-payguard** | nativ3ai | Experimental | USDC/x402 payment plugin with spending limits and approval flows | **Capability: permission.gate** -- Payment gating/approval as a permission gate. | -| **hindsight** (plugin) | Vectorize | Production | Long-term memory layer. retain/recall/reflect workflows. 8.3k stars. | **Capability: memory.backend** -- Memory backend plugin. | -| **hermes-web-search-plus** | robbyczgw-cla | Beta | Multi-provider web search with intelligent routing (Serper, Tavily, Exa) | **Resource: MCP** -- Search aggregation as an MCP server. | -| **lintlang** | roli-lpci | Beta | Static linter for agent configs/prompts. HERM v1.1 scoring. | **Resource: tool** -- Config validation tool. Could be a pre-session hook. | -| **hermes-plugins** (4-pack) | 42-evey | Beta | Goal management, inter-agent bridge, model selection, cost control | **Multiple** -- Each maps to different AGH dimensions. | -| **hermes-skill-factory** | community | Beta | Auto-generates SKILL.md files from successful workflows | **Resource: skill** -- Skill auto-generation. High value for AGH's skills system. | -| **hermes-weather-plugin** | FahrenheitResearch | Beta | Professional weather with NWS model imagery, NEXRAD radar | **Resource: MCP** -- Domain-specific MCP server. | -| **hermes-agent-acp-skill** | Rainhoole | Beta | Multi-agent delegation bridging Hermes, Codex, and Claude Code | **Resource: skill** + **Capability: agent.driver** -- Cross-agent delegation. Directly relevant to AGH's ACP model. | -| **Anthropic-Cybersecurity-Skills** | community | Production | 734+ security skills mapped to MITRE ATT&CK. 3.6k stars. | **Resource: skill** -- Security skill library. 
| -| **autonovel** | NousResearch | Production | Autonomous novel-writing pipeline (100k+ words) | **Resource: skill** -- Long-running workflow skill. | -| **hermes-agent-self-evolution** | NousResearch | Research | Evolutionary self-improvement via DSPy and GEPA | **Capability: observe.exporter** -- Self-improvement pipeline. Research-grade. | -| **HermesHub** | amanning3390 | Production | Curated skills marketplace with security scanning (65+ threat rules), creator marketplace, x402 payments | **Architecture** -- Skills marketplace concept for AGH. | -| **vessel-browser** | unmodeled-tyler | Experimental | AI-native Linux browser with MCP control | **Resource: MCP** -- Browser as MCP server. | -| **orahermes-agent** | jasperan | Production | Oracle AI Agent Harness with OCI GenAI integration | **Capability: agent.driver** -- Enterprise driver variant. | -| **portable-hermes-agent** | rookiemann | Beta | Windows desktop app bundling 100 tools, GUI, local models | **Architecture** -- Desktop packaging concept. | - -### MCP Integration Details - -| Feature | Description | AGH Mapping | -|---|---|---| -| MCP Client (native-mcp) | Auto-discovers MCP servers, registers tools, supports stdio + HTTP transports | **Resource: MCP** -- AGH already supports MCP. Enhance with auto-discovery. | -| MCP Server Mode | Hermes exposes its messaging capabilities as an MCP server (list conversations, read history, send messages) | **Resource: MCP** -- AGH could expose session/memory/observe as MCP server. | -| Dynamic tool updates | Handles `notifications/tools/list_changed` for runtime tool registry updates | **Resource: MCP** -- Dynamic tool refresh. | -| Security filtering | Allow/block lists and attribute-based rules for MCP tools | **Capability: permission.gate** -- MCP tool filtering as a permission gate. | -| OAuth 2.1 PKCE | Full OAuth flow for MCP server authentication | **Resource: MCP** -- Auth for MCP servers. 
| -| OSV Malware Scanning | Automatic vulnerability scanning of MCP extension packages | **Capability: content.validate** -- Security scanning for extensions. | -| IDE Integration | VS Code, Zed, JetBrains can register MCP servers that Hermes picks up | **Resource: MCP** -- IDE-sourced MCP server discovery. | - ---- - -## Detailed Analysis of High-Impact Extensions - -### 1. Pluggable Memory Backends - -**What Hermes does:** Memory is abstracted behind a provider ABC (Abstract Base Class). Eight backends implement it, from local SQLite (Holographic) to cloud services (Hindsight, Honcho). Memory providers hook into `pre_llm_call` to inject recalled context and `post_llm_call` to retain new information. The agent sees a unified interface regardless of backend. - -**Why it matters for AGH:** AGH already has `memory.backend` as a Capability dimension. Hermes proves that the community will build diverse memory backends if the interface is clean. The key insight is that memory providers need lifecycle hooks (pre/post LLM call) to be truly useful -- simple CRUD is not enough. - -**AGH adaptation:** -- Define a `MemoryBackend` interface in `internal/memory/` with `Recall(ctx, query) ([]Memory, error)` and `Retain(ctx, turn) error` methods -- Wire it into the session lifecycle via the existing hook system -- Ship a Holographic-style SQLite backend as the default (zero deps, local-first) -- Allow registration of additional backends via the plugin system - -### 2. Cron/Scheduled Sessions - -**What Hermes does:** A built-in cron scheduler lets users define recurring tasks in natural language. Jobs can attach skills, use specific tools, and deliver results to any messaging platform. Jobs support pause/resume/edit. - -**Why it matters for AGH:** AGH sessions are currently user-initiated. Scheduled sessions enable autonomous operation: nightly code reviews, morning briefings, periodic health checks, automated testing runs. This is a natural extension of AGH's session lifecycle.
- -**AGH adaptation:** -- Add a `scheduler` package under `internal/` with cron expression parsing -- Integrate with `internal/session` to create sessions on schedule -- Persist job definitions in `globaldb` -- Expose via UDS API for CLI management and HTTP API for web UI -- Map to **Resource: hook** (cron trigger) + **Action: session.create** - -### 3. Subagent Delegation with Isolated Context - -**What Hermes does:** The `delegate_task` tool spawns child agent instances with isolated context, restricted toolsets, and their own terminal sessions. Up to 3 subagents can run concurrently. Results are collected with zero context cost to the parent. - -**Why it matters for AGH:** AGH already manages sessions, but parent-child session relationships and context isolation are not yet modeled. Delegation enables complex workflows: a parent session spawns specialized child sessions for parallel tasks, collects their results, and synthesizes them. - -**AGH adaptation:** -- Add parent-child session relationships in `internal/session` -- Add a `delegate` action to the Host API -- Child sessions inherit the parent's workspace but get restricted tool access -- Results flow back via the observe system -- Map to **Action: session.delegate** + **Capability: agent.driver** (child driver selection) - -### 4. Skill Auto-Generation (Skill Factory) - -**What Hermes does:** After completing a task successfully, the agent analyzes its steps, identifies reusable patterns, and writes a SKILL.md file capturing the workflow. Next time a similar task arises, it loads the skill. Every 15 tasks, the agent evaluates and refines skills. - -**Why it matters for AGH:** This is the "self-improving" core of Hermes. For AGH, it means agents can build institutional knowledge over time. A DevOps agent that deploys 50 times creates a deployment skill that captures the edge cases it has encountered.
- -**AGH adaptation:** -- Add skill generation to `internal/skills/` triggered by session completion hooks -- Use the observe system to capture successful session trajectories -- LLM-based skill extraction as a post-session hook -- Store generated skills in workspace-scoped skill directory -- Map to **Resource: skill** (auto-generated) + **Resource: hook** (session.end trigger) - -### 5. MCP Server Mode (Exposing Agent Capabilities) - -**What Hermes does:** Hermes can act as an MCP server, exposing its messaging capabilities to other MCP clients. Other agents (Claude Code, Codex, Cursor) can use Hermes's messaging, conversation history, and platform delivery as tools. - -**Why it matters for AGH:** AGH could expose its session management, memory, skills, and observe capabilities as MCP tools. This makes AGH a "capability provider" for any MCP-compatible agent, not just a harness for running agents. - -**AGH adaptation:** -- Add MCP server mode to `internal/api/` alongside HTTP and UDS servers -- Expose key Host API actions as MCP tools: `agh_create_session`, `agh_query_memory`, `agh_list_skills`, `agh_get_events` -- This makes AGH composable with other agent systems -- Map to **Resource: MCP** (server role) + **Action: all Host API actions** - -### 6. Security Scanning for Extensions (Skills Guard) - -**What Hermes does:** All hub-installed skills pass through a security scanner checking 65+ threat rules across 8 categories: data exfiltration, prompt injection, destructive commands, obfuscation, hardcoded secrets, network abuse, env abuse, supply-chain signals. Critical findings block installation. - -**Why it matters for AGH:** As AGH's extension ecosystem grows, untrusted extensions become a risk vector. A validation layer for skills, hooks, and MCP servers prevents malicious or buggy extensions from compromising the agent or host system. 
- -**AGH adaptation:** -- Add a `validate` package under `internal/skills/` or a general `internal/security/` package -- Implement pattern-based scanning for skill content before loading -- Gate MCP server connections through permission checks -- Map to **Capability: content.validate** + **Capability: permission.gate** - -### 7. Multi-Platform Messaging Gateway - -**What Hermes does:** A single gateway process handles 14 platform adapters (Telegram, Discord, Slack, WhatsApp, Signal, Feishu/Lark, WeCom, DingTalk, SMS/Twilio, Mattermost, Matrix, Webhook, Home Assistant, CLI). Conversations carry over across platforms -- start on Telegram, continue on Discord. - -**Why it matters for AGH:** AGH currently exposes HTTP/SSE (web) and UDS (CLI). Adding messaging platform adapters would make AGH accessible from anywhere, enabling always-on agent availability. - -**AGH adaptation:** -- Add a `gateway` package under `internal/api/` with an adapter interface -- Each platform adapter implements message receive/send -- Route incoming messages to session creation/resumption -- Map to **Resource: hook** (message adapters) + **Action: session** - -### 8. Credential Pool with Rotation - -**What Hermes does:** Same-Provider Credential Pools let you configure multiple API keys for the same provider. A thread-safe least-used strategy distributes load. 401 failures trigger automatic rotation. - -**Why it matters for AGH:** Multi-key management is essential for production deployments where rate limits and key rotation are concerns. AGH agents making many API calls benefit from automatic key distribution. - -**AGH adaptation:** -- Add credential pool support in `internal/config/` -- Thread-safe rotation with `sync.RWMutex` -- Automatic failover on auth errors -- Map to **Capability: agent.driver** (provider credential management) - ---- - -## Key Takeaways for AGH Extension Ideas - -### Highest-Priority Extensions (Immediate Value) - -1.
**Pluggable memory backends** -- AGH already has the `memory.backend` dimension. Ship a clean interface and one or two backends (SQLite-based local, plus one cloud option). The pre/post LLM call hook pattern is essential. - -2. **Cron/scheduled sessions** -- Natural extension of AGH's session lifecycle. Enables autonomous operation without user initiation. Relatively straightforward to implement with AGH's existing session manager. - -3. **Subagent delegation** -- AGH manages sessions; parent-child relationships and context isolation unlock complex multi-step workflows. This is a differentiator. - -4. **MCP server mode** -- Expose AGH's Host API as MCP tools so other agents can use AGH as a capability provider. Composability multiplier. - -### Medium-Priority Extensions (Ecosystem Growth) - -5. **Skill auto-generation** -- Self-improving skills from session trajectories. Requires observe system maturity but delivers compounding value. - -6. **Security scanning for extensions** -- Content validation for skills and MCP servers. Important as the extension ecosystem grows. - -7. **Credential pool/rotation** -- Production-grade key management. Important for reliability. - -8. **Platform-conditional resource loading** -- Skills/tools that only load on specific OS/platforms (like Hermes's macOS-only skills). - -### Lower-Priority but Interesting (Future Phases) - -9. **Multi-platform messaging gateway** -- Telegram/Discord/Slack adapters. High effort, niche demand initially. - -10. **RL training pipeline** -- Trajectory generation and model fine-tuning from agent interactions. Research-grade, relevant for Phase 3. - -11. **Voice mode** -- STT/TTS pipeline. Niche but differentiating for certain use cases. - -12. **Skills marketplace** -- Community skill distribution with security scanning. Requires ecosystem scale. 
- -### Architectural Lessons from Hermes - -- **Hook-based extension is king.** Hermes's four lifecycle hooks (`pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`) plus two tool hooks (`pre_tool_call`, `post_tool_call`) enable the vast majority of extensions without touching core code. AGH's hook system should prioritize these six hook points. - -- **Memory providers need lifecycle integration, not just CRUD.** The ability to inject context before LLM calls and retain information after is what makes memory backends actually useful. Simple read/write APIs are insufficient. - -- **Skills as markdown documents with progressive disclosure** is the winning pattern. Low barrier to create (just write markdown), easy to share, and the progressive disclosure pattern minimizes token waste. - -- **Plugin discovery from three sources** (user home, project directory, package registry) covers all use cases: personal customization, project-specific tools, and community distribution. - -- **MCP dual-role (client + server)** makes the agent composable. Being only an MCP client limits the agent to consuming tools; being also an MCP server makes it a building block for larger systems. - -- **Security scanning is not optional.** Hermes learned this early and gates all community extensions through 65+ threat rules. AGH should build this in from the start, not bolt it on later. 
- ---- - -## Sources - -- [NousResearch/hermes-agent GitHub](https://github.com/nousresearch/hermes-agent) -- [Hermes Agent Documentation](https://hermes-agent.nousresearch.com/docs/) -- [Tools & Toolsets Reference](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) -- [Skills System](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills/) -- [MCP Integration](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) -- [Memory Providers](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory-providers) -- [RL Training](https://hermes-agent.nousresearch.com/docs/user-guide/features/rl-training) -- [Voice Mode](https://hermes-agent.nousresearch.com/docs/user-guide/features/voice-mode/) -- [Home Assistant Integration](https://hermes-agent.nousresearch.com/docs/user-guide/messaging/homeassistant) -- [Plugin Guide](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin/) -- [awesome-hermes-agent](https://github.com/0xNyk/awesome-hermes-agent) -- [Hermes Agent Ecosystem Map](https://hermes-ecosystem.vercel.app/) -- [HermesHub Skills Marketplace](https://github.com/amanning3390/hermeshub) -- [Hindsight Memory Provider](https://hindsight.vectorize.io/sdks/integrations/hermes) -- [Hermes Agent v0.5.0 Release](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.3.28) -- [Hermes Agent v0.7.0 Release](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.3) -- [Bundled Skills Catalog](https://hermes-agent.nousresearch.com/docs/reference/skills-catalog) -- [Creating Skills](https://hermes-agent.nousresearch.com/docs/developer-guide/creating-skills/) -- [Architecture](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture/) -- [Hermes Agent on DEV Community](https://dev.to/arshtechpro/hermes-agent-a-self-improving-ai-agent-that-runs-anywhere-2b7d) -- [Hermes Agent Memory Explained (Vectorize)](https://vectorize.io/articles/hermes-agent-memory-explained) -- 
[Memory Providers Compared (Vectorize)](https://vectorize.io/articles/hermes-agent-memory-providers-compared) diff --git a/.compozy/tasks/ext-ideas/research/integrations.md b/.compozy/tasks/ext-ideas/research/integrations.md deleted file mode 100644 index e9be48f71..000000000 --- a/.compozy/tasks/ext-ideas/research/integrations.md +++ /dev/null @@ -1,333 +0,0 @@ -# AGH Extension Ideas — Third-Party Integrations - -**Date**: 2026-04-11 -**Sources**: 4 parallel research agents covering DevOps/CI, Communication/Productivity, Data/AI/Search, and Browser/Media/Specialized integrations -**Purpose**: Catalog concrete third-party integrations that could be built as AGH extensions - ---- - -## Executive Summary - -Four parallel research agents surveyed the MCP ecosystem (21,000+ servers on Glama.ai) and mapped **120+ third-party integrations** across 12 categories. The key finding: **~80% of integrations have existing MCP servers** that AGH can wrap as subprocess extensions with minimal effort. The remaining 20% need custom extensions built from REST APIs. - -AGH's differentiator over standalone MCP servers is the **Host API** — extensions can combine external tool access with session memory, skills, observe events, and cross-tool orchestration to create stateful, context-aware workflows. 
- ---- - -## Priority Summary — Top 30 Integrations - -### Tier 1: Ship First (highest impact, production-ready MCP servers) - -| # | Integration | Category | MCP Status | Use Case | -|---|---|---|---|---| -| 1 | **GitHub** | DevOps | Official | PR lifecycle, issue management, code review automation | -| 2 | **Slack** | Communication | Official (47 tools) | Team Q&A bot, incident coordination, deploy notifications | -| 3 | **Linear** | Project Mgmt | Community | Ticket-to-PR automation, sprint ops, bug triage | -| 4 | **Notion** | Knowledge Base | Official | Living docs, research compilation, sprint planning | -| 5 | **Sentry** | Monitoring | Official | Error alert → investigate → fix → PR pipeline | -| 6 | **Playwright** | Browser | Official (Microsoft) | E2E testing, web scraping, form automation | -| 7 | **Supabase** | Database | Official (20+ tools) | Full BaaS: DB, auth, storage, edge functions | -| 8 | **Firecrawl** | Web Scraping | Official | Web-to-markdown, site crawling, content extraction | -| 9 | **GitHub Actions** | CI/CD | Community | CI monitoring, failure diagnosis, workflow optimization | -| 10 | **Stripe** | Finance | Official (25 tools) | Billing ops, subscription mgmt, revenue reports | - -### Tier 2: Build Next (strong value, mature ecosystem) - -| # | Integration | Category | MCP Status | Use Case | -|---|---|---|---|---| -| 11 | **Datadog** | Monitoring | Official (GA) | Observability investigation, latency diagnosis | -| 12 | **Google Workspace** | Productivity | Community (100+ tools) | Email, calendar, docs, sheets automation | -| 13 | **Figma** | Design | Official (Code Connect) | Design-to-code, component sync, design review | -| 14 | **Jira + Confluence** | Project Mgmt | Official (Atlassian) | Enterprise issue tracking, knowledge management | -| 15 | **Neon** | Database | Official | Branch-safe migrations, query tuning | -| 16 | **Terraform** | Infrastructure | Official (HashiCorp) | IaC provisioning, plan/apply workflows | -| 17 | 
**Kubernetes** | Infrastructure | Multiple | Pod debugging, deployment management, log analysis | -| 18 | **Snyk** | Security | Official (11 tools) | SAST, SCA, container scanning, SBOM | -| 19 | **SonarQube** | Security | Official (423 stars) | Code quality gates, tech debt tracking | -| 20 | **Brave Search** | Web Search | Official | Privacy-first research, error investigation | - -### Tier 3: Differentiate (strategic value, growing demand) - -| # | Integration | Category | MCP Status | Use Case | -|---|---|---|---|---| -| 21 | **Grafana** | Monitoring | Official | Dashboard-driven diagnosis, anomaly detection | -| 22 | **PagerDuty** | Monitoring | Community | On-call copilot, incident lifecycle management | -| 23 | **Ollama** | AI/ML | Community | Local model inference, privacy-sensitive ops | -| 24 | **OpenRouter** | AI/ML | Community | Multi-model gateway, cost-optimized inference | -| 25 | **n8n** | Automation | Community | Self-hosted workflow automation (1,396 nodes) | -| 26 | **Home Assistant** | IoT | Official | Smart home control, energy monitoring | -| 27 | **AWS S3** | Cloud Storage | Official | File management, data pipeline triggers | -| 28 | **Exa** | Web Search | Community | Neural semantic search for research | -| 29 | **PostHog** | Analytics | Official | Product analytics, feature flags, experiments | -| 30 | **Twitter/X** | Social Media | Community | Social media management, brand monitoring | - ---- - -## Detailed Integration Catalog - -### 1. 
DevOps & Developer Tools - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **GitHub** | Official + community | Full API | PRs, issues, code search, branches, Actions | Agent receives Linear ticket → researches codebase → implements fix → opens PR → monitors CI → responds to review comments | -| **GitLab** | Official | MR, pipelines, issues | Merge requests, CI pipelines, code browsing | Pipeline fails → agent reads logs → correlates with recent MR → auto-fixes or creates issue with root cause | -| **GitHub Actions** | Community | Workflow mgmt | Trigger, cancel, rerun workflows, read logs | Agent monitors builds → diagnoses flaky tests → detects failure patterns → suggests workflow optimizations | -| **CircleCI** | Official | Failure diagnosis | Error summaries, flaky test detection, rollbacks | Build fails → agent diagnoses → correlates with commits → creates fix PR or triggers rollback | -| **Jenkins** | Official plugin | CI/CD automation | Job management, build logs, pipeline control | Enterprise CI management with complex multi-stage pipelines | -| **ArgoCD** | K8s MCP Toolkit | GitOps | App sync, deployment status, rollback | Agent monitors sync status → detects drift → checks pod health → applies fix or rolls back | -| **Vercel** | Official handler | Deployments | Deploy, rollback, environment mgmt | Agent deploys to staging → runs smoke tests → promotes to production → posts summary | -| **Railway** | Official | Service mgmt | Deploy, scale, configure environments | Agent manages Railway services lifecycle | -| **SonarQube** | Official (423 stars) | Quality gates | Bugs, vulnerabilities, code smells, tech debt | PR created → agent runs analysis → auto-fixes simple issues → blocks merge if quality gate fails | -| **Snyk** | Official (11 tools) | Security scanning | SAST, SCA, IaC, container, SBOM, AI-BOM | Nightly scans → triage by severity → auto-PR for critical vulns → SBOM for compliance | -| 
**Semgrep** | Built into binary | Static analysis | Custom rules, vulnerability detection | Pre-commit scanning → inline PR comments with fix suggestions | -| **Terraform** | Official (HashiCorp) | IaC | Registry, plan, apply, workspace mgmt | Agent generates HCL → runs plan → presents for approval → applies → updates docs | -| **Pulumi** | Official | IaC (code-based) | Infrastructure in Go/TS/Python | Agent writes Pulumi Go code for infrastructure changes | -| **Kubernetes** | Multiple (kubectl, k8m, Lens) | Cluster mgmt | Pods, logs, events, helm, istio | Alert → agent checks pods → reads logs → identifies OOM kill → scales up → creates post-mortem | -| **AWS** | Official (60+ servers) | Cloud ops | Lambda, ECS, S3, EC2, RDS, CloudWatch | Agent monitors cloud costs → identifies unused resources → proposes cleanup with savings | -| **Docker** | Community | Container mgmt | Build, run, manage containers | Agent builds images → runs tests in containers → pushes to registry | -| **Dependabot/Renovate** | **None (build opportunity)** | — | Dependency update management | Agent monitors deps → creates grouped PRs → runs security scans → auto-merges safe patches | - -### 2. 
Communication & Messaging - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Slack** | Official (47 tools, GA Feb 2026) | Full workspace | Channels, messages, threads, canvases, search | Agent monitors #help-engineering → researches codebase → posts threaded answer with code refs | -| **Discord** | Community (multiple) | Server mgmt | Channels, messages, forums, reactions | Community support bot → searches docs + past issues → provides answers | -| **Microsoft Teams** | Official (Work IQ) | Chat/channels | Create chats, post messages, manage channels | Meeting prep agent → pulls docs from SharePoint → posts briefing to Teams channel | -| **Telegram** | Community (multiple) | Bot API + MTProto | Messaging, media, groups | Ops notification pipeline → deployment status, health alerts, CI results | -| **WhatsApp** | Community (beta) | Web API | Send/receive messages | Customer response agent → looks up CRM → drafts contextual replies | -| **Email (Gmail)** | Community (100+ tools) | Full Gmail API | Send, read, search, label, filter | Email triage → categorize by urgency → draft routine replies → escalate important ones | -| **Email (Outlook)** | Official (Work IQ) | Graph API | Messages, calendar, files | Report distributor → generates status reports → formats as email → sends to stakeholders | -| **Matrix** | **None (build opportunity)** | — | Decentralized messaging | Self-hosted comms agent for privacy-focused organizations | - -### 3. 
Productivity & Project Management - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Notion** | Official | Full API | Pages, databases, blocks, search | Code changes → agent auto-updates Notion docs → cross-references existing pages | -| **Obsidian** | Community (60+ servers) | Vault access | Read, write, search, tags, backlinks | Personal knowledge agent → auto-creates notes from conversations → links related concepts | -| **Google Workspace** | Community (100+ tools) | Full suite | Gmail, Calendar, Docs, Sheets, Drive | Meeting notes agent → records action items → creates Doc → assigns tasks → sends follow-ups | -| **Microsoft 365** | Official + community | Full suite | Word, Excel, SharePoint, OneDrive, Teams | Onboarding automator → sets up accounts → creates folders → sends welcome email | -| **Linear** | Community | Full API | Issues, projects, cycles, teams | Auto-ticket from TODO/FIXME → sprint reporter → bug-to-fix pipeline → PR-to-ticket linker | -| **Jira** | Official (Atlassian Rovo) | OAuth 2.1 | JQL, epics, sprints, transitions | Ticket auto-population → enriches with codebase context → cross-system sync | -| **Confluence** | Official (Atlassian Rovo) | OAuth 2.1 | Pages, spaces, search | Runbook maintainer → architecture doc generator → post-mortem writer | -| **Asana** | Official (mcp.asana.com) | Full API | Tasks, projects, sections, custom fields | Task breakdown agent → high-level description → subtasks with estimates and dependencies | -| **Monday.com** | Official | GraphQL API | Boards, items, updates, documents | Board automator → external events create/update items automatically | -| **ClickUp** | Community | Broad coverage | Tasks, docs, goals, OKRs, chat | OKR tracker → monitors key results → weekly updates → flags at-risk objectives | -| **Shortcut** | Official (hosted) | OAuth | Stories, Epics, Docs, iterations | Story enricher → researches codebase → adds technical details and 
acceptance criteria | -| **Figma** | Official (Code Connect) | Design data | Nodes, auto-layout, variants, tokens | Design-to-code → reads frame → maps to codebase components → generates production React | -| **Miro** | Official (beta) | Board mgmt | Elements, boards, collaboration | Architecture diagrammer → creates system diagrams from codebase analysis | -| **Excalidraw** | Official + community | Canvas toolkit | Elements, real-time sync, WebSocket | Architecture sketch → generates diagrams from natural language descriptions | - -### 4. Databases & Data - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **PostgreSQL** | Multiple (official, Google Toolbox) | SQL access | Read-only queries, schema inspection | Bug investigation → query production DB → find anomaly → write migration + code patch | -| **MySQL** | Community + Google Toolbox | SQL access | Query, schema browsing | Legacy system analysis → understand schema → generate Go structs | -| **MongoDB** | Official | Document CRUD | Aggregation pipelines, Atlas mgmt | Identify slow aggregations → propose index optimizations → validate with explain() | -| **Redis** | Official (Dec 2025) | Data mgmt | Keys, TTLs, pub/sub, search | Debug cache stampede → inspect TTLs → implement jittered expiration fix | -| **Supabase** | Official (20+ tools, OAuth) | Full BaaS | DB, auth, storage, edge functions | Bootstrap entire backend through natural language in single session | -| **Neon** | Official (29 tools) | Serverless Postgres | Branch-based migrations, query tuning | Create branch → test migration → validate queries → merge or rollback | -| **PlanetScale** | Official | MySQL branching | Branches, deploy requests | Feature branch → apply migrations → run integration tests → deploy request | -| **Turso** | Official (--mcp flag) | Edge SQLite | Schema design, data ops | Set up edge database → design schema → generate client code → deploy replicas | -| **DynamoDB** | 
Official (AWS Labs) | NoSQL modeling | Design patterns, cost analysis, code gen | Analyze MySQL schema → design DynamoDB single-table model → generate Go SDK code | -| **Google MCP Toolbox** | Official (Google) | **40+ data sources** | Postgres, MySQL, MongoDB, Redis, Neo4j, Snowflake... | Single extension → access any database → join data across engines | - -### 5. Vector Stores & Search - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **LanceDB** | Community (multiple) | Embedded vectors | Zero-config, disk-based, semantic search | AGH semantic memory backend → index memories + skills → retrieve during sessions | -| **Qdrant** | Official | Vector search | HNSW, filtered search, code search | Index codebase → semantic code search during debugging → find similar patterns | -| **Milvus** | Official (5 search tools) | Industrial-scale | Billion-vector, hybrid search | Large codebase indexing → semantic function search → incident similarity matching | -| **Pinecone** | Via unified (weave-mcp) | Managed vectors | Billion-scale managed | Documentation indexing → semantic search during coding sessions | -| **Elasticsearch** | Community + Google Toolbox | Full-text + analytics | Index mgmt, document ops | Search application logs → diagnose production errors → generate root cause analysis | -| **Algolia** | Official (Go + Node + hosted) | Enterprise search | Synonyms, ranking, analytics | Configure search indexes → set up ranking rules → test quality → deploy | -| **Meilisearch** | Official | Dev-friendly search | Typo-tolerant, fast | Index knowledge base → instant typo-tolerant search → build UI component | -| **Firecrawl** | Official (98K stars) | Web-to-markdown | Crawl, scrape, media parse | Crawl competitor docs → convert to markdown → index for semantic search | -| **Brave Search** | Official | Privacy-first search | Independent index, no tracking | Web research for debugging → search error messages, Stack 
Overflow, GitHub issues | -| **Exa** | Community | Neural search | Semantic understanding | Semantic code search → "Go implementation of event sourcing with SQLite" | -| **Tavily** | Community | RAG-optimized | Fact-checked, concise results | Research tasks → find API docs, known issues, best practices | - -### 6. AI/ML Platforms - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Ollama** | Community | Local inference | Run Llama, Mistral, etc. locally | Local embedding generation → privacy-sensitive code analysis → offline operation | -| **OpenRouter** | Community | 100+ models | Multi-model gateway, cost routing | Dynamic model selection → fast model for quick questions → reasoning model for complex analysis | -| **Hugging Face** | Community | Model discovery | Hub access, model search | Search for embedding model → download via Ollama → benchmark on domain data → configure as memory backend | -| **Replicate** | Community | Hosted inference | Run any OSS model via API | Image generation for mockups → specialized NLP models for code analysis | -| **Groq** | Via OpenRouter | Ultra-fast inference | Custom LPU hardware | Rapid code analysis → real-time review suggestions during pair programming | - -### 7. 
Monitoring & Observability - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Sentry** | Official | Error tracking | Stack traces, error frequency, releases | Critical error → query Sentry → search codebase → create fix PR → verify error rate drops | -| **Datadog** | Official (GA Mar 2026) | Full observability | Logs, metrics, traces, APM, SLOs | Latency spike → query traces → correlate with deployment → identify commit → revert PR | -| **Grafana** | Official | Dashboard access | Data sources, incidents, metrics | Query dashboards for anomalies → correlate with deployments → generate incident summaries | -| **PagerDuty** | Community | Incident mgmt | Acknowledge, resolve, reassign, analytics | Alert → acknowledge → gather context from Datadog/Sentry → diagnose → remediate → resolve → post-mortem | - -### 8. Automation & Workflow - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Zapier** | Official | 8,000+ apps | Cross-app automation, auth handling | Code review → create Jira ticket + Slack summary + Google Sheet update in one session | -| **n8n** | Community | 1,396 nodes | Self-hosted, privacy-first | Design workflow → monitor GitHub for issues → classify with AI → assign → track | -| **Inngest** | Official | Durable execution | Go SDK, event-driven, AgentKit | Orchestrate deployment pipeline → test → build → stage → smoke test → promote → notify | -| **Temporal** | Community | Durable workflows | Retry logic, long-running processes | Data pipeline orchestration → retry handlers → compensation → monitoring | -| **Pipedream** | Community | 2,700+ apps | Code-first (Python/Node/Go/Bash) | Webhook listener → process alerts → trigger remediation | - -### 9. 
Finance - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Stripe** | Official (25 tools) | Payment lifecycle | Customers, subscriptions, invoices, refunds | Monitor churn → generate invoices → create discount codes → revenue reports | -| **Coinbase** | Official | Crypto ops | Wallet mgmt, onramps, stablecoins | Portfolio management → track balances → execute trades → tax reporting | -| **Yahoo Finance** | Community | Market data | Prices, fundamentals, earnings | Stock screening → earnings analysis → peer comparison → research notes | -| **Plaid** | **None (build opportunity)** | — | Bank account aggregation | Connect bank accounts → categorize transactions → spending patterns → budget reports | - -### 10. Browser & Media - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Playwright** | Official (Microsoft) | Full browser | Click, fill, navigate, screenshot | E2E testing → navigate pages → assert content → report results via observe API | -| **Browserbase** | Official | Cloud browsers | Bot evasion, managed sessions | Competitive intelligence → scrape JS-heavy pricing pages → extract structured data | -| **YouTube** | Community (490+ stars) | Transcripts | Transcript extraction, search | Research playlist → extract transcripts → summarize → build knowledge base | -| **DALL-E / Flux** | Community | Image generation | Text-to-image | Generate diagrams, illustrations, hero images for documentation | -| **ElevenLabs** | Community | TTS | Voice synthesis | Convert blog posts to podcast-style audio narration | -| **Spotify** | Community (93 tools) | Music control | Playback, playlists, catalog search | Curate workout playlists → analyze track features → learn preferences over time | - -### 11. 
Social Media - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Twitter/X** | Community (8+ servers) | Posting, search | Tweets, threads, mentions, analytics | Draft tweets from product updates → schedule threads → monitor engagement → weekly analytics | -| **Bluesky** | Community (57 tools) | Full AT Protocol | Posting, firehose, social graph | Cross-post content → monitor brand mentions → audience analytics | -| **LinkedIn** | Via aggregators | Posting | Articles, engagement tracking | Draft LinkedIn articles from internal knowledge → optimize posting times | -| **recast-mcp** | Community | Multi-platform | URL → platform-specific content | Blog post → LinkedIn article + Twitter thread + Reddit post + newsletter | - -### 12. Specialized & Niche - -| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | -|---|---|---|---|---| -| **Home Assistant** | Official (built-in) | IoT control | Devices, automations, energy | Manage daily routines → lighting, HVAC, security → energy reports | -| **AWS S3** | Official (multiple) | Object storage | Buckets, objects, presigned URLs | Monitor for new files → process CSVs → generate presigned URLs → manage lifecycle | -| **Google Maps** | Official (18+ tools) | Geolocation | Geocoding, routing, POI search | Optimize multi-stop delivery routes → calculate ETAs → generate static maps | -| **Mapbox** | Official | Geospatial | Routing, isochrones, map matching | Real estate analysis → isochrone maps → commute times → nearby amenities | -| **SendGrid** | Community (14+ tools) | Email marketing | Campaigns, templates, deliverability | Create campaigns from briefs → manage segments → A/B test subjects | -| **Meta-MCP (Magg)** | Community | Self-extending | Discover + install MCP servers at runtime | Agent lacks a tool → discovers and installs appropriate MCP server → uses it → permanently learns | -| **Agoragentic** | Community | Agent marketplace | 
Agent-to-agent services + crypto payments | Agent hires specialized agents for subtasks → pays via USDC on Base L2 | - ---- - -## Build-From-Scratch Opportunities - -These integrations have no existing MCP server and represent differentiation for AGH: - -| Integration | What to Build | Why It Matters | -|---|---|---| -| **Dependabot/Renovate** | Dependency update mgmt with security scanning | Combining updates + security + auto-merge is unique | -| **GitHub Security Alerts** | Dependabot alerts, secret scanning, code scanning | Notable gap — GitHub's security features lack MCP | -| **Plaid** | Banking data aggregation | Personal finance agent enabler | -| **Matrix** | Decentralized messaging | Serves privacy-focused organizations | -| **MQTT (standalone)** | IoT device communication | Industrial monitoring beyond Home Assistant | -| **Remotion** | Programmatic video in React | Data visualization videos | -| **ConvertKit** | Creator email platform | Creator economy automation | -| **LaunchDarkly** | Feature flag management | Agent-controlled progressive rollouts | -| **Incident.io** | Modern incident management | Growing platform with no MCP | - ---- - -## Recommended Extension Bundles - -### Bundle 1: Development Lifecycle -**Goal**: Ticket → Code → PR → Merged, fully autonomous - -- GitHub MCP (version control, PRs) -- Linear or Jira MCP (issue tracking) -- GitHub Actions MCP (CI monitoring) -- SonarQube + Snyk MCP (quality + security gates) -- Slack MCP (team notifications) - -### Bundle 2: Incident Response -**Goal**: Alert → Diagnose → Fix → Resolve, with cross-tool investigation - -- Sentry MCP (error tracking) -- Datadog MCP (metrics, traces, logs) -- PagerDuty MCP (incident lifecycle) -- Grafana MCP (dashboards) -- Kubernetes MCP (infrastructure) -- Slack MCP (coordination) -- GitHub MCP (fix PRs) - -### Bundle 3: Infrastructure Operations -**Goal**: Provision → Deploy → Monitor → Optimize - -- Terraform MCP (IaC) -- AWS/GCP/Azure MCP (cloud resources) -- 
Kubernetes + ArgoCD MCP (orchestration) -- Datadog/Grafana MCP (monitoring) -- Slack MCP (notifications) - -### Bundle 4: Knowledge Worker -**Goal**: Research → Document → Share → Keep Updated - -- Notion/Confluence MCP (knowledge base) -- Google Workspace MCP (email, docs, sheets) -- Firecrawl MCP (web research) -- Brave/Exa MCP (search) -- Figma MCP (design context) -- Obsidian MCP (personal knowledge) - -### Bundle 5: Data & Analytics -**Goal**: Query → Analyze → Report → Automate - -- Google MCP Toolbox (40+ data sources) -- Supabase/Neon MCP (primary databases) -- LanceDB/Qdrant MCP (vector search) -- PostHog MCP (product analytics) -- n8n/Inngest MCP (workflow automation) - ---- - -## Architecture Recommendations - -### 1. Thin Wrapper Pattern (Default) -Most integrations have existing MCP servers. AGH extensions wrap them as subprocesses, adding: -- Session context (memory, workspace awareness) -- Observe event emission (audit trail) -- Credential management (TOML config) -- Cross-tool orchestration (compose multiple MCP servers in one workflow) - -### 2. Unified Gateway Pattern (For Categories) -For categories with many providers (databases, search, vector stores), use a single extension that supports multiple backends: -- **Google MCP Toolbox** covers 40+ data sources -- **weave-mcp** covers 11 vector databases -- **MCP Omnisearch** covers 7 search providers -- **Composio** covers thousands of APIs - -### 3. Security Boundaries -- 43% of public MCP servers have command injection vulnerabilities -- 7.6% of ClawHub skills contain dangerous patterns -- AGH extensions must enforce permission boundaries, rate limiting, and audit logging -- Read-only by default; write access requires explicit opt-in -- Credentials managed via TOML config, never hardcoded - -### 4. 
AGH Differentiator -Unlike standalone MCP servers, AGH extensions can: -- **Remember** — Store findings in session memory for future reference -- **Learn** — Generate skills from successful workflows -- **Orchestrate** — Compose multiple tools across services in a single session -- **Observe** — Record full audit trail of cross-system operations - ---- - -## Sources - -Detailed per-category research files: -- [integrations_devops.md](research/integrations_devops.md) — 37 integrations across DevOps/CI/CD -- [integrations_communication.md](research/integrations_communication.md) — 35 integrations across comms/productivity -- [integrations_data_ai.md](research/integrations_data_ai.md) — 50+ integrations across data/AI/search -- [integrations_specialized.md](research/integrations_specialized.md) — 50 integrations across browser/media/finance/IoT/niche diff --git a/.compozy/tasks/extensability/analysis/analysis_claude_code.md b/.compozy/tasks/extensability/analysis/analysis_claude_code.md deleted file mode 100644 index eb0f90543..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_claude_code.md +++ /dev/null @@ -1,238 +0,0 @@ -# Claude Code Harness Analysis for AGH Extensibility - -## Overview - -Claude Code is a TypeScript agentic CLI that bridges natural-language intent to code and shell operations. Its architecture is a seven-layer stack: Entry Points, Bootstrap/Configuration, Setup, UI Layer, QueryEngine (async-generator core loop), Tool System (50+ tools), and Services/State. The harness manages a disciplined cycle of "send message -> stream response -> execute tools -> loop" with everything else -- the TUI, the service container, the permission engine -- existing to feed that loop. - -This analysis identifies the key features, architectural patterns, and capabilities from Claude Code and classifies each as either **CORE** (essential for any agent OS minimal core) or **EXTENSION** (should be a plugin/extension on top of the core) for AGH's architecture. 
- -AGH's philosophy is a robust minimal core with a highly extensible plugin system. The classification below applies that lens: features that every ACP-compatible agent session needs regardless of agent type belong in core; features that are domain-specific, agent-specific, or can be composed from core primitives belong as extensions. - -## Key Features Analysis - -### Foundational Architecture - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Core Query/Agent Loop** (async turn cycle: normalize -> call model -> execute tools -> loop) | **CORE** | This is the beating heart of any agent OS. AGH's session package already owns session lifecycle; the turn-based execution loop with tool dispatch is the irreducible minimum for running any ACP agent. Every agent type needs this cycle. | -| **Tool Interface Contract** (uniform schema: identity, input schema, execution, permissions, concurrency metadata) | **CORE** | A uniform tool abstraction is what makes the system extensible without changing the core loop. AGH needs a `ToolDriver` interface (like `AgentDriver`) that all tools implement. The contract must include: name, input validation, execution, read-only/concurrency-safe flags, and permission check. | -| **Tool Registry with Dynamic Loading** | **CORE** | The registry that maps tool names to implementations and supports runtime registration (for MCP tools, plugin tools) is core infrastructure. Without it, extensibility requires recompilation. | -| **Tool Execution Pipeline** (validate -> permission check -> pre-hooks -> execute -> post-hooks -> result truncation) | **CORE** | The ordered pipeline through which every tool call passes is the single enforcement point for safety, validation, and extensibility. This is not optional -- it is how the core guarantees invariants for any extension. 
| -| **Tool Partitioning** (concurrent reads, serial writes) | **CORE** | Smart concurrency based on `isReadOnly()` and `isConcurrencySafe()` flags is a significant performance optimization that belongs in the core orchestrator. It halves wall-clock latency for read-heavy tool batches and prevents write races. | -| **Message Normalization** (role alternation, tool result hoisting, thinking block rules) | **CORE** | Every ACP provider will have message format requirements. The normalization layer that transforms internal state to provider-compatible format is essential infrastructure that sits between the session store and the model call. | -| **Streaming Response Handling** (async generator yielding events to observers) | **CORE** | AGH already has SSE for the web UI and the notifier pattern for fan-out. The streaming pipeline from model to observers is core -- it is how the daemon surfaces real-time events to all consumers (web UI, CLI, hooks). | -| **Result Truncation / Large Output Handling** (persist to disk, send preview + path) | **CORE** | Preventing a single tool result from consuming the entire context window is a safety invariant. The core should enforce per-tool `maxResultSize` and handle overflow to disk automatically. | - -### Permission and Security Model - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Multi-Level Permission Rule Cascade** (policy > user > project > local > session) | **CORE** | The layered permission system where higher-level sources cannot be overridden by lower ones is essential for any production agent OS, especially one targeting enterprise deployment. AGH's config package already does TOML merge -- extending it to permission rules with cascade semantics is a natural fit. 
| -| **Permission Decision Waterfall** (deny rules -> tool-specific check -> mode -> allow rules -> classifier -> user prompt) | **CORE** | The ordered evaluation that short-circuits on definitive answers is the enforcement mechanism. Without it, every tool either runs unchecked or requires manual approval. The waterfall structure belongs in core; specific classifiers can be extensions. | -| **Permission Modes** (default, plan, auto, acceptEdits, bypass) | **CORE** | Modes define baseline strictness and are critical for both interactive and automated use. `bypass` mode enables CI/automation; `plan` mode enables safe exploration; `auto` mode reduces prompt fatigue. These are fundamental operational modes, not domain-specific extensions. | -| **Plan Mode as Hard Constraint** (restricts tools to read-only, requires approval to escalate) | **CORE** | Plan mode enforced at the tool layer (not as a suggestion to the model) is a key safety pattern. The core must support tool-scope restriction based on session mode. | -| **Multi-Resolver Race Pattern** (parallel permission resolvers with first-safe-answer-wins) | **EXTENSION** | The sophisticated `createResolveOnce` pattern with parallel resolvers (user click, hook classifier, bridge UI) is an optimization. The core needs a permission resolution interface; the parallel race with multiple resolver types is an advanced capability. | -| **LLM-Based Safety Classifier** (transcript classifier for auto-approve) | **EXTENSION** | Using a separate LLM to classify tool safety is powerful but expensive and model-dependent. The core should define a `PermissionClassifier` interface; the LLM-based implementation is an extension. | -| **Iron Gate** (hardcoded categorical restrictions that no classifier can bypass) | **CORE** | Certain actions must be categorically refused regardless of any classifier, user setting, or mode. A small set of hardcoded deny rules that cannot be overridden is a safety floor that belongs in core. 
| -| **Permission Explanation** (LLM-generated risk assessment for user prompts) | **EXTENSION** | Natural-language command risk explanation is a UX enhancement that uses side-queries. Not essential for the minimal core. | - -### Hook System (Lifecycle Extensibility) - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Lifecycle Event Bus** (25+ events: PreToolUse, PostToolUse, SessionStart, SessionEnd, UserPromptSubmit, etc.) | **CORE** | The hook event taxonomy is the primary extensibility surface. AGH's notifier pattern is already a typed interface for fan-out -- extending it with a formal lifecycle event bus (pre/post tool, session lifecycle, prompt submission) is core infrastructure that enables all extensions. | -| **Hook Output Protocol** (structured JSON: continue/block, updatedInput, additionalContext, transformedResult) | **CORE** | The protocol by which hooks communicate decisions back to the core loop is the contract. Without a structured protocol, hooks are fire-and-forget side effects. With it, hooks can block, modify, and transform -- making them load-bearing extensibility points. | -| **Hook Types** (command, prompt, agent, http, function) | **EXTENSION** | The five execution engines for hooks are implementations of the hook contract. The core needs to define the `HookExecutor` interface and ship a basic `command` executor. `prompt`, `agent`, `http`, and `function` types are extensions that plug into the same interface. | -| **Hook Matcher Syntax** (regex/glob filters for event-specific keys like tool names) | **CORE** | Matchers determine which hooks fire for which events. A simple but expressive matching syntax (exact name, pipe-separated, glob) is core because it determines hook specificity. 
| -| **PreToolUse Blocking and Modification** (hooks can block execution or rewrite tool inputs) | **CORE** | The ability for pre-execution hooks to block or modify is essential for verification gates, policy enforcement, and input sanitization. This is not a nice-to-have -- it is how organizations enforce coding standards, security policies, and workflow rules. | -| **PostToolUse Result Transformation** (hooks can redact or augment tool results) | **CORE** | Result transformation enables secret redaction, output enrichment, and audit logging. This is a security-critical capability that belongs in core. | -| **Enterprise Hook Enforcement** (MDM-managed hooks that users cannot remove) | **EXTENSION** | MDM enforcement is an enterprise deployment concern. The core should support hook source precedence; the MDM-specific enforcement is an enterprise extension. | - -### Memory and Session Persistence - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Event-Sourced Session Store** (append-only event log per session) | **CORE** | AGH already has this via `sessiondb` (per-session SQLite event store). This is the foundation of session persistence and replay. | -| **Session Resume / Replay** (restore conversation state from persisted events) | **CORE** | The ability to resume a session from persisted state is fundamental for the daemon model. AGH's transcript package already handles replay message assembly -- this belongs in core. | -| **Tiered Memory Architecture** (conversation history, session memory, instruction files, cross-session auto-memory, team memory) | **CORE (framework) / EXTENSION (implementations)** | The framework for tiered memory (AGH's `memory` package with dual-scope global + workspace) is core. The specific implementations -- auto-extraction subagent, Sonnet-based semantic recall, team memory sync -- are extensions that plug into the memory framework. 
| -| **Persistent Instruction File** (CLAUDE.md / project-level config loaded every session) | **CORE** | AGH's config package handles TOML loading. A mechanism for per-workspace instruction files that agents receive in their system prompt is core infrastructure. | -| **Background Memory Extraction** (forked subagent extracting facts after each turn) | **EXTENSION** | The extraction subagent is a specific implementation of the memory write path. The core needs a `MemoryWriter` interface; the LLM-based extraction is an extension (and AGH already has `dream consolidation` as its analog). | -| **AutoDream / Memory Consolidation** (periodic background merge, dedup, prune) | **EXTENSION** | AGH already has this in `internal/memory/consolidation`. The consolidation runtime is an extension that uses the core memory and session interfaces. The core provides the scheduling, locking, and memory access primitives. | -| **Semantic Recall** (LLM side-query to select relevant memories per turn) | **EXTENSION** | Using a separate model as a relevance filter is a specific recall strategy. The core defines a `MemoryRecaller` interface; LLM-based semantic recall is one implementation. | -| **Session Memory Summary** (structured summary maintained during conversation for compaction) | **EXTENSION** | Session memory as a pre-built summary for fast compaction is a specific optimization strategy. The core provides the compaction trigger; the SM-Compact strategy is an extension. | - -### Context Management - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Token Counting** (hybrid API + heuristic estimation) | **CORE** | Context budget awareness is essential for any long-running agent session. The core needs a token estimation facility -- even a rough heuristic -- to know when compaction or truncation is needed. 
| -| **Context Compaction Cascade** (5-layer: tool result budget -> snip -> microcompact -> context collapse -> autocompact) | **CORE (framework) / EXTENSION (strategies)** | The framework that runs compaction strategies in order of increasing cost/loss is core. The specific strategies (snip, microcompact, SM-compact, full conversation compaction) are extensions that register with the framework. The core provides: threshold detection, strategy ordering, circuit breaker, post-compact cleanup. | -| **Static/Dynamic System Prompt Split** (cached prefix + per-request dynamic tail) | **CORE** | Splitting the system prompt into a cacheable static portion and a per-request dynamic portion is a cost and latency optimization that benefits every session. This belongs in the core prompt-building pipeline. | -| **Tool Result Budget** (per-tool maxResultSize with overflow to disk) | **CORE** | Already classified above under Tool Execution Pipeline. | -| **Circuit Breaker** (halt compaction after N consecutive failures) | **CORE** | Preventing infinite retry on compaction failure is a safety mechanism. The core compaction framework should include a circuit breaker. | - -### Plugin and Skills System - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Three-Layer Plugin Reconciliation** (intent in settings, materialization on disk, activation at runtime) | **CORE** | The separation of what-the-user-wants from what-is-installed from what-is-active makes the plugin system robust to partial failures. AGH's skills package should adopt this pattern -- it is the foundation of reliable extension management. | -| **Plugin Lifecycle Operations** (install, uninstall, enable, disable, update) | **CORE** | The CRUD operations for plugins, including the enable/disable vs install/uninstall distinction, are core plugin management. 
| -| **Marketplace Discovery** | **EXTENSION** | The marketplace UI, browsing, and discovery pipeline are value-added features on top of the core plugin lifecycle. The core needs a plugin registry and loader; marketplace is an extension. | -| **Skills as Markdown Procedures** (SKILL.md with frontmatter for activation, not code) | **CORE** | The concept of skills as prompt-and-procedure pairs (not compiled code) is a key design insight. Skills occupy zero token budget at rest (only metadata visible until activated). AGH already has a skills catalog -- the SKILL.md contract with `description`, `when_to_use`, and `allowed-tools` frontmatter is the right abstraction for the core. | -| **Progressive Disclosure** (skill content materializes into context only when activated) | **CORE** | This is not just a nice optimization -- it is what makes a large skill library practical. The core skill loader must support lazy materialization based on activation, not eager loading. | -| **Skill Improvement** (background process watches for user corrections and proposes skill updates) | **EXTENSION** | Automatic skill refinement based on session corrections is an advanced feature. | -| **Agent Definitions** (markdown files declaring subagent identity, tools, prompts) | **CORE** | Agent definitions are how AGH will support heterogeneous agent types. The markdown-with-frontmatter format for declaring agent capabilities, tool scopes, and system prompts belongs in core. | -| **Plugin-Provided Hooks** | **CORE** | Plugins must be able to register hooks. This is a natural intersection of the plugin and hook systems. | -| **Plugin Policy Enforcement** (allowlist/blocklist per organization) | **EXTENSION** | Enterprise-grade plugin policy enforcement is an enterprise extension. 
| - -### MCP Integration - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **MCP Host Implementation** (connect to multiple servers, expose tools/resources/prompts) | **CORE** | AGH is designed as an ACP-based system. MCP is the standard protocol for agent-to-tool communication. Acting as an MCP host that can connect to external MCP servers and expose their tools identically to built-in tools is core infrastructure. | -| **Tool Namespacing** (`mcp__{server}__{tool}` convention) | **CORE** | When multiple servers can expose same-named tools, namespacing is essential for unambiguous dispatch. This is a core registry concern. | -| **Transport Abstraction** (stdio, SSE, HTTP, WebSocket, in-process) | **CORE (interface) / EXTENSION (transports)** | The transport interface is core. AGH should ship with `stdio` (most common for local tools) and `SSE/HTTP` (for remote). WebSocket and in-process transports are extensions. | -| **Session Recovery** (auto-reconnect on session expiry, 401 handling) | **CORE** | MCP sessions are stateful and servers restart. Transparent reconnection is essential for reliability in long-running daemon sessions. | -| **Output Size Management** (truncate large MCP results, persist to disk) | **CORE** | Already covered under result truncation -- applies uniformly to MCP and built-in tools. | -| **OAuth Flow for Remote MCP Servers** | **EXTENSION** | Browser-based OAuth for remote MCP servers is a specific authentication pattern. The core needs an MCP auth interface; OAuth is one implementation. | -| **MCP Server Approval Dialog** (user must approve new servers before connection) | **CORE** | Security boundary: preventing a malicious workspace config from silently launching subprocesses. The core must gate MCP server activation on explicit approval. 
| - -### Agent Swarm and Subagents - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Subagent Spawning** (fork a new agent loop with its own context, tools, and system prompt) | **CORE** | AGH's ACP layer already spawns agents as subprocesses. The ability to spawn subagent sessions -- whether as forked loops, separate processes, or separate ACP instances -- is core to an agent OS. | -| **Three Execution Models** (fork/cache-shared, teammate/process-isolated, worktree/filesystem-isolated) | **CORE (interface) / EXTENSION (models)** | The core needs a `SubagentExecutionModel` interface. The fork model (cache-shared, same process) is core for efficiency. Teammate (separate process with mailbox) and worktree (git isolation) are extensions. | -| **Tool Scope Restriction per Agent** (subagents get a filtered tool set) | **CORE** | Different agents need different capabilities. The ability to filter the tool registry per-agent based on definitions or mode is core. | -| **File-Based Mailbox** (inter-agent communication via filesystem) | **EXTENSION** | The specific IPC mechanism (filesystem mailbox vs UDS vs channels) is an implementation choice. AGH already has UDS for CLI IPC -- agent-to-agent communication can use the same mechanism. The mailbox pattern is an extension. | -| **Plan Approval Flow** (teammate requests leader approval before escalating to act mode) | **CORE** | The two-phase commit where a subagent must get approval before gaining destructive capabilities is a safety pattern. The core needs a mechanism for capability escalation requests between agents. | -| **Shared Task List** (TodoV2: create, update, list tasks across agents) | **EXTENSION** | Task coordination across agents is a specific orchestration pattern. The core provides session state and messaging; shared task management is an extension. 
| -| **Swarm UI** (terminal spinners, progress tracking for multiple concurrent agents) | **EXTENSION** | The visualization of multi-agent activity is a UI concern. The core emits events; the UI renders them. | -| **Agent Memory Snapshots** (persist/restore agent knowledge across sessions and worktrees) | **EXTENSION** | Memory snapshotting for subagent continuity is an advanced persistence feature. | - -### Settings System - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Hierarchical Settings Cascade** (policy > flag > local > project > user) | **CORE** | AGH's config package already handles TOML loading and merge. Extending it with a formal precedence hierarchy that supports enterprise policy overrides is core infrastructure. | -| **Schema Validation** (Zod-style validation with per-rule error isolation) | **CORE** | Validating configuration against a schema and isolating individual rule errors (so one bad rule does not invalidate the file) is essential for reliability. AGH should use Go struct tags + validation, with the same per-rule isolation principle. | -| **Hot-Reload** (file watcher with stability windows, internal-write suppression) | **CORE** | Settings changes should take effect without daemon restart. The daemon model makes this especially important -- the daemon is long-lived and needs to react to config changes. | -| **MDM / Enterprise Policy Enforcement** (OS-level managed settings that cannot be overridden) | **EXTENSION** | Enterprise MDM integration is a deployment concern. The core supports the precedence hierarchy; MDM-specific readers (plist, registry) are extensions. | -| **Environment Variable Injection** (settings-driven env vars for tool subprocesses) | **CORE** | Tools that spawn subprocesses need controllable environment. The settings system injecting env vars from config is a core feature for corporate proxy, custom paths, and similar concerns. 
| - -### Observability and Diagnostics - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Event Recording** (structured event logging for every tool call, turn, and lifecycle event) | **CORE** | AGH's `observe` package already handles event recording and health metrics. This is core. | -| **Diagnostic Command** (single entry point showing installation health, conflicts, warnings) | **CORE** | A `doctor` equivalent that surfaces configuration errors, agent definition issues, permission conflicts, and health status is essential for operations. | -| **Telemetry Pipeline** (fan-out to multiple sinks) | **EXTENSION** | The specific telemetry sinks (Datadog, analytics collectors) are deployment-specific. The core provides structured events; telemetry export is an extension. | -| **Feature Flags** | **EXTENSION** | Remote feature flag evaluation is an operational concern, not a core requirement. The core can use build tags and config toggles. | -| **Auto-Update** | **EXTENSION** | Self-update mechanisms are distribution-specific and not part of the agent OS core. | -| **PII Redaction** (regex-based credential scrubbing before any data leaves the machine) | **CORE** | Any system that persists or transmits agent transcripts must scrub credentials. The redaction pipeline belongs in core. | - -### Remote and Bridge System - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Remote Session Control** (local CLI controllable from web UI via authenticated channel) | **EXTENSION** | Remote control is a specific deployment mode. The core exposes HTTP/SSE and UDS APIs; remote bridge is an extension that uses those APIs. | -| **Session Teleport** (move active session between environments) | **EXTENSION** | Session migration across environments is an advanced operational feature. | -| **SSH Tunnel Integration** | **EXTENSION** | SSH-based remote access is a specific transport. 
| - -### UI and Rendering - -| Feature | Classification | Rationale | -|---------|---------------|-----------| -| **Terminal UI** (React/Ink TUI with permission dialogs, progress, streaming) | **EXTENSION** | AGH uses a web SPA (React 19, Vite, TanStack). The specific UI technology is an extension concern. The core provides HTTP/SSE APIs that any UI consumes. | -| **Permission Dialog UX** (tool-specific approval UI with "always allow" options) | **EXTENSION** | The specific UI for permission requests is a frontend concern. The core provides the permission decision API. | - -## Architectural Patterns Worth Adopting - -### 1. Uniform Tool Interface as Core Abstraction - -**Pattern**: Every capability the agent can invoke -- filesystem, shell, web, MCP, custom -- implements the same interface with uniform schema, permissions, and execution semantics. - -**Why AGH should adopt this**: AGH's ACP layer handles agent spawning, but tool execution within agent sessions needs the same uniformity. Define a `ToolDriver` interface in Go: - -```go -type ToolDriver interface { - Name() string - InputSchema() Schema - IsReadOnly() bool - IsConcurrencySafe() bool - CheckPermissions(ctx context.Context, input any) PermissionDecision - Call(ctx context.Context, input any) (ToolResult, error) -} -``` - -This becomes the extension point for all tool implementations, including MCP-proxied tools. - -### 2. Lifecycle Hook Bus with Structured Protocol - -**Pattern**: A typed event bus with 25+ lifecycle events, where hooks can block, modify, or transform operations via a structured JSON output protocol. - -**Why AGH should adopt this**: AGH's notifier pattern is already a typed fan-out interface. 
Extending it to a formal hook bus with: -- Pre/post execution events for tool calls -- Session lifecycle events (start, end, resume) -- Permission decision events -- Context management events (pre/post compact) - -The structured output protocol (`continue`, `stopReason`, `updatedInput`, `transformedResult`) is what transforms hooks from passive observers into active participants. This is the pattern that turns AGH from a product into a platform. - -### 3. Three-Layer Extension Reconciliation - -**Pattern**: Separate intent (what the user configured), materialization (what is installed on disk), and activation (what is live in the runtime). - -**Why AGH should adopt this**: AGH's skills catalog should adopt this exact pattern. A skill can be configured but not installed, installed but disabled, or active and running. Each layer reconciles independently, making the system robust to partial failures (corrupted skill file does not crash the daemon). - -### 4. Progressive Disclosure for Skills/Capabilities - -**Pattern**: Skills and agent definitions declare short metadata (name, description, when_to_use) that stays in context permanently. The full content materializes only when activated. - -**Why AGH should adopt this**: With dozens or hundreds of skills, eager loading would blow the context budget. AGH's skill loader should present only metadata to the agent until activation, keeping the per-turn token cost constant regardless of skill library size. - -### 5. Permission Cascade with Short-Circuit Evaluation - -**Pattern**: A waterfall of permission checks ordered from most-restrictive to most-permissive, where each stage can short-circuit with a definitive answer. - -**Why AGH should adopt this**: AGH needs a permission system for tool execution. 
The waterfall pattern (deny rules first, then tool-specific logic, then mode check, then allow rules, then user prompt) is the right structure because it guarantees that deny rules are always enforced and safe operations auto-approve without user interaction. - -### 6. Smart Concurrency via Tool Metadata - -**Pattern**: Partition tool calls by `isReadOnly()` and `isConcurrencySafe()` -- run reads in parallel, writes serially. - -**Why AGH should adopt this**: AGH manages agent sessions that invoke tools. When an agent requests multiple tool calls in one turn, the daemon should partition them using the same metadata flags. This is a low-effort, high-impact optimization. - -### 7. Static/Dynamic Prompt Split for Cache Efficiency - -**Pattern**: Split system prompts into a rarely-changing static prefix (cacheable) and a per-request dynamic tail. - -**Why AGH should adopt this**: The system prompt for ACP agents includes tool schemas, role instructions, and coding conventions (static) plus environment info, git status, and memory (dynamic). Splitting these lets the API cache the expensive static portion. - -## Extension System Insights - -### What Makes Claude Code's Extension Model Work - -1. **Small, stable core interfaces**: The `Tool` interface, the `HookJSONOutput` protocol, and the `SKILL.md` contract are small and stable. Extensions implement them without needing to understand the rest of the codebase. - -2. **Extensions cannot violate core invariants**: The permission waterfall, the tool execution pipeline, and the hook lifecycle all run in the core. Extensions plug into these pipelines -- they do not bypass them. A malicious plugin cannot skip the permission check because the check happens in the core pipeline, not in the plugin. - -3. **Extensions are declared, not coded (where possible)**: Skills are Markdown files. Agent definitions are Markdown files. Hook configurations are JSON in settings. Permission rules are strings in settings. 
This low-code approach to extensions makes the system accessible to non-developers and auditable by security teams. - -4. **Progressive complexity**: Simple extensions (a permission rule, a command hook) require zero code. Medium extensions (a skill with a procedure) require Markdown. Complex extensions (a plugin with MCP servers, tools, and hooks) require a manifest and code. The system supports all three levels without forcing everyone to the most complex level. - -5. **Fail-safe degradation**: Missing plugins do not crash the daemon. Failed hooks return errors but do not block the pipeline (unless they explicitly return `continue: false`). Unreadable memory files silently degrade to no-memory. The core is designed to keep running even when extensions fail. - -### Recommendations for AGH's Extension System - -1. **Define the extension contract in Go interfaces, not in plugin APIs**: AGH's extensions should implement Go interfaces (`ToolDriver`, `HookExecutor`, `MemoryRecaller`, `PermissionClassifier`). The daemon loads extensions that fulfill these interfaces. This is Go-native and avoids the complexity of a plugin framework. - -2. **Support declarative extensions via TOML/YAML/Markdown**: Not every extension needs compiled code. Skills (Markdown), permission rules (TOML config), hook commands (shell commands in TOML), and agent definitions (Markdown frontmatter) should all work without compilation. - -3. **Use the notifier pattern for the hook bus**: AGH's existing notifier pattern is the right foundation. Extend it with typed lifecycle events and the structured output protocol so hooks can participate in decisions, not just observe them. - -4. **Make MCP a first-class citizen**: Since AGH speaks ACP, it should also speak MCP for tool access. MCP tools should be indistinguishable from built-in tools in the tool registry, permission system, and hook pipeline. This is what makes the tool ecosystem open-ended. - -5. 
**Ship a minimal set of bundled tools and let everything else be extensions**: The core should ship with: file read, file write, file edit, shell execution, glob, grep, and MCP bridge. Everything else -- web fetch, web search, notebook editing, remote triggers -- should be extensions that demonstrate the tool interface. - -6. **Invest in the permission system early**: Claude Code's permission model is its most mature subsystem and arguably its most important. AGH should build the permission cascade and the hook bus before building advanced features, because every advanced feature depends on them. diff --git a/.compozy/tasks/extensability/analysis/analysis_goclaw.md b/.compozy/tasks/extensability/analysis/analysis_goclaw.md deleted file mode 100644 index 6b67f1d82..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_goclaw.md +++ /dev/null @@ -1,343 +0,0 @@ -# GoClaw Analysis for AGH Extensibility - -## Overview - -GoClaw is a multi-tenant AI agent gateway written in Go 1.26 that routes end-user messages through a think-act-observe agent loop, executes tools against pluggable LLM providers, and streams responses back into multiple messaging channels (Telegram, Feishu, Zalo, Discord, WhatsApp). It is structured as a single control-plane binary with a six-layer stack: Client, Gateway, Agent Execution, Provider Bridge, Storage, and Shared Infrastructure. - -**Key differentiators from AGH:** -- GoClaw is a multi-tenant SaaS gateway; AGH is a single-user local daemon -- GoClaw owns the LLM provider bridge (direct API calls); AGH spawns ACP-compatible agents as subprocesses -- GoClaw uses PostgreSQL + pgvector; AGH uses SQLite -- GoClaw has a central event bus (`bus.MessageBus`); AGH uses a typed Notifier pattern with direct function calls -- GoClaw runs its own agent loop in-process; AGH delegates execution to external agent processes (Claude Code, Codex, etc.) - -**What makes GoClaw especially relevant:** Both are Go single-binary systems. 
GoClaw has solved many extensibility problems (dynamic tools, channel adapters, hook systems, MCP bridging, skills discovery) that AGH will need as it grows through Phase 2 (Memory/Skills/State) and Phase 3 (Agent network protocol). - ---- - -## Key Features Analysis - -| Feature | GoClaw Implementation | Classification for AGH | Rationale | -|---|---|---|---| -| **Think-Act-Observe Loop** | `internal/agent/loop.go` -- in-process LLM call + tool execution cycle with parallel tool dispatch, iteration limits, budget guards | **N/A (different model)** | AGH delegates execution to external agents via ACP/JSON-RPC over stdio. AGH does not own the agent loop -- the spawned agent (Claude Code, etc.) does. However, the iteration/budget guard patterns are worth borrowing for session-level cost control. | -| **Tool Registry** | `tools.Registry` -- unified `Tool` interface (`Name/Schema/Invoke`) for built-in, dynamic, and MCP-sourced tools | **CORE** | AGH already has a tools concept through ACP. A unified tool registry interface that normalizes tools regardless of source (built-in, MCP, dynamic) should be core infrastructure. | -| **MessageBus (Event Bus)** | `internal/bus/bus.go` -- buffered channels (1000-slot), inbound/outbound message routing, event broadcasting with per-subscriber filtering | **CORE (limited)** | AGH explicitly rejects a generic event bus in its architecture principles ("no event bus, no NATS"). AGH uses a typed Notifier pattern instead. However, the *specific patterns* from GoClaw's bus -- deduplication helpers, debounce helpers, non-blocking publish with drop-on-full -- are worth adopting as utilities within AGH's existing Notifier. | -| **Channel Adapter System** | `Channel` interface (`Start/Stop/Send/Health`) with 5 implementations (Telegram, Discord, etc.), `ChannelManager` orchestrator, `RunContext` for streaming state | **EXTENSION** | AGH's primary interfaces are HTTP/SSE (web UI) and UDS (CLI). 
Messaging platform adapters are clearly extension territory -- they add reach without changing core behavior. The `Channel` interface pattern is excellent for plugin design. | -| **Hook System (Loop-Level)** | Typed function-pointer fields on `Loop` struct (`EnsureUserFilesFunc`, `SeedUserFilesFunc`, `ContextFileLoaderFunc`, `BootstrapCleanupFunc`) with nil-check invocation | **CORE** | AGH should adopt this pattern for session lifecycle hooks. Function-pointer fields are compile-time safe, zero-reflection, and fit AGH's "direct function calls through interfaces" principle. Perfect for hooks like `OnSessionStart`, `OnSessionEnd`, `OnEventRecorded`, `OnMemoryConsolidation`. | -| **Hook System (Handler-Level)** | Pre/post hooks on RPC method handlers (`preValidate`, `postTurn`) | **CORE** | Handler-level hooks for the API layer (HTTP/UDS) are core infrastructure. AGH's `api/httpapi` and `api/udsapi` should support pre/post hooks for audit, analytics, and custom validation. | -| **Dynamic/Custom Tools** | `DynamicTool` wrapping `CustomToolDef` -- shell command templates with `{{.key}}` substitution, per-tool timeouts, encrypted env vars | **EXTENSION** | Shell-command-based tools are an extension mechanism, not core. AGH should provide a `DynamicTool` plugin point but not bake shell execution into the core. The template rendering and shell escaping patterns are reusable. | -| **MCP Bridge** | `mcp.Manager` with connection pooling, three transports (stdio/SSE/streamable-HTTP), tool namespacing (`mcp__{server}__{tool}`), hybrid search mode (40-tool threshold + BM25 lazy loading), per-agent/user access grants | **CORE** | AGH already spawns ACP agents via stdio -- MCP bridge is a natural extension of the same pattern. Tool namespacing, connection management, health monitoring, and hybrid search mode should be core infrastructure since MCP is becoming the standard interop protocol. 
| -| **Hybrid Tool Search** | BM25 search over deferred tools when tool count > 40, with `mcp_tool_search` and `mcp_tool_activate` meta-tools | **CORE** | Critical for scaling. As AGH accumulates tools from multiple MCP servers, the context budget pressure becomes real. The search-then-activate pattern should be core. | -| **Memory (Vector Embeddings)** | pgvector-backed semantic search, configurable chunking with overlap, cosine similarity retrieval, top-K injection into system prompt | **CORE** | AGH already has `internal/memory` with dual-scope persistent memory. GoClaw's chunking strategy (configurable chunk size + overlap), dedup-by-hash, and the integration pattern (search at run-start, inject into context) validate AGH's approach. | -| **Knowledge Graph** | LLM-based entity/relation extraction, PostgreSQL storage, BFS path finding, fuzzy entity dedup | **EXTENSION** | Knowledge graphs are expensive to build and maintain (require LLM calls for extraction). This is a Phase 2+ extension that sits on top of the memory system. AGH should provide the interface but not bundle the implementation. | -| **Skills System** | Document-based skills (`SKILL.md` with YAML frontmatter), five-tier loader hierarchy, BM25 + pgvector hybrid discovery, hot-reload, agent self-evolution | **CORE (loader + discovery) / EXTENSION (self-evolution)** | AGH already has `internal/skills` with catalog and loader. GoClaw validates the search-then-load pattern and the separation of skills (procedural knowledge) from tools (executable capabilities). The loader hierarchy and discovery engine are core. Self-evolution (nudges at 70%/90% budget) is an extension. | -| **LLM Provider Bridge** | `Provider` interface (`Chat/ChatStream/DefaultModel/Name`), 5 implementations, `providers.Registry`, provider-specific workarounds (thinking passback, token clamping, synthetic streaming) | **N/A (different model)** | AGH does not own the LLM call -- it delegates to ACP agents. 
However, the `Registry` pattern (lazy map + RWMutex, O(1) lookup) and the encrypted credential storage pattern are directly applicable to AGH's agent/driver management. | -| **Agent Teams and Delegation** | Subagents (self-cloned goroutines), delegation (permission-gated inter-agent handoffs via AgentLinks), team coordination (Kanban task boards with Lead/Member roles) | **EXTENSION** | Multi-agent coordination is Phase 3 territory. AGH should define the interfaces (delegate, handoff, team) but implement them as extensions. The subagent pattern (spawn a background goroutine running the same loop) maps to AGH spawning additional ACP sessions. | -| **Cron and Scheduling** | In-process scheduler with three modes (`cron`/`at`/`every`), JSON file persistence, exponential backoff retry, 200-entry ring buffer log | **EXTENSION** | Scheduling is an extension. AGH could expose a `Scheduler` interface in core but the cron implementation should be a plugin. GoClaw's pattern of dispatching scheduled jobs through the same agent loop (as synthetic `RunRequest`) is elegant and worth copying. | -| **Heartbeat System** | Per-agent periodic self-check with `HEARTBEAT.md` checklist, `HEARTBEAT_OK` suppression, stagger offset, active-hours window | **EXTENSION** | Specialized scheduling for agent self-monitoring. Extension built on top of the scheduler interface. The `HEARTBEAT_OK` suppression pattern is clever for silent monitoring. | -| **Context Files and Agent Identity** | `SOUL.md`, `IDENTITY.md`, `USER.md`, `BOOTSTRAP.md` -- virtual filesystem interceptor routes agent file reads/writes to DB, per-user vs shared scoping | **EXTENSION** | AGH already has workspace management. The context file interception pattern (virtual FS layer that redirects specific filenames to a different backend) is interesting but heavy. AGH's simpler approach of injecting context through the ACP protocol is more appropriate for its architecture. 
| -| **Shell Execution Security** | Four-gate pipeline: deny patterns, credentialed binary detection, approval flow, sandbox routing. Output scrubbing with `ScrubCredentials`. Docker sandbox with `--read-only --cap-drop ALL --network none`. | **EXTENSION** | AGH delegates execution to external agents, so shell security is the agent's responsibility. However, if AGH adds dynamic tool execution, the deny-pattern and credential-scrubbing patterns should be borrowed. | -| **Text-to-Speech** | `tts.Manager` with 4 provider backends (OpenAI, ElevenLabs, Edge, MiniMax), AutoMode triggers, `TtsTool` for agent-initiated synthesis | **EXTENSION** | Clearly an extension. No impact on AGH's core. | -| **RBAC and Security** | 5-layer permission cascade (role hierarchy, API key scopes, global tool policy, per-agent tool policy, owner-only tools), AES-256-GCM encryption at rest, input guard (detection-only) | **CORE (partial)** | AGH needs authentication and authorization for its HTTP/UDS APIs. The role hierarchy pattern, API key hashing (SHA-256), and encrypted credential storage are core. The full 5-layer cascade is overkill for a single-user daemon but the patterns are sound for when AGH supports multiple users. | -| **Audit Logging** | Append-only `audit_logs` table, structured `slog` output, tenant-scoped, queryable via API | **CORE** | AGH already has `internal/observe` for event recording. Audit logging of security-relevant actions (config changes, session management) should be core. | -| **Rate Limiting** | Per-IP/per-token token-bucket rate limiter at the gateway | **EXTENSION** | Single-user daemon does not need rate limiting. Extension for when AGH supports remote access. | -| **Multi-Tenant PostgreSQL** | `context.Context` propagation of `tenant_id`, RLS on all tables, encrypted columns for secrets | **N/A** | AGH is local-first, single-tenant. The context propagation pattern is good Go practice but multi-tenancy is out of scope. 
| -| **OpenAI-Compatible API** | `POST /v1/chat/completions` drop-in replacement for OpenAI clients | **EXTENSION** | Useful for interoperability but not core to AGH's mission. Could be a thin extension layer over AGH's HTTP API. | -| **WebSocket v3 Protocol** | Frame-based protocol with `RequestFrame`/`ResponseFrame`/`EventFrame`, method router, per-client write channels | **N/A** | AGH uses HTTP/SSE + UDS, not WebSocket RPC. The event frame pattern and per-client write channel pattern are already addressed by AGH's SSE implementation. | -| **Inbound Debounce** | Per-chat-ID debounce timer (500ms) to consolidate rapid user messages | **CORE** | Debouncing is essential for AGH's HTTP/SSE interface. When a user types rapidly in the web UI, debouncing prevents N session runs. Should be a small utility in core. | -| **Message Dedup** | Content-hash dedup with 5-second window to prevent duplicate processing on reconnects | **CORE** | Important for AGH's SSE reconnection scenarios. A small utility. | -| **Connection Health Monitoring** | Per-channel/per-MCP-server health checks with status tracking, reconnection with exponential backoff | **CORE** | AGH spawns subprocesses -- monitoring their health, detecting crashes, and reconnecting is core infrastructure. The health check pattern with `ChannelHealth` struct and the exponential backoff retry are directly applicable. | - ---- - -## Architectural Patterns Worth Adopting - -### 1. Registry Pattern with Lazy Loading and TTL Cache - -GoClaw's `agent.Router` is a lazy-loading cache keyed by agent ID with a 10-minute TTL: - -```go -type Router struct { - agents map[string]*agentEntry - mu sync.RWMutex - resolver ResolverFunc // lazy-create from DB - ttl time.Duration -} -``` - -**Applicability to AGH:** AGH's `session.Manager` could adopt this pattern for agent driver caching. 
When AGH spawns an ACP agent, the driver instance could be cached and reused across sessions for the same agent type, with TTL-based eviction for config changes. - -**Classification: CORE pattern** -- fits AGH's existing `session/` package. - -### 2. Typed Function-Pointer Hooks (Not Event Bus) - -GoClaw uses function-pointer fields on structs for lifecycle hooks: - -```go -type Loop struct { - ensureUserProfile EnsureUserProfileFunc - seedUserFiles SeedUserFilesFunc - loadContextFiles ContextFileLoaderFunc -} -``` - -Nil-check before invocation makes hooks optional. No reflection, no event bus, compile-time type safety. - -**Applicability to AGH:** This is exactly aligned with AGH's "direct function calls through interfaces" and "no event bus" principles. AGH's `session.Manager`, `observe.Recorder`, and `memory.Manager` should expose typed hook fields for extension points like: - -- `OnSessionCreated func(ctx, session) error` -- `OnEventRecorded func(ctx, event) error` -- `OnConsolidationComplete func(ctx, results) error` - -**Classification: CORE pattern** -- directly implements AGH's architectural principles. - -### 3. Tool Interface Unification - -GoClaw normalizes all tools (built-in, dynamic shell, MCP-sourced) behind a single interface: - -```go -type Tool interface { - Name() string - Schema() json.RawMessage - Invoke(ctx context.Context, args map[string]any) (string, error) -} -``` - -The agent loop does not know where a tool came from. This is achieved through wrapper types like `BridgeTool` for MCP and `DynamicTool` for shell commands. - -**Applicability to AGH:** AGH communicates tools to agents via ACP protocol, but it still needs to manage tool registries for MCP bridging, skill-provided tools, and dynamic tools. A unified `Tool` interface in AGH would normalize these sources before exposing them to ACP agents. - -**Classification: CORE pattern** -- essential for Phase 2 extensibility. - -### 4. 
Parallel Execution with Deterministic Ordering - -GoClaw dispatches tool calls in parallel but sorts results back to original order: - -```go -for i, tc := range toolCalls { - go func(tc, idx int) { - result := executor.Invoke(ctx, tc) - resultsChan <- indexedResult{idx: idx, result: result} - }(tc, i) -} -sort.Slice(results, func(i, j int) bool { - return results[i].idx < results[j].idx -}) -``` - -**Applicability to AGH:** Useful when AGH needs to execute multiple MCP tool calls or process multiple events concurrently. The `indexedResult` pattern preserves ordering cheaply. - -**Classification: CORE utility** -- small helper in `internal/procutil` or similar. - -### 5. Non-Blocking Publish with Drop-on-Full - -GoClaw's `TryPublishInbound()` is a non-blocking variant that drops messages when the buffer is full rather than blocking producers: - -```go -select { -case bus.inbound <- msg: - return true -default: - slog.Warn("inbound buffer full, message dropped") - return false -} -``` - -**Applicability to AGH:** AGH's Notifier pattern should support this for SSE event delivery. A slow web client should not back-pressure the session execution. AGH's SSE helpers in `api/core` could adopt this. - -**Classification: CORE pattern** -- protects core from slow consumers. - -### 6. Stagger Offset for Periodic Tasks - -GoClaw uses MD5 hash of agent ID to deterministically spread periodic tasks across a time window, preventing thundering herd: - -```go -func StaggerOffset(agentID string) time.Duration { - hash := md5.Sum([]byte(agentID)) - offset := binary.BigEndian.Uint32(hash[:4]) % 30 - return time.Duration(offset) * time.Second -} -``` - -**Applicability to AGH:** Useful for AGH's dream consolidation triggers when multiple workspaces need consolidation around the same time. - -**Classification: CORE utility** -- small helper for scheduling. - -### 7. 
Context Propagation Over Global State - -GoClaw propagates tenant ID through `context.Context` rather than a global singleton: - -```go -func WithTenantID(ctx context.Context, id uuid.UUID) context.Context { - return context.WithValue(ctx, ctxKeyTenantID, id) -} -``` - -**Applicability to AGH:** AGH already uses `context.Context` as first argument everywhere. This validates the approach. AGH should consider propagating session ID, workspace ID, and request ID through context for observability. - -**Classification: CORE pattern** -- already partially adopted. - ---- - -## Extension System Insights - -### Dynamic Tools: Shell-Command Extension Point - -GoClaw's `DynamicTool` is the most accessible extension mechanism -- operators define tools as shell command templates stored in the database: - -``` -Command: "curl -s {{.url}} | jq '.results[]'" -Parameters: {"url": {"type": "string"}} -TimeoutSeconds: 30 -``` - -**Insight for AGH:** AGH should provide a similar mechanism where users can define tools via TOML config that get exposed to ACP agents through the protocol. The key security patterns to borrow: -- Shell escaping via single-quote wrapping -- Per-tool configurable timeouts with process-group kill -- Encrypted environment variables for credential injection -- Output scrubbing with both static patterns (API key regexes) and dynamic patterns (injected credential values) - -**Recommendation:** Define a `DynamicToolProvider` extension interface in AGH that can be implemented by a shell-command plugin, an HTTP-webhook plugin, or a WASM plugin. 
- -### Channel Adapters: The Minimal Interface - -GoClaw's `Channel` interface is remarkably small: - -```go -type Channel interface { - Name() string - Start(ctx context.Context) error - Stop(ctx context.Context) error - Send(ctx context.Context, msg OutboundMessage) error - Health() ChannelHealth -} -``` - -**Insight for AGH:** This is the gold standard for a plugin interface -- five methods: an identifier (`Name`), a clear lifecycle (`Start`/`Stop`), a single operation (`Send`), and a health probe (`Health`). AGH should define similarly minimal interfaces for its extension points: - -- `AgentDriver` (already exists in `session/` -- `Start/Stop/SendMessage`) -- `ToolProvider` -- `ListTools/InvokeTool/Health` -- `MemoryBackend` -- `Store/Search/Delete/Health` -- `NotificationSink` -- `Send/Health` - -The `Health()` method returning a struct with `Status`, `LastError`, `LastActivity` is a pattern worth standardizing across all AGH extensions. - -### Hook/Event System: Function Pointers > Event Bus - -GoClaw's hook system uses two complementary patterns: - -1. **Loop-level hooks** -- typed function fields on structs, nil-checked before invocation -2. **Bus-level broadcasting** -- buffered channels with subscriber filtering - -**Insight for AGH:** AGH's explicit rejection of event buses is correct for its scope. The function-pointer hook pattern is the right choice. 
However, AGH should formalize the hook taxonomy: - -| Lifecycle Point | Hook Signature | Where | -|---|---|---| -| Session created | `func(ctx, *Session) error` | `session.Manager` | -| Session ended | `func(ctx, *Session) error` | `session.Manager` | -| Event recorded | `func(ctx, *Event) error` | `observe.Recorder` | -| Memory stored | `func(ctx, *MemoryEntry) error` | `memory.Manager` | -| Dream triggered | `func(ctx, *DreamRequest) error` | `memory/consolidation` | -| Skill loaded | `func(ctx, *Skill) error` | `skills.Catalog` | -| Agent spawned | `func(ctx, *AgentProcess) error` | `acp.Driver` | -| Agent crashed | `func(ctx, *AgentProcess, error) error` | `acp.Driver` | - -Each hook is a `func` field on the owning struct, set via a `With*` functional option at construction time. Nil hooks are no-ops. - -### MCP Bridge: Connection Pooling and Hybrid Search - -GoClaw's MCP bridge solves three problems AGH will face: - -1. **Connection management** -- pooling server connections across sessions, health monitoring with exponential backoff, cleanup on server crash -2. **Tool namespacing** -- `mcp__{server}__{tool}` prevents collisions when multiple MCP servers expose tools with the same name -3. **Context budget management** -- when tool count > 40, switch to hybrid mode where only the top 40 are inline and the rest are searchable via BM25 - -**Insight for AGH:** AGH already spawns ACP agents via stdio -- the same transport used for MCP stdio servers. The `mcp.Manager` pattern (server state with atomic connected flag, reconnection with backoff, health checks every 30s) maps directly to AGH's `acp.Driver` lifecycle. 
Key recommendations: - -- Adopt `mcp__{server}__{tool}` namespacing for tool deduplication -- Implement the 40-tool hybrid search threshold -- AGH's ACP agents have finite context windows -- Pool MCP server connections across sessions in the `daemon/` composition root -- Use the `BridgeTool` wrapper pattern to present MCP tools through AGH's native tool interface - -### Skills: Search-Then-Load Pattern - -GoClaw's skills are document-based (`SKILL.md`) rather than code-based. Discovery uses BM25 + pgvector hybrid search. Loading injects the skill content into the agent's context window. - -**Insight for AGH:** AGH already has `internal/skills` with a catalog and loader, plus `internal/skills/bundled` for built-in skills. GoClaw validates that skills should be: -- Filesystem-based (markdown with YAML frontmatter) -- Discoverable via search (not eagerly loaded) -- Injected as context (not executed as code) -- Hierarchical (workspace > project > global > bundled) - -The self-evolution mechanism (agent creates new skills from execution history) is fascinating but should be an extension -- it requires monitoring agent execution patterns and triggering skill creation, which is complex orchestration that does not belong in AGH's minimal core. - -### Deduplication and Debounce Helpers - -GoClaw provides two small but critical utilities: - -1. **`DedupeHelper`** -- content-hash dedup with configurable time window (5s default) -2. **`InboundDebounceHelper`** -- per-key debounce timer (500ms default) that consolidates rapid inputs - -**Insight for AGH:** These should be standalone utilities in `internal/` (perhaps `internal/rateutil` or alongside `internal/procutil`). They protect AGH from: -- SSE reconnection storms (dedup) -- Rapid user input in the web UI (debounce) -- Duplicate webhook deliveries from external systems - -Both are small, self-contained, and have zero dependencies -- perfect for AGH's core. 
- -### Health Monitoring Pattern - -GoClaw standardizes health across all subsystems: - -```go -type ChannelHealth struct { - Status string // "connected" | "connecting" | "disconnected" | "error" - LastError string - LastActivity time.Time - MessageCount int64 -} -``` - -**Insight for AGH:** AGH should define a standard `Health` struct in a shared package and require every subsystem to implement it: -- ACP agent processes: is the process alive, last event time, error count -- MCP server connections: connected/disconnected, last tool call, reconnect attempts -- SQLite databases: writable, size, last vacuum -- Memory system: consolidation status, entry count - -This feeds directly into AGH's `/health` endpoint and the `observe` package. - ---- - -## Summary: What AGH Should Take from GoClaw - -### Adopt as CORE (build into AGH's minimal robust core) - -1. **Typed function-pointer hooks** on `session.Manager`, `observe.Recorder`, `memory.Manager`, `acp.Driver` -2. **Unified Tool interface** for normalizing MCP tools, built-in tools, and dynamic tools -3. **MCP bridge with connection pooling**, health monitoring, namespacing, and hybrid search -4. **Dedup and debounce helpers** as standalone utilities -5. **Standardized Health struct** across all subsystems -6. **Non-blocking publish with drop-on-full** for SSE event delivery -7. **Parallel execution with deterministic ordering** as a utility -8. **Skills search-then-load pattern** (validates AGH's existing approach) - -### Adopt as EXTENSION (plugin/extension system) - -1. **Channel adapters** -- define the `Channel` interface, let extensions implement Telegram/Discord/etc. -2. **Dynamic shell tools** -- define `DynamicToolProvider`, let a shell plugin implement it -3. **Knowledge graph** -- define the interface, let an extension provide LLM-based extraction -4. **Cron/Scheduling** -- define `Scheduler` interface, let an extension implement it -5. 
**Agent teams/delegation** -- define coordination interfaces for Phase 3 -6. **TTS** -- pure extension, no core impact -7. **Heartbeat system** -- extension on top of scheduler -8. **Skills self-evolution** -- extension on top of skills core -9. **Rate limiting** -- extension for multi-user scenarios -10. **OpenAI-compatible API** -- thin extension layer - -### Key Design Principles Validated by GoClaw - -- **Small interfaces win.** GoClaw's `Channel` (5 methods), `Provider` (4 methods), and `Tool` (3 methods) interfaces are the right granularity. AGH should target 3-5 methods per extension interface. -- **Nil-check hooks beat event buses.** GoClaw's function-pointer hooks are zero-overhead when unused, compile-time safe, and require no subscription management. This aligns perfectly with AGH's "no event bus" principle. -- **Namespace everything.** GoClaw's `mcp__{server}__{tool}` pattern prevents collisions as the tool catalog grows. AGH should adopt this early. -- **Health is not optional.** Every subsystem in GoClaw reports health. AGH should make `Health()` a required method on every extension interface. -- **Search beats eager loading.** GoClaw's BM25 hybrid search for both skills and MCP tools keeps context budgets manageable. AGH should adopt this pattern before the tool/skill catalog grows large. diff --git a/.compozy/tasks/extensability/analysis/analysis_hermes.md b/.compozy/tasks/extensability/analysis/analysis_hermes.md deleted file mode 100644 index a20ae610b..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_hermes.md +++ /dev/null @@ -1,183 +0,0 @@ -# Hermes Agent Analysis for AGH - -## Overview - -Hermes is a Python-based self-improving AI agent by Nous Research, designed as a long-lived background process reachable from any channel (CLI, Telegram, Discord, Slack, WhatsApp, Signal, Email, Matrix, Home Assistant). 
Its hub-and-spoke architecture centers on a synchronous `AIAgent` core driven by a five-phase `run_conversation()` loop, surrounded by a self-registering tool system, SQLite+FTS5 session store, eight messaging platform adapters, six terminal execution backends, a cron scheduler, a learning loop (persistent memory + skills + session recall), and an ACP adapter for IDE integration. - -Hermes represents a "maximalist kitchen-sink" approach: everything is built in, everything shares the same process, and every interface uses the same registry, session database, memory, and skills directory. This contrasts with AGH's philosophy of a robust minimal core with a highly extensible plugin system. - -### Key Architectural Differences from AGH - -| Dimension | Hermes | AGH | -|-----------|--------|-----| -| Language | Python (synchronous core, async gateway) | Go (single binary) | -| Agent coupling | One hub class (`AIAgent`) that does everything | Separate packages wired via daemon composition root | -| Extension model | Module-level self-registration singletons | Go interfaces + dependency injection | -| Session store | Single SQLite file shared across all interfaces | Per-session event store + global catalog | -| Memory | Flat markdown file (`memory.md`) + optional Honcho | Dual-scope (global + workspace) with dream consolidation | -| Skills | agentskills.io markdown format in `~/.hermes/skills/` | Bundled skill definitions + catalog/loader | -| Communication | Direct subprocess or gateway adapters | ACP over JSON-RPC/stdio | -| Observability | `/insights` command + session cost accounting | Event recording, health metrics, query engine | - -## Key Features Analysis - -| # | Feature | Hermes Implementation | Classification | Rationale | -|---|---------|----------------------|----------------|-----------| -| 1 | **Self-registering tool registry** | Module-level singleton; tools register at import time with name, toolset, schema, handler, `check_fn()`, `requires_env` | 
**CORE** | AGH already has a tools concept via ACP. A typed Go tool registry with availability gating (`check_fn` equivalent) and toolset composition should be core -- it is the primary extensibility surface. | -| 2 | **Toolset composition** | Named groups (`web`, `research`, `full_stack`) with recursive `includes` for bulk enable/disable | **CORE** | Toolset grouping with enable/disable per-session is load-bearing for agent safety and flexibility. Core registry should support grouping. | -| 3 | **Availability gating via check_fn** | Tools withheld from model when API keys missing or deps unavailable -- model never sees tools it cannot use | **CORE** | Critical reliability property. AGH's tool definitions should include an availability predicate. Prevents hallucinated calls. | -| 4 | **Session store with FTS5** | Single SQLite `state.db` with `sessions`, `messages`, `messages_fts` tables; WAL mode; write retries with jitter | **CORE** | AGH already has SQLite stores. Adding FTS5 for cross-session search is a core capability for any agent OS -- it enables recall without external search infra. The schema pattern (an external-content FTS5 virtual table kept in sync by triggers) is directly portable to Go+SQLite. | -| 5 | **Cross-session recall (session_search)** | FTS5 query -> group by session -> LLM summarization per session group | **EXTENSION** | The two-stage pipeline (FTS5 retrieval + LLM summarization) is an opinionated recall strategy. AGH core should expose the FTS5 search primitive; the LLM summarization layer should be an extension. | -| 6 | **Persistent memory (memory_tool)** | Flat `memory.md` file with categorized facts; injected into system prompt every turn | **CORE** | AGH already has `internal/memory` with dual-scope memory. Hermes validates that injecting simple persistent facts into the system prompt is table stakes. Keep in core. 
| -| 7 | **Memory provider plugins** | `BuiltinMemoryProvider` (markdown) vs `HonchoMemoryProvider` (external API) via `MemoryManager` abstraction | **CORE pattern, EXTENSION providers** | The provider interface pattern belongs in core. Specific providers (Honcho, vector DB, etc.) are extensions. | -| 8 | **Skills pipeline (agentskills.io)** | Markdown files with YAML frontmatter; slash-command activation; injected as user message; auto-proposal from trajectories | **CORE** | AGH already has `internal/skills`. Hermes reinforces that skills should be: (a) markdown-based, (b) frontmatter-indexed, (c) injected as context not system prompt, (d) discoverable via tools. Keep in core. | -| 9 | **Skill auto-proposal** | Agent calls `skill_manage(action="propose")` after complex tasks to create new skills from completed trajectories | **EXTENSION** | Self-improvement is powerful but opinionated. The skill CRUD API should be core; the auto-proposal heuristic ("detect complex task completion and propose a skill") should be an extension/hook. | -| 10 | **Context compression** | 5-step algorithm: prune old tool results, protect head/tail, LLM-summarize middle, rebuild, chain sessions via `parent_session_id` | **CORE** | Context management is fundamental for long-running sessions. AGH should have a compaction interface in core with the default implementation. Session chaining via parent references is a good schema pattern. | -| 11 | **Prompt caching (Anthropic)** | `apply_anthropic_cache_control()` marks system prompt + last 3 messages with `cache_control` breakpoints | **CORE** | Provider-specific optimization, but the concept of marking stable context for caching is universal. AGH's transcript assembly should support cache-hint annotations. | -| 12 | **Gateway / platform adapters** | 8 adapters (Telegram, Discord, Slack, WhatsApp, Signal, Email, Matrix, Home Assistant) via `BaseAdapter` interface | **EXTENSION** | Definitively an extension. 
The `BaseAdapter` contract (connect, start, send_text, send_message, edit_message, delete_message, on_message, on_command) is a good interface to define in core and let extensions implement. | -| 13 | **Unified command registry** | `COMMAND_REGISTRY` shared across CLI and all gateway platforms; `cli_only` / `gateway_only` flags | **CORE** | A shared command dispatch table is core infrastructure. Commands registered once should be available across all interfaces. | -| 14 | **Cron / scheduled automations** | Natural-language tasks on cron schedule; delivery to any platform; `[SILENT]` marker convention | **EXTENSION** | Scheduling is not part of a minimal agent OS core. It is a compelling extension that uses core primitives (session creation, agent execution, platform delivery). | -| 15 | **Terminal execution backends** | 6 pluggable backends (Local, Docker, SSH, Modal, Daytona, Singularity) via `BaseEnvironment` interface | **CORE interface, EXTENSION backends** | The interface (`execute(command, cwd, timeout) -> {output, returncode}`) belongs in core. Individual backends are extensions. AGH already handles this via ACP subprocess spawning, but a pluggable execution environment concept is valuable. | -| 16 | **Subagent delegation** | `delegate_task` spawns isolated child `AIAgent` in ThreadPoolExecutor; zero-context-cost for parent; blocked tools prevent recursion | **CORE** | Agent composition is fundamental. AGH should support spawning child sessions with isolated contexts, restricted tool access, and independent iteration budgets. The depth limit and blocked-tool pattern are good safety defaults. | -| 17 | **ACP adapter (IDE integration)** | JSON-RPC server exposing `initialize`, `tools/list`, `tools/call`, `completion/complete`, `resources/read`; stateful sessions with code context | **CORE** | AGH already has ACP as its primary agent communication protocol. 
Hermes validates the pattern: expose the same tool registry and session semantics over JSON-RPC for IDE integration. | -| 18 | **Security: dangerous command detection** | Regex patterns for destructive commands; command normalization (ANSI stripping, null byte removal, NFKC); Tirith binary scanner | **CORE** | Command safety is essential for any agent that executes shell commands. AGH should have a core command-approval interface with default regex patterns. | -| 19 | **Approval state machine** | Three scopes: once / session / permanent; CLI interactive prompts; gateway async approval via chat buttons | **CORE** | The approval interface (check -> prompt -> remember) belongs in core. The persistence scope hierarchy is a good pattern. | -| 20 | **SSRF / URL safety** | Block private IP ranges, cloud metadata endpoints, user-defined blocklist | **CORE** | Network safety for agent web requests is core security. | -| 21 | **Gateway authorization** | Priority chain: platform allow-all -> DM pairing -> platform allowlist -> global allowlist -> global allow-all -> deny | **EXTENSION** | Gateway auth is specific to multi-user messaging scenarios. Extension responsibility. | -| 22 | **DM pairing system** | Cryptographic pairing codes for granting messaging platform access | **EXTENSION** | Platform-specific access control. Extension. | -| 23 | **Process management** | `ProcessRegistry` for background processes; spawn/poll/wait/kill/read_log; PTY support; crash recovery via checkpoint file | **EXTENSION** | Background process tracking is valuable but not core agent OS. The registry pattern and lifecycle management are good extension material. | -| 24 | **Persistent shell state** | `PersistentShellMixin` maintains shell state across tool calls; SSH ControlMaster | **EXTENSION** | Implementation detail of terminal execution. Extension. 
| -| 25 | **Token accounting & cost estimation** | `CanonicalUsage` tracks input/output/cache/reasoning tokens; per-model pricing; session-level cost rollup | **CORE** | Usage tracking is core observability. AGH's `internal/observe` should track token economics per session. | -| 26 | **Diagnostic tools (doctor/status)** | `hermes doctor` validates config, deps, tools; `hermes status` shows component health; `InsightsEngine` for analytics | **EXTENSION** | Diagnostics are important but not part of the core agent loop. Good extension that uses core health/metrics APIs. | -| 27 | **Batch processing / trajectory generation** | `BatchRunner` with multiprocessing; toolset distribution sampling; JSONL trajectory output; `TrajectoryCompressor` | **EXTENSION** | Training-data generation is Nous-specific and not part of a core agent OS. | -| 28 | **RL training environments** | Atropos integration, `HermesAgentBaseEnv` | **EXTENSION** | Research-specific. | -| 29 | **Voice / TTS system** | Multi-provider STT (faster-whisper, Groq, OpenAI) + TTS (Edge, ElevenLabs, OpenAI); Discord voice channels | **EXTENSION** | Modality-specific. Extension with provider plugin interface. | -| 30 | **Honcho user modeling** | Dialectic user modeling via external API; semantic search, peer cards, configurable write strategies | **EXTENSION** | External memory provider. Extension. | -| 31 | **Authentication / provider system** | 11 providers; 4 auth types (OAuth device code, OAuth external, API key, external process); credential resolution chain | **CORE interface, EXTENSION providers** | A provider resolution interface (credential lookup chain, model validation) belongs in core. Individual provider implementations are extensions. | -| 32 | **MCP server integration** | External MCP servers discovered at startup; tools namespaced under server name | **CORE** | MCP tool integration is part of the standard agent protocol ecosystem. AGH should support discovering and proxying MCP servers as a core capability. 
| -| 33 | **User plugins** | `~/.hermes/plugins/` directory; Python modules loaded at startup; register tools via the same registry | **CORE mechanism** | User-authored tool extensions via a plugin directory is a core extensibility mechanism. | -| 34 | **Streaming response delivery** | `StreamingResponse` class: buffer 500 chars or 2s timeout; progressive message editing on platforms that support it | **EXTENSION** | Platform-specific delivery optimization. Extension. | -| 35 | **System prompt builder** | Ordered concatenation of stable sections (identity, platform hints, skills index, memory, context files, guidance blocks) for cache stability | **CORE** | System prompt assembly order matters for caching. AGH should have a structured prompt builder with ordered sections. | - -## Architectural Patterns Worth Adopting - -### 1. Learning Loop (Memory -> Skills -> Session Recall) - -Hermes implements a three-layer learning loop that feeds back into every subsequent session: - -- **Persistent memory**: durable facts saved via tool call, injected into system prompt -- **Skills**: procedural knowledge crystallized from completed tasks, invocable on demand -- **Session recall**: FTS5 search + LLM summarization across all historical sessions - -**AGH relevance**: AGH already has dual-scope memory with dream consolidation and a skills catalog. The key pattern to adopt is the **closed feedback loop**: the agent should be able to save memories, create skills, and search past sessions -- all via tool calls within the same conversation. The dream consolidation AGH already has goes beyond Hermes (which has no automatic consolidation). The FTS5 cross-session search pattern is the missing piece AGH should add to its `internal/store` layer. - -**Recommendation**: Add FTS5 indexing to sessiondb event content. Expose a `session_search` capability as a core tool. Let the LLM summarization of results be an extension point. - -### 2. 
Cron / Scheduled Automations - -Hermes runs a 60-second tick loop in its gateway process, checking `~/.hermes/cron/jobs.json` for due jobs. Each job carries a natural-language command, a cron-syntax trigger, and a delivery target. - -**AGH relevance**: For an Agent OS, scheduled execution is a strong differentiator. An agent that can autonomously perform tasks on schedule, route output to platforms, and suppress noise with `[SILENT]` markers transforms from a reactive tool to a proactive assistant. - -**Recommendation**: Implement as an **extension** that registers with the daemon. Core should expose: (a) a way to create sessions programmatically (already exists), (b) a delivery/notification interface for routing output, (c) a timer/scheduler hook in the daemon lifecycle. The cron extension then uses these primitives. - -### 3. Gateway / Platform Adapters - -Hermes uses a `BaseAdapter` interface with 8 implementations. All adapters normalize incoming messages to `MessageEvent` and route through the same dispatch pipeline. - -**AGH relevance**: AGH already has HTTP/SSE and UDS interfaces. Adding messaging platform support should follow the adapter pattern. - -**Recommendation**: Define a `PlatformAdapter` interface in core (`internal/api/contract`). Each platform is a separate extension package. The shared command registry pattern (commands registered once, available everywhere) is excellent and should be adopted. - -### 4. Pluggable Execution Environments - -Hermes separates "what command to run" from "where to run it" via `BaseEnvironment`. The terminal tool delegates to the active backend without knowing whether it is local, Docker, SSH, or serverless. - -**AGH relevance**: AGH spawns ACP-compatible agents as subprocesses. The execution environment concept could extend this: agents could run in Docker, on remote machines, or in serverless environments. - -**Recommendation**: Define an `ExecutionEnvironment` interface in core. 
The current local subprocess spawning becomes the default implementation. Docker, SSH, and serverless backends become extensions. This is lower priority than other patterns but valuable for Phase 3 (agent network protocol). - -### 5. Approval / Security Pipeline - -Hermes implements defense-in-depth: command normalization -> regex detection -> Tirith scanning -> approval callback -> execution backend isolation -> file write safety. The three-scope approval state machine (once/session/permanent) is particularly well-designed. - -**AGH relevance**: AGH will need command safety as it supports more agent types. The layered approach is the right architecture. - -**Recommendation**: Core should define: (a) a `CommandApproval` interface, (b) default regex patterns for dangerous commands, (c) a scope-based approval memory (once/session/permanent). The Tirith scanner and SSRF protection can be extensions or built-in. - -## Extension System Insights - -### Skills Pipeline Design - -Hermes validates several skills design decisions that AGH should consider: - -1. **User-message injection over system-prompt modification**: Skill content injected as a user message preserves prompt caching. System prompt stays stable; only the skill body pays fresh token cost. AGH should adopt this pattern. - -2. **Frontmatter-indexed, body-injected**: Only the skill index (names + descriptions) goes into the system prompt. Full skill content is loaded on demand. This keeps system prompts compact. - -3. **Platform-conditional skills**: Skills declare which platforms and tools they require via `conditions` in frontmatter. Unavailable skills are withheld. AGH should support skill conditions. - -4. **Auto-proposal loop**: After complex tasks, the agent proposes new skills from the trajectory. This should be an opt-in extension, not forced behavior. - -### Tool Registry Design - -Key patterns from Hermes' tool registry for AGH: - -1. 
**Single-file tool registration**: Each tool is a self-contained file that registers itself. In Go, this maps to an `init()` function or a registry-builder pattern. AGH should make adding a tool a one-package operation. - -2. **check_fn for availability gating**: Tools that cannot run (missing API keys, missing deps) are withheld from the model's tool list. This is the single most important reliability property. AGH must implement this. - -3. **Toolset composition with recursive includes**: Toolsets compose other toolsets. `resolve_toolset("full_stack")` recursively expands to all leaf tools. This is valuable for configuration ergonomics. - -4. **Hidden tools**: Tools that exist in the registry for programmatic use but are not exposed to the model. Useful for internal orchestration tools. - -5. **Tool output conventions**: Consistent `{"success": true, "data": {...}}` / `{"error": "..."}` shape. Models learn to parse and retry. AGH should standardize tool result format. - -6. **MCP + user plugins as registry citizens**: External MCP servers and user plugin directories are discovered at startup and register into the same registry as built-in tools. AGH should treat MCP tools and user-authored tools as first-class registry entries. - -### What AGH Should NOT Copy - -1. **Module-level mutable globals** (`_last_resolved_tool_names`): A fragile pattern that causes bugs with subagents. AGH should thread resolved tool context through function parameters. - -2. **Synchronous core with async bridging hacks** (`_run_async()`): Hermes' sync/async impedance mismatch creates complexity. AGH's Go concurrency model (goroutines + channels) avoids this entirely. - -3. **Single-file session database shared across all interfaces**: AGH's split (global catalog + per-session event store) is architecturally cleaner. It avoids the write contention Hermes must hack around with jitter retries. - -4. 
**Import-time side effects for registration**: In Go, prefer explicit registration in the composition root (`internal/daemon`) rather than relying on `init()` functions. - -5. **Flat memory.md without consolidation**: AGH already has dream consolidation, which is superior to Hermes' approach of trusting the model to manually manage memory quality. - -6. **Kitchen-sink monolith**: Hermes bundles 8 platform adapters, 6 terminal backends, voice/TTS, RL training, and batch processing into one package. AGH should keep these as extensions. - -## Summary: Core vs Extension Classification - -### Core (what AGH should build into its minimal robust foundation) - -- Tool registry with availability gating, toolset composition, and standardized result format -- FTS5 cross-session search in the session/event store -- Context compression interface with default LLM-summarization implementation -- Prompt caching hints in transcript assembly -- Structured system prompt builder with ordered stable sections -- Command approval interface with scope-based memory (once/session/permanent) -- Provider resolution interface for LLM credentials -- Token accounting and cost estimation in the observe layer -- Subagent delegation with isolated contexts and restricted tools -- MCP tool discovery and proxy -- Unified command dispatch table across interfaces -- Skills: frontmatter-indexed, user-message injected, platform-conditional - -### Extension (what AGH should support via its plugin system) - -- Gateway platform adapters (Telegram, Discord, Slack, etc.) -- Cron/scheduled automations -- Terminal execution backends (Docker, SSH, Modal, etc.) 
-- Voice/TTS pipeline -- Batch processing and trajectory generation -- RL training environments -- Honcho user modeling -- Diagnostic commands (doctor, status, insights) -- Skill auto-proposal heuristics -- DM pairing and gateway authorization -- Process management (background process registry) -- Persistent shell state -- Streaming response delivery optimization diff --git a/.compozy/tasks/extensability/analysis/analysis_openclaw.md b/.compozy/tasks/extensability/analysis/analysis_openclaw.md deleted file mode 100644 index add2f6eb8..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_openclaw.md +++ /dev/null @@ -1,228 +0,0 @@ -# OpenClaw Analysis for AGH Extensibility - -## Overview - -OpenClaw is a personal AI assistant that runs locally on a user's devices and fans out to 20+ messaging platforms through a single control plane. Its architecture follows a **hub-and-spoke control model**: a long-lived **Gateway** daemon owns every channel connection, session, tool invocation, and device pairing, while a separate **assistant runtime** (the Pi agent) performs inference and tool reasoning over WebSocket RPC. - -The project is implemented in TypeScript/Node.js with 70+ bundled extensions, 20+ channel adapters, native apps for macOS/iOS/Android, and a Plugin SDK that isolates extensions from core through a narrow typed boundary. OpenClaw targets a single trusted operator who wants one assistant reachable from any device and any chat platform. 
- -### Key Architectural Differences from AGH - -| Aspect | OpenClaw | AGH | -|---|---|---| -| Language | TypeScript/Node.js | Go single-binary | -| Communication | WebSocket RPC between Gateway and Assistant | JSON-RPC over stdio between daemon and agents | -| Session persistence | JSONL files on disk | SQLite (globaldb + per-session eventdb) | -| Extension model | npm-based Plugin SDK with manifest discovery | Go interfaces with dependency injection | -| Channel surface | 20+ messaging platform adapters | HTTP/SSE (web UI) + UDS (CLI) | -| Scope | Personal assistant across many platforms | Agent session management via ACP | -| Assistant runtime | Stateless Pi agent over WS RPC | ACP-compatible agents (Claude Code, Codex, etc.) as subprocesses | - ---- - -## Key Features Analysis - -### Feature Classification Table - -| Feature | OpenClaw Implementation | Classification for AGH | Rationale | -|---|---|---|---| -| **Gateway/Assistant Split** | Stateful Gateway control plane + stateless inference runtime communicating over WS RPC | **CORE** | AGH already has this via daemon + ACP subprocess model. The pattern of keeping all state in the daemon and treating agents as stateless between turns is foundational. | -| **Plugin SDK with Manifest-First Discovery** | Typed boundary (`plugin-sdk/`), `openclaw.plugin.json` manifests, discovery before code execution, four capability types (channel, provider, tool, skill) | **CORE** | AGH needs a plugin registration contract. Manifest-first discovery (inspect metadata without executing code) is a critical safety and performance pattern for any extensible system. | -| **Channel Adapter Matrix** | 20+ messaging adapters (WhatsApp, Telegram, Slack, Discord, etc.) behind a uniform `ChannelPlugin` interface with normalized `InboundMessage` shape | **EXTENSION** | Individual channel adapters are clearly extensions. But the normalized message contract and channel health monitoring model should inform AGH's API layer design. 
| -| **Skills System (AgentSkills format)** | YAML frontmatter + Markdown files, five-tier precedence (workspace > project > personal > managed > bundled), ClawHub registry, slash commands | **CORE (format) / EXTENSION (individual skills)** | AGH already has a skills package. The five-tier precedence model and the AgentSkills standard format are worth adopting as core. Individual skills and the registry (ClawHub equivalent) are extensions. | -| **Tool System with Catalog** | Self-describing JSON Schema tools, `tools.catalog` discovery, tool profiles (coding/research/creative/dangerous/none), allow/deny/alsoAllow composition | **CORE** | Tool catalog with self-describing schemas, profile-based defaults, and allow/deny composition rules should be core. The pattern of tools advertising their own contracts is essential for any agent-facing tool system. | -| **Approval Flow for High-Risk Operations** | Per-invocation approval with UUID tracking, broadcast to all operators, timeout + deny/approve, iOS push delivery | **CORE** | AGH must have an approval mechanism for dangerous tool invocations. The state machine (request > broadcast > wait > approve/deny/timeout) is simple and effective. | -| **DM Scope Policies** | Four policies (main, per-peer, per-channel-peer, per-account-channel-peer) preventing cross-user context leakage | **EXTENSION** | AGH is currently single-user/single-session focused. DM scope becomes relevant only if AGH exposes channel adapters or multi-user surfaces. The pattern is worth noting for future extension. | -| **Context Compaction** | LLM-powered summarization with proactive and reactive triggers, token estimation, tool-result stripping, identifier preservation, write-lock safety | **CORE** | AGH already has consolidation in its memory package. OpenClaw's approach (reactive on overflow + proactive guard, configurable compaction model, summarization that preserves identifiers) provides useful refinements. 
| -| **Device Pairing and Node Capabilities** | Fingerprint-pinned tokens, capability advertisement (`camera`, `canvas`, `screen`, `location`, `voice`), capability-based routing | **EXTENSION** | Device pairing and node capabilities are entirely about multi-device reach. Not relevant for AGH's current scope but a clean extension point if AGH adds device/node support. | -| **ACP Bridge for IDEs** | stdio-to-WS translator process (`openclaw acp`), session mapping, prompt/cancel/listSessions translation | **CORE** | AGH already has ACP as its primary protocol. OpenClaw's bridge pattern validates AGH's approach. The session-mapping strategy (per-client default, explicit override) is a good pattern. | -| **Canvas UI Rendering (A2UI)** | Agent-controlled HTML/CSS/JS workspace + structured A2UI v0.8 protocol, per-session file storage, deep-link scheme back to agent loop | **EXTENSION** | A rich visual surface is a powerful capability but not essential for AGH's core. Should be an extension that any agent can use if available. | -| **Voice and Speech Stack** | Wake-word detection, Talk Mode, STT/TTS provider matrix with fallback chains, global wake-word sync across devices | **EXTENSION** | Voice is a premium feature that adds complexity. Should be a cleanly separated extension with provider interfaces. | -| **Browser Automation** | Multi-profile CDP control, SSRF protection, node-host proxying, Chrome Extension Relay for user sessions, accessibility tree snapshots | **EXTENSION** | Browser automation is a powerful tool but clearly an extension. The SSRF protection pattern and profile-based isolation are worth noting. | -| **Model Provider System** | Auth profiles, auto-discovery (Ollama, Bedrock, Vertex), OAuth token management, auth profile rotation with cooldown, `models.json` pipeline, three-level parameter merge | **CORE (provider interface) / EXTENSION (individual providers)** | AGH needs a provider abstraction. 
The auth profile rotation with cooldown (don't thrash a rate-limited key) and the three-level parameter merge (global > model-specific > agent-specific) are patterns worth adopting in core. | -| **Cron/Webhooks (Proactive Agent)** | Cron scheduler for periodic agent jobs, webhook endpoints for HTTP-triggered runs | **EXTENSION** | Proactive agent triggers (scheduled jobs, external webhooks) are extensions on top of the core session model. Good extension candidates. | -| **Sandboxing (Docker)** | Three specialized images (generic, browser, common base), per-invocation container spawn, resource limits, network policy, nested sandboxing | **EXTENSION** | Docker-based sandboxing is an isolation strategy. AGH should define a sandboxing interface in core but let the Docker implementation be an extension. | -| **Security Audit System** | `openclaw security audit` CLI command, automated checks for filesystem permissions, gateway config, sandbox config, channel policies, skill code safety, tool policy | **CORE** | A security audit surface that validates configuration against best practices should be part of AGH's core. The pattern of automated security assessment at CLI time is valuable. | -| **Onboard Wizard** | Interactive six-step setup (model/auth, workspace, gateway, channels, daemon, health), non-interactive mode for CI, idempotent reconfiguration | **CORE** | AGH needs a clean first-boot experience. The pattern of wizard-writes-config (not hidden state) and idempotent `configure --section` reconfiguration is good. | -| **Session Middleware/Hooks** | `before_compaction`, `after_compaction`, `session.load`, `context.assemble` hooks with exec:// handlers | **CORE** | Lifecycle hooks at well-defined points (session load, context assembly, pre/post-compaction) enable extensions without core changes. AGH should define these hook points. 
| -| **Idempotency Keys** | Per-request UUID for side-effecting methods, retry-safe collapse of duplicate messages | **CORE** | Essential for any system where messages can be delivered at-least-once. AGH should adopt idempotency keys for state-mutating operations. | -| **Event Fan-Out / Broadcast** | Every agent event broadcasts to all authorized subscribers, enabling multi-client observation | **CORE** | AGH already has this via SSE. The pattern of every connected client seeing the same event stream is fundamental for observability. | -| **Sub-Agent System** | `sessions_spawn` tool, sub-agent registry with lifecycle tracking, thread-bound sessions, announcement/delivery pipeline with exactly-once semantics | **EXTENSION** | Multi-agent orchestration is an advanced feature. The `sessions_spawn` unified entry point and the sub-agent registry pattern are worth studying for AGH's future phases. | -| **Deployment Topologies** | Six deployment modes (local, Tailscale Serve, SSH tunnel, Tailscale Funnel, Docker, Fly.io) with explicit migration paths | **EXTENSION** | Each deployment topology beyond local is an extension concern. But the health endpoint contract (`/healthz`, `/readyz`) should be core. | - ---- - -## Architectural Patterns Worth Adopting - -### 1. Manifest-First Plugin Discovery - -OpenClaw's strongest extensibility pattern is the split between **manifest discovery** (read metadata, no code execution) and **code loading** (dynamic import after validation). This means: - -- `openclaw plugins status` can list all plugins without executing any plugin code -- Requirements can be checked, missing dependencies flagged, before any risk -- Disabled plugins are never loaded - -**AGH recommendation**: Define a plugin manifest format (TOML or JSON) that AGH reads at daemon startup before loading any plugin Go code. This enables `agh plugins list` without importing plugin packages. - -### 2. 
Four-Capability Plugin Model - -OpenClaw defines exactly four plugin capabilities: **channels**, **providers**, **tools**, **skills**. Every extension implements one or more of these through typed contracts. This keeps the plugin surface finite and comprehensible. - -**AGH recommendation**: Define AGH's plugin capabilities explicitly. Candidates: -- **AgentDriver** (already exists as an interface in `session/`) -- **Tool** (agent-callable capabilities with JSON Schema) -- **Skill** (YAML+Markdown instruction files) -- **Observer** (event consumers for observability/integrations) - -### 3. Tool Profiles with Allow/Deny Composition - -OpenClaw's tool profile system (`coding`, `research`, `creative`, `dangerous`, `none`) provides sensible defaults. The composition rule (`deny` always wins, `alsoAllow` adds, `allow` replaces) is simple and predictable. - -**AGH recommendation**: Adopt this for AGH's tool configuration. It avoids the complexity of inheritance trees while giving users enough control. The rule "deny always wins" is the right safety default. - -### 4. Skill Precedence Tiers - -Five tiers (workspace > project-agents > personal-agents > managed > bundled) with higher tiers winning. This lets users override bundled behavior without forking. - -**AGH recommendation**: AGH already has bundled skills. Adding workspace-level and personal-level tiers would let users customize without modifying the binary. The precedence model is simple: scan each tier, build a name-to-definition map, higher tiers overwrite. - -### 5. Lifecycle Hooks at Defined Points - -OpenClaw exposes hooks at `before_prompt_build`, `before_compaction`, `after_compaction`, `session.load`, and `context.assemble`. These are not a generic event bus -- they are specific, named lifecycle points where extensions can inject behavior. - -**AGH recommendation**: Define AGH's lifecycle hook points explicitly in the daemon package. 
Candidates: -- `session.create` / `session.resume` -- `context.assemble` (before building the prompt for the agent) -- `event.record` (after an event is persisted) -- `agent.start` / `agent.done` -- `consolidation.before` / `consolidation.after` - -### 6. Health Endpoints as Core Contract - -Every OpenClaw deployment mode (local, Docker, Fly.io, Tailscale) uses the same `/healthz` and `/readyz` HTTP endpoints. Supervisors, load balancers, and health monitors all converge on these two URLs. - -**AGH recommendation**: AGH should expose `/healthz` (liveness) and `/readyz` (readiness) on the HTTP API as a core contract. These are cheap to implement and universally useful. - -### 7. Idempotency Keys for Side-Effecting Operations - -OpenClaw requires idempotency keys on every state-mutating WS RPC method. This collapses duplicates from at-least-once delivery and makes retries safe. - -**AGH recommendation**: Adopt idempotency keys for AGH's HTTP API endpoints that mutate state (session creation, event submission, config changes). Store recent keys in an LRU cache with TTL. - -### 8. Normalized Message Shape - -OpenClaw compresses 20+ platform-specific message formats into one `InboundMessage` type: `{senderId, channelId, accountId, threadId, groupId, text, timestamp, attachments}`. Every downstream consumer works with this single shape. - -**AGH recommendation**: If AGH adds input channels beyond HTTP/UDS, define a canonical internal message type early. Even for HTTP/UDS, a normalized request shape simplifies the pipeline. - ---- - -## Extension System Insights - -### ClawHub (Skills Registry) - -OpenClaw's ClawHub (clawhub.ai) is a centralized public registry for skills, modeled after npm: - -- **Install**: `openclaw skills install github` -- **Update**: `openclaw skills update --all` -- **Search**: `openclaw skills search weather` -- **Version pinning**: `openclaw skills install github@1.2.3` - -Skills are distributed as git repos or npm packages with semver tags. 
The registry is **optional** -- users can point `skills.load.extraDirs` at any local directory and skip ClawHub entirely. - -**Insight for AGH**: A skill registry is a Phase 2-3 concern. For now, AGH should ensure its skills format is portable (the AgentSkills standard is shared across multiple agent frameworks). When a registry is needed, the npm-like CLI UX (`agh skills install/update/search`) is the right model. The critical design decision is making the registry optional -- air-gapped and enterprise deployments must work without it. - -### Native Apps (Node Mode) - -OpenClaw's native apps (macOS, iOS, Android) connect as **node-role WebSocket clients** that expose device capabilities back to the Gateway. They are NOT plugins (no in-process registration). Instead: - -1. Connect to Gateway WS with `role: "node"` -2. Advertise capabilities: `["camera", "canvas", "screen", "location", "voice"]` -3. Gateway indexes capabilities by device -4. Agent calls `nodes.invoke({command: "camera.snap"})` and Gateway routes to the right device - -The distinction between **plugins** (in-process, Plugin SDK boundary) and **nodes** (external process, WS protocol boundary) is clean and important. - -**Insight for AGH**: If AGH adds device/node support, adopt this two-tier model: -- **Extensions/plugins**: Go interfaces, in-process, compiled into the binary or loaded at startup -- **Nodes/clients**: External processes connecting via HTTP/WS/UDS, advertising capabilities, receiving routed commands - -### Channel Adapters - -Each channel adapter implements five concerns: -1. **Transport** -- how it connects -2. **Normalization** -- platform-native to internal message type -3. **Send/receive** -- round-trip delivery -4. **Auth/accounts** -- credential management -5. **Health monitoring** -- state machine with reconnect backoff - -The key insight is that every adapter follows the same interface, and core never special-cases bundled vs. third-party adapters. 
- -**Insight for AGH**: AGH's HTTP/SSE and UDS "channels" already follow this pattern implicitly. If AGH adds more input surfaces (CLI stdio, WebSocket, platform-specific adapters), formalizing the adapter interface would be valuable. The five-concern decomposition is a good checklist. - -### Plugin Configuration Pattern - -OpenClaw separates plugin config into three layers: -- **`config`**: passed to plugin setup code, referenced in prompts (no raw secrets) -- **`env`**: injected as process environment at tool invocation time (for secrets) -- **`enabled`**: toggle without removing config - -The split between `config` (agent-visible) and `env` (execution-only, never in prompt) prevents secret leakage through the LLM. - -**Insight for AGH**: When AGH's skill/tool extensions need configuration, adopt this config/env split. Never let extension secrets appear in the context sent to the LLM. - -### Extension Loading Order - -OpenClaw enforces a deterministic loading order: providers before channels before skills. This prevents a channel from registering before its required provider is loaded. - -**Insight for AGH**: AGH's daemon package (the composition root) should document and enforce an explicit initialization order for extensions. Go's `init()` functions are not sufficient -- explicit ordering through the daemon's boot sequence is needed. - ---- - -## Patterns to Explicitly Avoid - -### 1. WebSocket RPC Between Gateway and Assistant - -OpenClaw uses WS RPC because the Gateway and assistant can be on different hosts. AGH uses stdio JSON-RPC because agents are subprocesses. AGH's approach is simpler and more appropriate for its single-binary model. Do not adopt OpenClaw's WS split. - -### 2. In-Process Channel Adapters (at scale) - -OpenClaw runs 20+ channel adapters inside the Gateway process. This is fine for Node.js's event-loop model but would be problematic in Go if each adapter needed goroutines with complex lifecycle management. 
If AGH adds channel adapters, consider subprocess isolation rather than in-process loading. - -### 3. JSONL Session Persistence - -OpenClaw uses append-only JSONL files for session transcripts. AGH already uses SQLite, which is strictly better for structured queries, concurrent access, and crash recovery. Do not regress to JSONL. - -### 4. 70+ Bundled Extensions - -OpenClaw ships 70+ extensions in its binary. AGH's philosophy is "robust minimal core" -- keep the binary lean, let extensions be separately compiled or loaded. Do not bundle everything. - ---- - -## Summary of Recommendations - -### Must-Adopt (Core) - -1. **Manifest-first plugin discovery** -- read metadata before executing code -2. **Typed plugin capability model** -- enumerate the finite set of extension types -3. **Tool profiles with allow/deny composition** -- sensible defaults, predictable overrides -4. **Lifecycle hooks at named points** -- not a generic bus, but specific extension points -5. **Approval flow for dangerous operations** -- per-invocation, with timeout -6. **Health endpoints** -- `/healthz` and `/readyz` as core contract -7. **Idempotency keys** -- for all state-mutating API operations -8. **Security audit CLI** -- automated configuration validation - -### Should-Adopt (Near-term Extension Design) - -1. **Five-tier skill precedence** -- workspace > project > personal > managed > bundled -2. **Config/env split for extension secrets** -- never leak secrets into LLM context -3. **Deterministic extension loading order** -- enforce in daemon boot sequence -4. **Normalized internal message type** -- prepare for multiple input surfaces - -### Worth-Studying (Future Phases) - -1. **ClawHub-style registry** -- when AGH has enough extensions to warrant discovery -2. **Node capability advertisement** -- when AGH supports multi-device -3. **Sub-agent orchestration** -- `sessions_spawn` pattern for Phase 3 agent networks -4. 
**A2UI-style structured surfaces** -- if AGH adds visual output beyond web UI -5. **Channel adapter matrix** -- if AGH moves beyond HTTP/SSE + UDS diff --git a/.compozy/tasks/extensability/analysis/analysis_openfang.md b/.compozy/tasks/extensability/analysis/analysis_openfang.md deleted file mode 100644 index 4c22796d9..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_openfang.md +++ /dev/null @@ -1,285 +0,0 @@ -# OpenFang Analysis for AGH Extensibility - -## Overview - -OpenFang is a Rust-based Agent Operating System comprising 14 crates (~137K LoC) that runs as a persistent daemon managing AI agent sessions. It shares AGH's fundamental design philosophy -- single binary, SQLite persistence, daemon model, local-first -- but takes a maximalist approach: 53 builtin tools, 40 channel adapters, 7 bundled Hands, 60+ skills, 130+ model catalog entries, and 25 MCP templates all compiled into one ~32 MB binary. - -Where AGH follows "robust minimal core + extensible plugins", OpenFang follows "bundle everything into the binary". This is the central tension in the analysis: OpenFang validates many features AGH should eventually support, but its monolithic compilation strategy is the opposite of AGH's extension-first philosophy. The features are proven; the packaging strategy should be inverted. 
- -### Key Similarities to AGH -- Single-binary daemon with SQLite (WAL mode, `synchronous = FULL`) -- Kernel-as-composition-root pattern (OpenFang's `OpenFangKernel` ~ AGH's `daemon/`) -- Agent lifecycle state machine with session persistence -- TOML configuration with env var interpolation -- JSON-RPC over stdio for agent communication (MCP/ACP) -- Strict dependency direction enforced by module boundaries -- CLI that doubles as HTTP client when daemon is running - -### Key Differences from AGH -- Rust vs Go (crate boundaries vs package boundaries) -- Everything compiled in vs extension-first architecture -- 140+ HTTP endpoints vs AGH's focused API surface -- Custom OFP wire protocol vs AGH's ACP-based approach -- In-process LLM drivers vs subprocess-spawned ACP agents -- Built-in web dashboard (Alpine.js) vs AGH's separate React SPA - ---- - -## Key Features Analysis - -### Agent Runtime & Execution - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Agent loop (recv/recall/call/execute)** | `run_agent_loop()` in openfang-runtime, 50-iteration cap, bounded by loop guard | **CORE** | This is the fundamental execution model. AGH already has this via ACP subprocess agents, but the loop-bounding, stop-reason taxonomy, and structured result type are worth hardening. | -| **Loop guard (cycle detection)** | SHA256 fingerprinting of recent tool calls, detects ping-pong patterns, forces conclusion | **CORE** | Critical safety mechanism. Any agent that can loop must have cycle detection. AGH should implement this in the session/observe layer as a cross-cutting concern, not per-agent. | -| **Context budget allocator** | Token allocation across system/tools/history/response regions, 70% compaction, 90% emergency trim | **CORE** | Essential for long-running sessions. AGH's `transcript` package should own this, with configurable thresholds per agent. 
| -| **Session repair (7-phase validation)** | Validates message continuity, tool call completeness, role alternation, deduplication, timestamps | **CORE** | Critical for crash recovery of long-running agents. AGH's session store should validate on load. This prevents corrupt state from cascading. | -| **Three LLM drivers (Anthropic, Gemini, OpenAI-compat)** | Native HTTP clients with provider-specific adaptation | **EXTENSION** | AGH delegates LLM interaction to ACP agents (Claude Code, Codex, Gemini CLI). AGH should NOT embed LLM drivers -- the ACP model is superior because it delegates provider-specific logic to purpose-built agents. | -| **Provider routing with fallback chain** | ModelRouter with complexity scoring, auth cooldown, fallback traversal | **EXTENSION** | If AGH ever routes between multiple ACP agents based on task complexity, this pattern is useful. But it belongs as an extension, not core -- AGH's philosophy is that the agent handles its own model selection. | -| **Model catalog (130+ models with pricing)** | Static catalog compiled into binary, cost per million tokens | **EXTENSION** | Useful for metering, but should be a loadable resource file, not compiled in. AGH's config system can reference an external catalog. | - -### Scheduling & Automation - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Cron scheduler** | POSIX 5-field cron expressions, `BackgroundExecutor` with per-schedule Tokio tasks | **CORE** | A daemon that runs 24/7 needs scheduled execution. AGH should support cron-triggered sessions in the daemon package. Simple: parse cron, sleep until next fire, dispatch session. | -| **Event-driven triggers** | `TriggerEngine` subscribes to EventBus, matches event kind + regex + fire limits | **CORE** | Reactive execution is the complement to cron. 
AGH's `observe` package already has event recording; adding pattern-matching trigger dispatch is a natural next step that belongs in core. | -| **Fire limits (rate limiting triggers)** | Rolling hourly counter prevents thundering-herd from high-frequency events | **CORE** | Without fire limits, a misconfigured webhook can spawn hundreds of sessions per second. This is a safety mechanism that belongs in core. | -| **Missed fire policy (skip, don't backfill)** | Deliberate: no catch-up on missed cron fires after daemon restart | **CORE** | Good design decision. Backfilling is complex and budget-dangerous. AGH should adopt the same policy. | - -### Workflow Engine - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Multi-step workflow pipelines** | `WorkflowEngine` with sequential, parallel (fan-out), condition, loop, collect modes | **EXTENSION** | Powerful but complex. This should be an extension that composes on top of AGH's session primitives. Core should provide the building blocks (session dispatch, result collection); the workflow engine wires them together. | -| **Variable interpolation between steps** | `{{step_output}}`, `{{global_var}}`, `{{input}}` expansion in step prompts | **EXTENSION** | Implementation detail of the workflow engine extension. | -| **Error handlers per step (retry/skip/abort)** | Exponential backoff retries, skip-and-continue, abort-workflow | **EXTENSION** | Belongs with the workflow engine extension. | -| **Visual workflow builder** | Alpine.js canvas with drag-and-drop step nodes | **EXTENSION** | Frontend concern, definitely an extension.
| - -### Hands (Autonomous Agent Packages) - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Hand concept (packaged autonomous agents)** | HAND.toml manifest + SKILL.md + system prompt + cron schedule + dashboard metrics + guardrails | **EXTENSION** | Brilliant packaging concept. AGH should support a similar "agent package" format as an extension -- a directory with manifest, instructions, schedule, and tool allowlist. But it should NOT be compiled into the binary. | -| **Hand lifecycle state machine** | Discovered -> Dormant -> Active -> Running -> Paused -> Completed -> Error | **CORE** | The lifecycle state machine itself is a core pattern that AGH's session manager already partially implements. The state transitions and persistence-across-restart behavior should be part of AGH's session package. | -| **Dependency verification (binary, env var, API key checks)** | `check_requirements()` validates system state before activation | **EXTENSION** | Useful for agent packages but not core. Extensions that need external binaries should declare and check their own deps. | -| **Hand persistence across daemon restarts** | JSON state files at `~/.openfang/hands/.json`, recovered at boot | **CORE** | AGH should persist active session configurations so they survive daemon restarts. This is part of the daemon lifecycle, not an extension. | - -### Memory & Knowledge - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Tri-part memory (session + semantic + KG)** | Structured KV, semantic text search, entity-relation-fact triples, all in SQLite | **CORE (session) + EXTENSION (semantic, KG)** | Session storage is core (AGH has this). Semantic recall and knowledge graph are extensions. AGH already has `memory/` with dual-scope memory and dream consolidation -- this aligns well. 
The KG is a natural extension of AGH's memory system. | -| **Knowledge graph (entity-relation-fact triples)** | Three SQLite tables, confidence scoring, BFS traversal, per-agent scoping | **EXTENSION** | Structured knowledge is powerful for long-running agents but adds schema complexity. Should be an opt-in extension that agents can activate. | -| **Session compaction (70% threshold)** | LLM-based summarization of old user/assistant pairs, chars/4 heuristic | **CORE** | Long-running AGH sessions need compaction. The threshold-based approach with graceful degradation belongs in `transcript/`. | -| **Memory consolidation on agent clone** | Dedup entities, merge KV stores, merge KG, report conflicts | **EXTENSION** | Useful but not core. Agent cloning is an advanced feature. | - -### Channel Adapters - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Channel adapter trait** | `ChannelAdapter` (inbound stream) + `MessageAdapter` (outbound send) | **EXTENSION** | The trait design is excellent -- clean separation of inbound/outbound with platform-agnostic message envelope. AGH should define a similar interface in its extension system. | -| **40 messaging platform adapters** | Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, etc. | **EXTENSION** | Obviously extensions. Each adapter is a plugin that implements the channel interface. | -| **Message routing with 5-level priority** | Bindings -> direct routes -> user defaults -> channel defaults -> system default | **EXTENSION** | Routing logic belongs with the channel system extension, not core. | -| **Per-channel policies** | DM policy, group policy, output format, rate limits, user allow/block lists | **EXTENSION** | Configuration for channel extensions. 
| -| **Hot-reloadable channel config** | Adapter restart without daemon restart on config change | **CORE** | Hot reload of extension configuration is a core daemon capability. AGH should support this generically for all extensions. | - -### Peer Networking (OFP) - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **OFP wire protocol** | Custom TCP binary protocol with length-prefix + JSON, HMAC-SHA256 mutual auth | **EXTENSION** | Agent-to-agent networking is a Phase 3 feature for AGH. When it arrives, it should be an extension, not a custom wire protocol. AGH should prefer standard protocols (HTTP, gRPC, or A2A spec) over inventing a new one. | -| **Peer discovery (gossip)** | PeerDiscovery payload exchange for transitive endpoint discovery | **EXTENSION** | Network topology management is clearly an extension. | -| **Heartbeat and health monitoring** | 30s heartbeat with Healthy/Degraded/Unhealthy/Disconnected classification | **EXTENSION** | Peer health is part of the networking extension. | -| **Inter-agent tools (agent_send, agent_spawn)** | 5 tools for cross-agent and cross-node communication | **EXTENSION** | Agent delegation and orchestration tools are extensions that compose on top of the session system. | -| **Recursion guard (MAX_DEPTH=5)** | `task_local!` depth counter prevents infinite agent delegation chains | **CORE** | If AGH supports agent-to-agent delegation, the recursion guard is a safety mechanism that belongs in core. Unbounded recursion is a cost and stability risk. | - -### Security - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **RBAC capability gates** | Per-agent tool allowlist, deny-by-default, child inherits subset of parent | **CORE** | Essential for multi-agent safety. AGH should enforce tool/capability scoping per session. 
| -| **Approval manager (human-in-the-loop)** | Tool risk levels (Low/Medium/High/Critical), oneshot channels for blocking approval, 60s timeout | **EXTENSION** | Approval gates are important for autonomous agents but should be an extension. Core provides the hook point; the approval logic is pluggable. | -| **Merkle hash-chain audit log** | SHA256 chaining of every significant action, tamper detection, append-only | **EXTENSION** | Powerful for compliance but overkill for most AGH deployments. Should be an optional extension that wraps AGH's observe/event system. | -| **WASM sandbox (fuel + epoch + watchdog)** | Triple-metered Wasmtime sandbox for untrusted skills | **EXTENSION** | AGH doesn't execute untrusted code in-process (ACP agents are subprocesses). If AGH adds a WASM skill runtime, this belongs there. | -| **SSRF protection** | Block private IPs, metadata endpoints, DNS rebinding on web_fetch | **CORE** | If AGH ever exposes web-fetch capabilities, SSRF protection is non-negotiable. But since AGH delegates to ACP agents that have their own sandboxing, this may be the agent's responsibility. | -| **Subprocess sandbox (env_clear + allowlist)** | Clear environment, selective passthrough for child processes | **CORE** | AGH already spawns ACP agents as subprocesses. Environment isolation is a core safety property. | -| **Secret zeroization** | `Zeroizing` wrapper that scrubs memory on drop | **CORE** | All credential handling in AGH should use Go's equivalent pattern (explicit zeroing of byte slices). | -| **Prompt injection scanner** | Scan user messages for instruction overrides, delimiter injection | **EXTENSION** | Defense against prompt injection is valuable but should be a pluggable middleware, not hardcoded. | -| **Taint tracking** | Newtype wrappers that label secret data through the call chain | **EXTENSION** | Sophisticated but heavyweight. Go doesn't have Rust's type-level guarantees for this. Should be an extension if implemented. 
| - -### Cost & Metering - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Per-agent, per-provider, global cost tracking** | DashMap for per-agent, per-provider, AtomicU64 for global | **CORE** | Cost tracking is essential for any agent system. AGH's `observe` package should track token usage and cost per session. | -| **Budget enforcement (daily + monthly limits)** | Pre-flight check on every LLM call, halt agent or all agents on breach | **CORE** | Cost runaway is the top operational risk for autonomous agents. Budget gates must be in core. | -| **Cost-aware rate limiting** | GCRA token bucket where expensive models draw more tokens | **EXTENSION** | Sophisticated but an optimization. Basic rate limiting is core; cost-weighted rate limiting is an extension. | - -### MCP Integration - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **MCP client (tool discovery + dispatch)** | Connect to external MCP servers, discover tools, merge into agent tool catalog | **CORE** | AGH already has ACP for agent communication. MCP tool discovery and dispatch should be a core capability since it's the standard for tool interop. | -| **MCP server mode (expose agents as tools)** | OpenFang agents consumable by external MCP clients | **EXTENSION** | Exposing AGH agents as MCP tools is valuable for interop but not essential for core operation. | -| **25 bundled MCP templates** | Pre-configured MCP server configs for GitHub, Slack, Notion, etc. | **EXTENSION** | Templates are definitionally extensions. AGH should ship none bundled and let users install from a registry. | -| **Tool namespacing (mcp_ prefix)** | Prevent collisions between builtins, skills, and MCP tools | **CORE** | Any system that merges tools from multiple sources needs namespacing. This is a core protocol concern. 
| - -### Configuration & CLI - -| Feature | OpenFang Implementation | Classification | Rationale | -|---------|------------------------|----------------|-----------| -| **Dual execution mode (daemon + ephemeral)** | Same binary works as daemon or single-shot CLI | **CORE** | AGH should support this. `agh chat "question"` should work without a running daemon by booting an ephemeral session. | -| **Config hot reload** | Whitelist of reloadable fields, POST /api/config/reload | **CORE** | Essential for operational flexibility. AGH's daemon should support reloading config subsections without restart. | -| **Config validation endpoint** | POST /api/config/validate for dry-run validation | **EXTENSION** | Nice-to-have but not essential for core. | -| **Credential vault (AES-256-GCM)** | Encrypted secret storage with Argon2 key derivation, OS keyring integration | **EXTENSION** | AGH should integrate with OS keyrings or external secret managers, but a custom vault implementation is an extension. | -| **OAuth2 PKCE flow** | Built-in OAuth for Google, GitHub, Slack integrations | **EXTENSION** | Authentication flows for third-party services are clearly extensions. | - ---- - -## Architectural Patterns Worth Adopting - -### 1. Kernel-as-Composition-Root with Explicit Subsystem Fields - -OpenFang's `OpenFangKernel` holds ~35 fields representing every subsystem. AGH's `daemon/` package serves the same role. The key insight: make every subsystem visible as a named field, not hidden behind a service locator or DI container. - -**AGH implication**: The daemon struct should explicitly list session manager, store, observer, memory, skills, config, etc. as typed fields. New extensions register through the daemon at boot, not through a generic registry. - -### 2. Strict Dependency Direction (Foundation -> Subsystems -> Kernel -> API -> CLI) - -OpenFang's 14-crate workspace enforces no circular dependencies at compile time. 
The DAG flows: types (leaf) -> subsystems -> runtime -> kernel -> API -> CLI. - -**AGH implication**: AGH already follows this (`daemon/` imports all; nothing imports `daemon/`). Maintain this rigorously as extensions are added. Extensions should depend on core interfaces, never on the daemon or API packages. - -### 3. EventBus with Typed Events for Cross-Subsystem Reactions - -OpenFang's `EventBus` with correlation IDs connects the metering engine, audit log, trigger engine, and workflow engine without coupling them directly. - -**AGH implication**: AGH's `observe` package records events. Adding a pub-sub dispatch mechanism (typed observer/notifier pattern, not a generic bus) would enable the trigger engine and cost tracking to react to session events without importing each other. - -### 4. KernelHandle Trait for Testability - -OpenFang's `KernelHandle` trait lets the runtime call kernel methods without importing the kernel directly. This enables testing the runtime with a mock kernel. - -**AGH implication**: AGH's `session/` package defines `AgentDriver` (implemented by `acp/`). Extend this pattern: define a `KernelHandle` or `DaemonHandle` interface that the session package and extensions use to call back into the daemon. This breaks the dependency arrow and enables testing. - -### 5. Dual Execution Mode (Daemon + Ephemeral) - -The same binary can boot a full daemon or run a single-shot operation. This is critical for scripting, testing, and CLI ergonomics. - -**AGH implication**: AGH should support `agh chat "question"` without a running daemon. The daemon package should expose an ephemeral boot path that initializes just enough state for one session. - -### 6. Agent Package Format (HAND.toml Analog) - -In OpenFang, each Hand bundles a system prompt, skills, a manifest, a schedule, and guardrails into a single activatable unit.
- -**AGH implication**: AGH should define an "agent package" format (TOML manifest, instruction file, tool allowlist, schedule, resource quotas) that extensions can install, activate, and manage. This is the primary extensibility surface for end users. - -### 7. Stop Reason Taxonomy - -OpenFang's `StopReason` enum (Completed, MaxIterations, LoopDetected, Timeout, QuotaExceeded, BudgetExceeded, Error) gives precise observability into why an agent loop terminated. - -**AGH implication**: AGH's session state machine should capture terminal states with the same granularity. This feeds directly into observability, debugging, and billing. - ---- - -## Extension System Insights - -### What Should Be the Extension Interface? - -OpenFang has no runtime extension loading -- everything is compiled in. This is the opposite of what AGH wants. However, the *boundaries* between OpenFang's subsystems reveal the natural extension points: - -1. **Tool providers** -- anything that adds tools to the agent's catalog (MCP servers, skill runtimes, builtin tools). Interface: tool definition + execute function. - -2. **Channel adapters** -- anything that bridges external messaging to agent sessions. Interface: inbound message stream + outbound send. - -3. **Memory backends** -- anything that extends the memory substrate (semantic search, knowledge graph, vector DB). Interface: store + recall. - -4. **Scheduling triggers** -- anything that dispatches sessions on events or time. Interface: event pattern + session dispatch. - -5. **Workflow orchestrators** -- anything that composes multiple sessions into pipelines. Interface: step definition + execution engine. - -6. **Security layers** -- anything that adds safety checks to the execution pipeline. Interface: pre-execution hook + post-execution hook. - -### Workflow Engine as Extension Pattern - -The workflow engine is the best example of a feature that should be an extension, not core. 
It composes the core session dispatch primitive into multi-step pipelines with fan-out, conditionals, and loops. The key design lesson: - -- **Core provides**: session dispatch, result collection, event emission on completion -- **Extension provides**: step ordering, variable interpolation, parallel dispatch, error handling -- **Extension consumes**: only the core interfaces (session dispatch + event bus), never kernel internals - -This pattern generalizes: any complex orchestration (workflow engine, Hand lifecycle, channel bridge) should compose on top of core primitives through defined interfaces. - -### OFP Peer Network: Cautionary Tale - -OpenFang invented a custom TCP wire protocol for agent-to-agent communication. While technically sound (HMAC auth, nonce replay protection), it creates a compatibility island -- only OpenFang instances can speak OFP. - -**AGH recommendation**: Do NOT invent a custom protocol. Use the A2A protocol specification (Google/Linux Foundation) or plain HTTP. Agent networking should be an extension that speaks standard protocols, ensuring interoperability with non-AGH systems. 
- -### Hands/Tools/Skills Layering - -OpenFang's four-layer tool taxonomy is instructive: - -| Layer | Scope | Sandbox | Example | -|-------|-------|---------|---------| -| Builtins | Core functionality | In-process | file_read, memory_store | -| MCP tools | External interop | Subprocess/HTTP | GitHub, Slack, Notion | -| Skills | Domain expertise | WASM/subprocess | Data analysis, web scraping | -| Hands | Autonomous packages | Agent-level | Researcher, Lead, Collector | - -**AGH equivalent layering**: - -| Layer | AGH Scope | Mechanism | Example | -|-------|-----------|-----------|---------| -| ACP agent tools | Core functionality | ACP protocol | Claude Code, Codex, Gemini CLI | -| MCP tools | External interop | MCP protocol | GitHub, Linear, Notion servers | -| Skills | Domain expertise | Bundled SKILL.md | AGH's existing skills package | -| Agent packages | Autonomous units | Package manifest | Researcher, analyst, monitor | - -The key difference: AGH pushes tool execution to ACP agents instead of executing in-process. This is architecturally superior for isolation but means AGH's extension system focuses on configuration and composition rather than runtime execution. - -### What AGH Can Skip - -Several OpenFang features are consequences of its monolithic design and are unnecessary for AGH: - -1. **In-process LLM drivers** -- AGH delegates to ACP agents. No need to implement Anthropic/OpenAI/Gemini HTTP clients. -2. **WASM sandbox** -- AGH doesn't execute untrusted code in-process. ACP agents run as sandboxed subprocesses. -3. **40 channel adapters compiled into the binary** -- These should be installable extensions, not compiled in. -4. **Custom OFP protocol** -- Use standard A2A/HTTP. -5. **Built-in web dashboard** -- AGH already has a separate React SPA. Better separation of concerns. -6. **130+ model catalog** -- AGH's ACP agents handle their own model selection. A config-loadable pricing catalog is sufficient for metering. 
- ---- - -## Summary: Priority Features for AGH - -### Immediate (Core) - -1. **Budget enforcement** -- per-session and global cost limits with pre-flight checks -2. **Session stop reason taxonomy** -- precise terminal state classification -3. **Cron scheduler** -- POSIX cron for scheduled session dispatch -4. **Event-driven triggers** -- pattern-matching on session events with fire limits -5. **Loop/recursion guard** -- cycle detection for repeated tool-call loops, plus a depth limit for agent delegation chains -6. **Session compaction** -- threshold-based context trimming for long sessions -7. **Session repair on load** -- validate session state integrity after crash -8. **Dual execution mode** -- ephemeral single-shot alongside persistent daemon -9. **Config hot reload** -- reload extension configs without daemon restart -10. **Subprocess environment isolation** -- env_clear + allowlist for ACP agent spawning - -### Near-term (Extension interfaces) - -1. **Tool provider interface** -- for MCP servers and custom tool sources -2. **Channel adapter interface** -- for messaging platform bridges -3. **Agent package format** -- manifest + instructions + schedule + guardrails -4. **Workflow engine** -- multi-step session orchestration with fan-out -5. **Memory extension interface** -- for knowledge graph, semantic search backends - -### Later (Extensions) - -1. **Knowledge graph engine** -- entity-relation-fact triples with confidence scoring -2. **Approval manager** -- human-in-the-loop gates for high-risk operations -3. **Audit log (Merkle chain)** -- tamper-evident action logging -4. **Agent-to-agent networking** -- standard A2A protocol, not custom wire protocol -5.
**Credential vault** -- encrypted secret storage with OS keyring integration diff --git a/.compozy/tasks/extensability/analysis/analysis_pi_mono.md b/.compozy/tasks/extensability/analysis/analysis_pi_mono.md deleted file mode 100644 index d266e66ff..000000000 --- a/.compozy/tasks/extensability/analysis/analysis_pi_mono.md +++ /dev/null @@ -1,230 +0,0 @@ -# Pi-Mono Analysis for AGH Extensibility Design - -## Overview - -Pi-Mono is a TypeScript monorepo by Mario Zechner that implements an "aggressively extensible" AI coding agent framework. It consists of seven packages organized in three tiers: a foundation LLM API (`pi-ai`), infrastructure packages (`pi-agent-core`, `pi-tui`), and application-level consumers (`pi-coding-agent`, `pi-mom`, `pi-web-ui`, `pi-pods`). The project's guiding thesis is that coding agents should ship a minimal core with comprehensive extension points, letting users compose exactly the features they need rather than accepting a monolithic feature set. - -Pi-Mono's philosophy directly opposes "batteries-included" tools: it ships only 4 default tools (read, write, edit, bash), keeps its system prompt under 1,000 tokens, and deliberately omits MCP support, sub-agents, permission systems, plan mode, built-in todos, and background bash -- all of which can be rebuilt via its extension system. This minimalism is driven by a concrete technical constraint: context windows are finite, and every token consumed by framework overhead is unavailable for the user's actual task. - -**Relevance to AGH**: Pi-Mono validates the "robust minimal core + extensible plugin system" philosophy that AGH already pursues. It provides a detailed case study of where that boundary should be drawn and what extension surface area looks like in practice for an agent operating system. 
- ---- - -## Key Features Analysis - -### Feature Classification Table - -| # | Feature | Pi-Mono Implementation | AGH Classification | Rationale | -|---|---------|----------------------|-------------------|-----------| -| 1 | **Unified LLM Streaming API** | `pi-ai`: single `stream()` call over 20+ providers, 10 API protocols, lazy-loaded provider modules, canonical message types | **EXTENSION** | AGH's ACP protocol already abstracts agent communication via JSON-RPC over stdio. AGH spawns complete agent binaries (Claude Code, Codex, Gemini CLI) that handle their own LLM provider connections. A unified LLM API would be useful for future "native" agents AGH spawns directly, but should remain an optional provider package rather than core. | -| 2 | **Extension System (TypeScript modules with lifecycle hooks)** | `ExtensionAPI` with 50+ hooks: tools, commands, shortcuts, events, UI components, providers, state persistence. Extensions loaded via jiti transpiler. | **CORE (design pattern)** | The extension system design is the single most important pattern to adopt. AGH needs a Go-native equivalent: a plugin/extension interface with lifecycle hooks at every boundary (session start/end, tool execution, message streaming, compaction). This should be the core abstraction, not an afterthought. | -| 3 | **Skills (on-demand capability packages)** | Markdown files with YAML frontmatter following AgentSkills.io spec. Progressive disclosure: only name+description in system prompt, full content loaded on demand. | **CORE** | AGH already has `internal/skills` with bundled skill definitions. Pi validates this approach and adds the key insight of progressive disclosure (load-on-demand to save context tokens). AGH's skills loader should follow this pattern. | -| 4 | **Prompt Templates** | Markdown files with bash-style variable substitution (`$1`, `$@`, `${@:N:L}`). Expanded via `/name args`. | **EXTENSION** | Useful but not essential to the daemon core. 
Should be an extension that prompt template directories can register with the skills system. AGH's workspace/config system can discover these. | -| 5 | **Session Tree (JSONL with branching)** | Append-only JSONL where each entry has `id`/`parentId`, forming a tree. Branching via `leafId` pointer. No data ever deleted. | **CORE (adapt)** | AGH already uses SQLite for session events (`store/sessiondb`). The tree-branching concept (navigate to any point, branch without losing history) is valuable for AGH's session model. The `leafId`-based branching pattern could be adapted to SQLite with a `parent_event_id` column. | -| 6 | **Context Compaction** | Structured summarization when context exceeds threshold. Walks backwards to find cut point preserving recent tokens. Iterative compaction builds on previous summaries. | **CORE** | Critical for long-running sessions. AGH's `memory/consolidation` package handles dream consolidation, but per-session compaction for context window management is a separate concern that belongs in core. The structured summary format (Goal, Progress, Key Decisions, Next Steps) is a good template. | -| 7 | **Auto-generated Model Catalog** | Build-time script scrapes provider APIs, writes `models.generated.ts` with type-safe model definitions including pricing, context windows, capabilities. | **EXTENSION** | AGH delegates model selection to agent binaries. If AGH ever needs to route to specific models, this could be a useful extension. Not core. | -| 8 | **Cross-Provider Message Handoffs** | `transform-messages.ts` converts thinking blocks, normalizes tool call IDs, repairs orphaned tool calls, sanitizes Unicode when switching models mid-conversation. | **EXTENSION** | Relevant only if AGH manages LLM connections directly. Currently agents handle their own provider connections. Could become relevant for Phase 3 agent network protocol. 
| -| 9 | **TUI Framework (differential rendering)** | Standalone package with component tree, differential rendering (only redraws changed regions), synchronized output (CSI 2026), overlay system, Kitty keyboard protocol. | **N/A (not applicable)** | AGH uses a React 19 SPA for its web UI and UDS for CLI. A TUI framework is not needed. However, the differential rendering concept is instructive for SSE-based UI updates. | -| 10 | **Theme System (hot-reloadable)** | 51 color tokens in JSON, hot-reload via `fs.watch` with debounce, terminal capability detection, syntax highlighting integration. | **EXTENSION** | Visual customization belongs in the web UI layer, not the daemon core. AGH's web UI already uses Tailwind/shadcn. | -| 11 | **Package Manager (npm/git/local)** | `pi install`, `pi remove`, `pi update`. Packages bundle extensions, skills, prompts, themes. Supports npm, git, and local sources. Auto-install on startup from `.pi/settings.json`. | **CORE (adapted)** | AGH needs a package/plugin distribution mechanism. The concept of bundling skills, extensions, and config into installable packages is essential for the extension ecosystem. Should be adapted to Go (e.g., git-based plugin repos with `agh install`). | -| 12 | **Pi Mom (Self-managing Slack Bot)** | Headless agent deployment in Slack. Per-channel isolation with separate workspaces, MEMORY.md files, skills directories. Docker sandbox. Events system (immediate, one-shot, periodic/cron). | **EXTENSION** | Demonstrates a powerful application pattern: the same agent core deployed headlessly into a chat platform. AGH should enable this via its API layer, not by building it into core. The events system (cron-based agent triggers) is a good extension candidate. | -| 13 | **Steering & Follow-up Message Queues** | Two-queue system: steering messages redirect agent mid-turn, follow-up messages queue for after completion. Drain modes: "one-at-a-time" vs "all". | **CORE** | Essential for interactive agent sessions. 
AGH's session manager should support injecting messages into running sessions with priority semantics (interrupt vs. queue). This maps directly to AGH's HTTP/SSE API. | -| 14 | **Tool Execution Pipeline (parallel/sequential with hooks)** | Preflight sequential, execute parallel, finalize in source order. `beforeToolCall` can block, `afterToolCall` can modify results. File mutation queue for concurrent writes. | **CORE (design pattern)** | AGH delegates tool execution to agent subprocesses, but the hook pattern (before/after with block/modify capability) is relevant for the observe layer and for extensions that want to intercept tool calls visible via ACP events. | -| 15 | **Custom Message Types (declaration merging)** | TypeScript `CustomAgentMessages` interface widened via declaration merging. Custom messages in transcript but filtered from LLM context by `convertToLlm`. | **CORE (adapted)** | AGH's event store should support custom event types from extensions. The pattern of storing extension-specific data in the event stream (but excluding it from agent context) is directly applicable to `store/sessiondb`. | -| 16 | **OAuth Provider System** | 5 built-in OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity). `AuthStorage` with file-based locking. Token auto-refresh. | **EXTENSION** | Auth management for LLM providers. AGH delegates this to agent binaries, so not needed in core. Could be an extension for future native agent support. | -| 17 | **Web UI Components (mini-lit)** | Web components for chat interfaces: ChatPanel, AgentInterface, MessageList, artifacts system, sandboxed iframe execution, IndexedDB storage, custom tool renderers. | **N/A (parallel)** | AGH has its own React 19 SPA. Not directly adoptable, but the artifact system (LLM creates/modifies files rendered interactively) and custom tool renderer registry are patterns worth replicating in AGH's web UI. 
| -| 18 | **GPU Pod Management** | CLI for deploying vLLM on remote GPU pods. SSH-based provisioning, model lifecycle, health monitoring, OpenAI-compatible endpoints. | **EXTENSION** | Infrastructure automation for self-hosted LLMs. Clearly an extension/plugin, not core to an agent OS. | -| 19 | **Context File Discovery (AGENTS.md)** | Loads context files from global, parent directories, and current directory. Both `AGENTS.md` and `CLAUDE.md` recognized. Injected into system prompt. | **CORE** | AGH's workspace resolver already handles this pattern. Validates the approach. The progressive discovery (walk up from cwd to root) is the right pattern. | -| 20 | **RPC Mode (JSONL over stdin/stdout)** | Headless mode using LF-delimited JSONL for IDE integration. Extension UI forwarded as typed requests. | **CORE (validates)** | AGH already has UDS for CLI IPC. Pi's RPC mode validates that a structured protocol over stdio is essential for embedding agents in IDEs and other host processes. | -| 21 | **Dual-Scope Memory (Global + Channel/Workspace)** | MEMORY.md files at global and per-channel levels. Read before every response, injected into system prompt. Editable by both human and agent. | **CORE** | AGH already has `internal/memory` with global + workspace scope. Pi's implementation via plain Markdown files validates the approach and emphasizes that memory should be human-readable and editable. | -| 22 | **Events/Scheduling System** | Three event types: immediate, one-shot (timestamp), periodic (cron). File-based triggers (`events/` directory). Queue cap per channel. Silent completion for no-op periodic checks. | **EXTENSION** | Scheduled agent triggers are a powerful pattern but belong as an extension. AGH's daemon could expose a scheduling API that extensions register with. | -| 23 | **Cost Tracking** | Per-message `Usage` object with token counts and dollar costs. `calculateCost()` from model pricing metadata. Real-time display in TUI/web UI. 
| **CORE** | Observable cost tracking is essential for an agent OS. AGH's `observe` package should track token usage and cost per session, derived from ACP events that report usage. | - ---- - -## Architectural Patterns Worth Adopting - -### 1. Layered Package Architecture with Strict Dependency Flow - -Pi-Mono's three-tier architecture is its most important structural decision: - -``` -Foundation: pi-ai (zero internal deps) -Infrastructure: pi-agent-core (depends on pi-ai), pi-tui (standalone) -Application: pi-coding-agent, pi-mom, pi-web-ui, pi-pods -``` - -**Key rules**: Dependencies flow strictly downward. No package imports `pi-coding-agent` (the top-level app). The foundation layer has zero internal dependencies. Infrastructure packages depend only on foundation. Application packages pull together lower layers. - -**AGH parallel**: AGH already follows this with `daemon/` as sole composition root and downward-only dependency flow. This validates AGH's approach. The additional insight is that AGH's `internal/api/` packages should never be imported by core domain packages (`session/`, `memory/`, `skills/`), which AGH already enforces. - -### 2. Progressive Disclosure for Context Optimization - -Pi's most impactful design insight is that context windows are finite and expensive. Every feature decision is filtered through "what does this cost in tokens?" - -- Skills inject only name+description into the system prompt; full content is loaded on-demand when the agent decides it's relevant. -- No elaborate system prompts -- under 1,000 tokens. -- No MCP tool definitions burning context tokens whether used or not. -- Compaction keeps recent tokens intact while summarizing older ones. - -**AGH adoption**: AGH should adopt progressive disclosure as a first-class principle in its skills system. When AGH sends context to agents, skills should be listed as brief descriptors, with the full skill content available via a "read skill" mechanism. 
This directly reduces the system prompt overhead per agent session. - -### 3. Single-File Session Trees - -Pi stores entire conversation histories, including all branches, in a single append-only JSONL file. Branching is achieved by appending entries with `parentId` pointing to earlier entries rather than the current leaf. Nothing is ever deleted. - -**Benefits**: No multi-file branch management, complete audit trail, standard format parseable by any tool, no data loss from aborts or crashes (append-only). - -**AGH adaptation**: AGH uses SQLite per-session, which is more powerful but less inspectable. Consider adding a `parent_event_id` column to session events to enable tree-structured branching. The append-only guarantee maps naturally to SQLite's INSERT-only pattern. The key insight is that branching should be a core session primitive, not an afterthought. - -### 4. Extension Points as First-Class API Surface - -Pi's extension API exposes 50+ hooks organized into clear categories: - -- **Session lifecycle**: start, before_switch, before_fork, before_compact, compact, shutdown -- **Agent lifecycle**: before_agent_start, agent_start/end, turn_start/end -- **Message lifecycle**: message_start, message_update, message_end -- **Tool lifecycle**: tool_call (can block), tool_result (can modify), tool_execution_start/update/end -- **Input**: transform user input before agent processing -- **Context**: modify messages before LLM call -- **Resources**: contribute additional skill/prompt/theme paths - -The critical patterns: -- **`tool_call` can block execution** -- extensions can implement permission gates by returning `{ block: true, reason: "..." }` -- **`tool_result` chains like middleware** -- each extension handler can modify results, patches merge sequentially -- **`before_agent_start` can inject messages** -- extensions add context without modifying core logic - -**AGH adoption**: This is the blueprint for AGH's extension system. 
In Go, these hooks should be typed interfaces that extension packages implement. The `Notifier` pattern AGH already uses is a good foundation; it needs to be extended with blocking/modification semantics for tool call interception. - -### 5. Conflict Resolution Rules - -Pi has deterministic rules for when extensions collide: - -- **Shortcuts**: Reserved keybindings cannot be overridden. Non-reserved conflicts generate warnings. -- **Commands**: Built-in always wins. Extension-vs-extension: first-registered wins, duplicates get numeric suffixes. -- **Tools**: Built-in conflicts produce warnings. First registration wins for extension-vs-extension. -- **Providers**: Can override built-in by ID. Unregister restores defaults. - -**AGH adoption**: AGH needs explicit conflict resolution policies defined before the extension system is built. The "built-in always wins" rule is sensible. The "first-registered wins with warnings" approach avoids silent breakage. - -### 6. Lazy Loading and Registration - -Pi never eagerly imports heavy dependencies. Provider modules are loaded via dynamic `import()` only when first used, with `||=` caching to ensure single-load semantics. Errors during lazy load are encoded as events in the stream, never thrown as unhandled exceptions. - -**AGH adoption**: In Go, this translates to lazy initialization of extension packages. Extensions should register intent at startup (name, capabilities, hooks) but defer heavy initialization (database connections, subprocess spawning) until first use. Errors should be captured and reported through the observe layer, not panic. - ---- - -## Extension System Insights - -### Architecture: Hook-Based with Full API Surface - -Pi's extension system is its defining feature. The core insight is that the extension API should be **exactly as powerful as the internal API**. Extensions import the same packages the agent uses. There is no restricted sandbox, no capability manifest, no permission model. 
This is a deliberate design choice: the target audience is developers who already run arbitrary code. - -**Extension loading flow**: -1. Discovery: scan `~/.pi/agent/extensions/`, `.pi/extensions/`, and package manifests -2. Transpile: use jiti (just-in-time TypeScript transpiler) with virtual modules for bundled packages -3. Execute: call each extension's default export function with `ExtensionAPI` -4. Bind: wire real action methods into the runtime after initialization -5. Dispatch: route events through `ExtensionRunner` which sits between `AgentSession` and extensions - -**For AGH in Go**: The equivalent would be: -- Discovery: scan `~/.agh/extensions/`, `.agh/extensions/`, and registered plugin directories -- Load: Go plugins (`plugin.Open()`) or, more practically, subprocess-based plugins communicating via gRPC/JSON-RPC -- Register: each plugin exports a registration function that receives an `ExtensionAPI` interface -- Bind: wire hooks into daemon lifecycle after all plugins register -- Dispatch: route events through an extension runner that sits between `session.Manager` and extensions - -### The Four Extension Surfaces - -Pi provides four distinct extension mechanisms, each targeting a different user sophistication level: - -| Surface | Complexity | Capability | Token Cost | -|---------|-----------|------------|------------| -| **Context files** (AGENTS.md) | Zero code | Persistent instructions | Always loaded | -| **Skills** (SKILL.md) | Zero code | On-demand procedures | Loaded when relevant | -| **Prompt Templates** (*.md) | Zero code | Reusable shortcuts | Loaded on invocation | -| **Extensions** (*.ts) | TypeScript | Full runtime integration | No token cost | - -**AGH mapping**: -- Context files: AGH workspace already supports this via `CLAUDE.md` / config -- Skills: AGH's `internal/skills` package -- validate with progressive disclosure -- Prompt Templates: New extension type, low priority -- Extensions: Primary focus for AGH's extension system design 
- -### Multi-Package Extensibility (Pi Packages) - -Pi's package system bundles all four extension surfaces into distributable units: - -```json -{ - "pi": { - "extensions": ["./extensions"], - "skills": ["./skills"], - "prompts": ["./prompts"], - "themes": ["./themes"] - } -} -``` - -Packages support three source types (npm, git, local), two scopes (global and project-local), version pinning, selective resource loading via glob patterns, and offline mode. - -**AGH design implications**: -- AGH packages should bundle: extensions (Go plugins or subprocess handlers), skills (Markdown), config templates, and web UI components -- Source types: git repositories (primary), local directories (development) -- Scopes: global (`~/.agh/packages/`) and workspace-local (`.agh/packages/`) -- The `agh install ` command installs a package by cloning the repo and registering its contents -- A manifest file (`agh-package.toml` or similar) declares what the package provides -- Auto-install from workspace config ensures team consistency - -### Security Model: Full Trust with Escape Hatches - -Pi runs extensions with full trust -- no sandbox, no capability restrictions. The rationale: once an agent can read, write, and execute code, preventing exfiltration while maintaining utility is impossible. Security theater (permission popups) provides false assurance. - -Real security comes from: -- **Containers**: Run in Docker/VM for genuine isolation -- **Scope limitation**: Project-local extensions only affect that project -- **Audit**: Package provenance via npm/git audit tools -- **Extension permission gates**: Extensions themselves can add confirmation flows - -**AGH consideration**: AGH should follow the same model for extension trust. Since AGH runs as a daemon, the security boundary is the daemon's process permissions. Extensions run in the daemon's process (or as supervised subprocesses) and inherit its permissions. The real security boundary is the container/VM that runs the daemon. 
- -### Event System Design - -Pi's event system has two critical properties: - -1. **Listeners are awaited sequentially** -- a slow listener blocks subsequent listeners and the loop itself. This is by design: it makes `message_end` processing a barrier before tool preflight, ensuring state consistency. - -2. **State is updated before listeners fire** -- when an event arrives, internal state (messages, pending tool calls, streaming state) is updated first, then listeners are invoked. Listeners always see consistent state. - -**AGH adoption**: The `observe` package's notifier pattern should follow both rules. Events should be dispatched synchronously through registered handlers in registration order, with state mutations committed before notification. For blocking operations (like permission gates), the handler should be able to return a result that the caller inspects. - ---- - -## Summary of Recommendations for AGH - -### Adopt as CORE -1. **Extension hook system** with lifecycle events at session, agent, tool, and message boundaries -2. **Progressive disclosure** for skills (name+description in context, full content on demand) -3. **Session branching** via parent-event relationships in SQLite -4. **Context compaction** with structured summarization and iterative updates -5. **Steering/follow-up message queues** for injecting messages into running sessions -6. **Package distribution** mechanism for bundling extensions, skills, and config -7. **Cost tracking** integrated into the observe layer -8. **Conflict resolution** policies defined upfront for extension collisions - -### Adopt as EXTENSION -1. Unified LLM API (for future native agent support) -2. Prompt templates (Markdown with variable substitution) -3. Theme/visual customization -4. Scheduled event triggers (cron-based agent wake-ups) -5. OAuth provider management -6. Chat platform integrations (Slack, Discord, etc.) -7. GPU pod management / self-hosted model deployment -8. 
Cross-provider message transformation - -### Do Not Adopt -1. TUI framework (AGH uses web UI) -2. TypeScript-specific patterns (declaration merging, jiti transpiler) -3. "No MCP" stance (AGH should support MCP as an extension surface since it spawns external agents that may use MCP) -4. Single-file JSONL storage (AGH's SQLite approach is better for the daemon model) diff --git a/.compozy/tasks/web-pages/reviews-001/_meta.md b/.compozy/tasks/web-pages/reviews-001/_meta.md deleted file mode 100644 index d6dffcc07..000000000 --- a/.compozy/tasks/web-pages/reviews-001/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "18" -round: 1 -created_at: 2026-04-14T02:39:11.002701Z ---- - -## Summary -- Total: 31 -- Resolved: 20 -- Unresolved: 11 diff --git a/.compozy/tasks/web-pages/reviews-001/issue_021.md b/.compozy/tasks/web-pages/reviews-001/issue_021.md deleted file mode 100644 index c98c5b627..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_021.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: pending -file: web/src/systems/network/components/network-channels-list-panel.tsx -line: 50 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:4830d59f7aba -review_hash: 4830d59f7aba -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 021: Add loading and error states to the panel API. -## Review Comment - -This component can only render a filled list or an empty state, so routes have to special-case loading and failure outside the panel. That breaks the “component owns all states” rule for UI components in `web/`. - -As per coding guidelines, "Handle all loading, error, and empty states in components — never assume `data` exists". 
- -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_022.md b/.compozy/tasks/web-pages/reviews-001/issue_022.md deleted file mode 100644 index a6c5de701..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_022.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -status: pending -file: web/src/systems/network/components/network-create-channel-dialog.tsx -line: 55 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM56sg4b,comment:PRRC_kwDOR5y4QM63ZMIR ---- - -# Issue 022: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Use a real form submit path for the primary action.** - -This dialog has a text input, but the primary action is only wired through `onClick`. Pressing Enter in the channel name field will not submit, which is a pretty common keyboard path for dialogs like this. - -
-Suggested fix - -```diff --
-+
{ -+ event.preventDefault(); -+ onSubmit(); -+ }} -+ > -+
- ... --
-+
- -- -+ - - -- -+ -+ -``` -
- - -Also applies to: 141-153 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-create-channel-dialog.tsx` around -lines 43 - 55, The primary action in NetworkCreateChannelDialog is only wired to -an onClick handler so pressing Enter in the channel name input doesn't submit; -wrap the dialog content in a form (or add an onSubmit on the existing form -container), move the primary create logic into an onSubmit handler (e.g. reuse -the existing createChannel/create handler function referenced in the component) -and call event.preventDefault() as needed, and change the primary Button to -type="submit" so Enter in the TextInput triggers submission; apply the same fix -to the duplicate action referenced around the secondary block (lines 141-153) in -this component. -``` - -
- - - - - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_023.md b/.compozy/tasks/web-pages/reviews-001/issue_023.md deleted file mode 100644 index 12ecf9933..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_023.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -status: pending -file: web/src/systems/network/components/network-create-channel-dialog.tsx -line: 127 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM56sg4c,comment:PRRC_kwDOR5y4QM63ZMIS ---- - -# Issue 023: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Expose the agent selection state to assistive tech.** - -Each row behaves like a toggle, but the selected state is only visual. Add `aria-pressed={isSelected}` or switch to checkbox semantics so screen readers can tell which agents are selected. - -
-Suggested fix - -```diff - -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-create-channel-dialog.tsx` around -lines 96 - 127, The toggle button for each agent (the
- - - - - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_024.md b/.compozy/tasks/web-pages/reviews-001/issue_024.md deleted file mode 100644 index afe4559e3..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_024.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: pending -file: web/src/systems/network/components/network-peers-list-panel.tsx -line: 22 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:c4a038022332 -review_hash: c4a038022332 -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 024: Move PeerListItem props into an interface. -## Review Comment - -The inline object type here breaks the repository’s TypeScript shape convention. Defining a `PeerListItemProps` interface would keep this aligned with the rest of the `web/` codebase. - -As per coding guidelines, "Use `interface` for defining object shapes in TypeScript (pattern is in Zod schemas and types)". - ---- - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_025.md b/.compozy/tasks/web-pages/reviews-001/issue_025.md deleted file mode 100644 index 1cc2323e9..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_025.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: pending -file: web/src/systems/network/components/network-peers-list-panel.tsx -line: 66 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:062d07c23a7d -review_hash: 062d07c23a7d -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 025: Let the panel render loading and error states too. -## Review Comment - -Right now this component only knows how to render a list or an empty state, so routes have to branch around it for loading/failure. That pushes state ownership out of the component and conflicts with the UI-state rule for `web/` components. 
- -As per coding guidelines, "Handle all loading, error, and empty states in components — never assume `data` exists". - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_026.md b/.compozy/tasks/web-pages/reviews-001/issue_026.md deleted file mode 100644 index c92c0c7a1..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_026.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -status: pending -file: web/src/systems/network/hooks/use-network-actions.ts -line: 22 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM56sg4e,comment:PRRC_kwDOR5y4QM63ZMIV ---- - -# Issue 026: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -python - <<'PY' -from pathlib import Path -p = Path("web/src/systems/network/hooks/use-network-actions.ts") -txt = p.read_text() -for key in ("onMutate", "onError", "onSettled"): - print(f"{key}: {'present' if key in txt else 'missing'}") -PY -``` - -Repository: compozy/agh - -Length of output: 108 - ---- - -🏁 Script executed: - -```shell -cat -n web/src/systems/network/hooks/use-network-actions.ts | head -40 -``` - -Repository: compozy/agh - -Length of output: 1021 - ---- - - - -
- -**Add optimistic update handlers to this mutation.** - -The hook invalidates on settle but is missing the required optimistic update pattern (`onMutate` + `onError`) as specified in the coding guidelines. Implement query cancellation and rollback snapshots: - -
-Suggested implementation - -```diff - export function useCreateNetworkChannel() { - const queryClient = useQueryClient(); - - return useMutation({ - mutationFn: (data: CreateNetworkChannelRequest) => createNetworkChannel(data), -+ onMutate: async () => { -+ await queryClient.cancelQueries({ queryKey: networkKeys.all }); -+ const previousQueries = queryClient.getQueriesData({ queryKey: networkKeys.all }); -+ return { previousQueries }; -+ }, -+ onError: (_error, _variables, context) => { -+ if (!context) return; -+ for (const [queryKey, data] of context.previousQueries) { -+ queryClient.setQueryData(queryKey, data); -+ } -+ }, - onSettled: () => invalidateNetworkQueries(queryClient), - }); - } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - return useMutation({ - mutationFn: (data: CreateNetworkChannelRequest) => createNetworkChannel(data), - onMutate: async () => { - await queryClient.cancelQueries({ queryKey: networkKeys.all }); - const previousQueries = queryClient.getQueriesData({ queryKey: networkKeys.all }); - return { previousQueries }; - }, - onError: (_error, _variables, context) => { - if (!context) return; - for (const [queryKey, data] of context.previousQueries) { - queryClient.setQueryData(queryKey, data); - } - }, - onSettled: () => invalidateNetworkQueries(queryClient), - }); -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/hooks/use-network-actions.ts` around lines 19 - 22, -The mutation in useNetworkActions (mutationFn: createNetworkChannel) needs -optimistic update handlers: implement onMutate to cancel relevant queries via -queryClient.cancelQueries, take a snapshot with queryClient.getQueryData (store -as context), apply an optimistic update using queryClient.setQueryData to insert -the new channel into the network list, and return the snapshot in context; -implement onError to rollback by restoring the snapshot from context with -queryClient.setQueryData; keep the existing onSettled to call -invalidateNetworkQueries(queryClient) after either success or failure. Ensure -you reference the existing symbols: useNetworkActions, createNetworkChannel, -queryClient, invalidateNetworkQueries, and add onMutate/onError handlers -accordingly. -``` - -
- - - - - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_027.md b/.compozy/tasks/web-pages/reviews-001/issue_027.md deleted file mode 100644 index 9fb89115f..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_027.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: pending -file: web/src/systems/network/lib/network-formatters.ts -line: 151 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:c95a5d732f19 -review_hash: c95a5d732f19 -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 027: Prefer an interface for the metric card shape instead of an inline object type. -## Review Comment - -Use a named `interface` for this return shape to align with project TS conventions. - -As per coding guidelines, `web/**/*.ts?(x)`: Use `interface` for defining object shapes in TypeScript (pattern is in Zod schemas and types). - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_028.md b/.compozy/tasks/web-pages/reviews-001/issue_028.md deleted file mode 100644 index 23061d509..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_028.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -status: pending -file: web/src/systems/workspace/adapters/workspace-api.test.ts -line: 53 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:586256f797c3 -review_hash: 586256f797c3 -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 028: Add abort-signal coverage for fetchWorkspace as well. -## Review Comment - -The new test covers the success path, but it doesn’t validate signal propagation to `fetch`, which is part of your list-query test contract. 
- -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_029.md b/.compozy/tasks/web-pages/reviews-001/issue_029.md deleted file mode 100644 index 67b0e2de9..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_029.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: pending -file: web/src/systems/workspace/adapters/workspace-api.ts -line: 30 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:a3b38dcd1b6f -review_hash: a3b38dcd1b6f -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 029: Consider using a typed error class for consistency with other adapters. -## Review Comment - -Per coding guidelines, API adapters should use typed error classes (e.g., `WorkspaceApiError`) rather than raw `Error`. While this follows the existing pattern in the file, newer adapters like `bridges-api.ts` use `BridgesApiError` which allows consumers to inspect `status` for error handling. - -This is a pre-existing pattern, so deferring to a follow-up refactor is reasonable. - -As per coding guidelines: "Use typed error classes in API adapters — never throw raw errors" - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_030.md b/.compozy/tasks/web-pages/reviews-001/issue_030.md deleted file mode 100644 index d59b3e057..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_030.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: pending -file: web/src/systems/workspace/hooks/use-workspaces.ts -line: 5 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:a9617d94fa4c -review_hash: a9617d94fa4c -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 030: Use @/* alias imports in this updated hook module. -## Review Comment - -The new import should follow the project alias convention instead of relative paths. 
- -As per coding guidelines, `web/src/**/*.{ts,tsx}`: "Use path alias `@/*` to map to `./src/*` for all imports". - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/web-pages/reviews-001/issue_031.md b/.compozy/tasks/web-pages/reviews-001/issue_031.md deleted file mode 100644 index f0975f5bc..000000000 --- a/.compozy/tasks/web-pages/reviews-001/issue_031.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: pending -file: web/src/systems/workspace/hooks/use-workspaces.ts -line: 12 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4103023844,nitpick_hash:7b18b78deb69 -review_hash: 7b18b78deb69 -source_review_id: "4103023844" -source_review_submitted_at: "2026-04-14T02:37:32Z" ---- - -# Issue 031: Extract hook options into an interface. -## Review Comment - -Please replace the inline object-shape type with a named `interface` for consistency. - -As per coding guidelines, `web/**/*.ts?(x)`: "Use `interface` for defining object shapes in TypeScript". - ---- - -## Triage - -- Decision: `UNREVIEWED` -- Notes: diff --git a/.compozy/tasks/ext-ideas/research/analysis.md b/docs/ideas/ext-ideas/research/analysis.md similarity index 81% rename from .compozy/tasks/ext-ideas/research/analysis.md rename to docs/ideas/ext-ideas/research/analysis.md index 37b944094..39635cfd7 100644 --- a/.compozy/tasks/ext-ideas/research/analysis.md +++ b/docs/ideas/ext-ideas/research/analysis.md @@ -11,12 +11,14 @@ Five parallel research agents analyzed the extension ecosystems of six major AI agent frameworks (Pi-Mono, Hermes, OpenClaw, Claude Code, OpenFang, GoClaw) plus the broader MCP/A2A ecosystem. The research surfaced **40+ concrete extension ideas** that map to AGH's three-dimensional extension model (Resources, Capabilities, Actions). **Three converging industry standards** that AGH must support: + 1. **MCP** (Model Context Protocol) — agent-to-tool communication (5,000+ servers) 2. 
**A2A** (Agent-to-Agent Protocol) — agent-to-agent communication (150+ orgs, Linux Foundation) 3. **OpenTelemetry** — universal agent observability standard **One critical design principle** discovered across all frameworks: -> *"Hooks guarantee behavior; prompts suggest it."* Instructions achieve ~70% compliance; hooks achieve 100%. Use hooks for must-enforce rules, instructions for should-follow guidance. + +> _"Hooks guarantee behavior; prompts suggest it."_ Instructions achieve ~70% compliance; hooks achieve 100%. Use hooks for must-enforce rules, instructions for should-follow guidance. --- @@ -25,6 +27,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI ### Resources (Declarative — bundled with extensions) #### R1. Agent Packages ("Hands") + **Inspired by**: OpenFang Hands, Goose Custom Distributions, Roo Code Modes **What**: Self-contained autonomous capability packages bundling agent definition + skills + hooks + MCP configs + settings into a single deployable unit. Each "Hand" is a preconfigured agent persona (e.g., Researcher, Reviewer, DevOps Engineer). **Why**: Reduces setup friction from "configure 15 things" to "install one package". OpenFang ships 7 bundled Hands; Roo Code's Mode Gallery has hundreds of community modes. @@ -32,6 +35,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — foundational for ecosystem growth #### R2. Cron/Scheduled Triggers + **Inspired by**: Hermes cronjob tool, OpenClaw Cron tool, OpenFang scheduled Hands **What**: Time-based triggers that create sessions on schedule. Natural language cron expressions. Jobs attach skills, deliver results to any connected interface. **Why**: Makes agents proactive instead of reactive. Nightly code reviews, morning briefings, periodic health checks. Present in every major framework. 
@@ -39,6 +43,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — natural extension of session lifecycle #### R3. Webhook/Event Bridge + **Inspired by**: OpenClaw Webhooks, Claude Code CI/CD hooks, Goose recipes **What**: HTTP endpoints that inject messages into sessions on external events (GitHub push, CI failure, Slack mention, email arrival). **Why**: Enables event-driven agent workflows. Developers want agents triggered by CI events (high-demand feature request). @@ -46,6 +51,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — depends on cron infrastructure #### R4. Skill Auto-Generation ("Skill Factory") + **Inspired by**: Hermes skill-factory, GoClaw skillEvolve, Pi-Mono self-evolution **What**: After successful task completion, agent analyzes its steps, extracts reusable patterns, and writes a SKILL.md file. Every N tasks, agent evaluates and refines existing skills. **Why**: Compounding institutional knowledge. A DevOps agent that deploys 50 times creates a deployment skill capturing all edge cases. Present in Hermes (80+ community extensions in 3 months), GoClaw (nudge prompts at 70%/90% iteration budget). @@ -53,6 +59,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — requires observe maturity #### R5. Channel Adapters + **Inspired by**: OpenClaw 25+ channels, OpenFang 40 adapters, Hermes 14 platforms **What**: Messaging platform bridges (Telegram, Discord, Slack, etc.) that route messages into AGH sessions. Shared session context across channels. **Why**: Telegram alone has 145K installs in OpenClaw. Makes agents accessible from anywhere. OpenFang's 40 adapters demonstrate strong demand. @@ -64,6 +71,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI ### Capabilities (Runtime interfaces the extension provides) #### C1. 
Permission Gate: Risk Classifier + **Inspired by**: Claude Code PreToolUse hooks, Pi-Mono security/safe-git, OpenClaw tool profiles, GoClaw 7-step policy engine **What**: Classify tool calls by risk level (low/medium/high/critical), require approval for destructive actions. Composable, stackable gates. Three handler tiers: shell command (fast), AI classifier (semantic), sub-agent (deep analysis). **Why**: Present in **every** framework. Claude Code's insight: CLAUDE.md instructions = ~70% compliance, hooks = 100%. The AI classifier pattern (natural-language rules evaluated by a fast model) is more expressive than regex. @@ -71,6 +79,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **CRITICAL** — table stakes for production use #### C2. Content Validator: Secret Redaction + **Inspired by**: Pi-Mono filter-output, GoClaw ScrubCredentials, Hermes credential patterns **What**: Scan tool outputs for API keys, tokens, passwords, PII before they reach the LLM. Configurable regex patterns. Block or redact. **Why**: Prevents credential leakage into LLM context (and therefore into provider logs, training data, displayed output). Pi-Mono community built this immediately; GoClaw runs it on every tool output by default. @@ -78,6 +87,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — security baseline #### C3. Content Validator: Prompt Injection Scanner + **Inspired by**: OpenFang prompt injection scanner, Hermes HermesHub 65+ threat rules, GoClaw GuardSkillContent **What**: Scan incoming tool results, skill content, and MCP server outputs for prompt injection patterns. Block or flag. **Why**: As AGH connects to external MCP servers and loads community skills, injection attacks become a real threat. GoClaw wraps all MCP output in `<<>>` markers. 
@@ -85,6 +95,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — security for ecosystem trust #### C4. Message Transform: Context Pruning + **Inspired by**: Pi-Mono context-pruning + compaction-safeguard, OpenClaw before_compaction hook, Claude Code PostCompact **What**: TTL-based and token-budget-based pruning of old tool results before LLM calls. Time-decay model: recent = full, old = head+tail, ancient = removed. Custom compaction strategies per domain. **Why**: Context management is the #1 challenge for long-running sessions. Every framework implements this. Pi-Mono's four-layer strategy (message count, token count, TTL decay, smart compaction) is the most sophisticated. @@ -92,6 +103,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — essential for long sessions #### C5. Memory Backend: Tiered with Decay + **Inspired by**: OpenClaw memory-lancedb-pro (Weibull decay, 3-tier lifecycle), Hermes 8 pluggable backends, Mem0, Letta **What**: Three-tier memory lifecycle (Peripheral/Working/Core) with mathematical decay curves. Smart extraction categorizing memories into 6 types (facts, decisions, technical details, relationships, tasks, insights). Hybrid retrieval: vector similarity + BM25 keyword search + cross-encoder reranking. **Why**: AGH already has dual-scope memory + dream consolidation (rare advantage). Adding structured decay and hybrid retrieval would make it best-in-class. Hermes proves community will build diverse backends if interface is clean. @@ -99,6 +111,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — AGH's existing memory is a differentiator to build on #### C6. 
Memory Backend: Knowledge Graph + **Inspired by**: Memory MCP (official), Mem0 graph memory, OpenClaw relationship tracking **What**: Graph-based memory for entity relationships (who works on what, how components connect, dependency chains). Complements vector memory for relationship-aware recall. **Why**: Graph memory is emerging as production-critical for complex domains. Vector search finds similar content; graph search finds connected entities. @@ -106,6 +119,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — valuable but can start with vector-only #### C7. Observe Exporter: OpenTelemetry + **Inspired by**: Traceloop OTel MCP, FastMCP native OTel, AG2 OTel tracing, broader ecosystem convergence **What**: Export AGH events as OpenTelemetry traces with GenAI semantic conventions (model name, provider, token usage, cost, temperature, tool call args/results). Pre-built Grafana dashboards. **Why**: OpenTelemetry is emerging as the universal standard for AI agent observability. AGH's observe system already captures events; OTel export makes them consumable by existing monitoring infrastructure. @@ -113,6 +127,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — important for production deployments #### C8. Observe Exporter: Cost Tracker + **Inspired by**: Pi-Mono cost-tracker/usage-bar/context, Claude Code cost tracking, developer surveys **What**: Real-time token consumption, cost per session, context composition breakdown (how much context is system prompt vs. skills vs. memory vs. conversation). Per-session cost limits with automatic stop. **Why**: "Cost tracking and budgets" is a top developer feature request. Pi-Mono's `context` extension shows which components consume most tokens. @@ -120,6 +135,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — strong demand signal #### C9. 
Prompt Provider: Dynamic Context Injection + **Inspired by**: OpenClaw before_prompt_build hook, Hermes pre_llm_call hook, Claude Code UserPromptSubmit context injection **What**: Hook that fires before each LLM call, allowing extensions to inject context (memory recall, RAG results, channel-specific instructions, safety rails) into the prompt without modifying persisted history. **Why**: Every framework implements this pattern. It's the primary mechanism for memory injection, RAG, and dynamic context augmentation. @@ -127,6 +143,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — enables memory, RAG, and context enrichment #### C10. Agent Driver: Multi-Model Consultation ("Oracle") + **Inspired by**: Pi-Mono oracle extension, Hermes MOA (Mixture of Agents), GoClaw multi-model routing **What**: Send current conversation context to an alternative AI model for a second opinion without switching the active session. Could also implement provider fallback chains (primary -> fallback -> economy). **Why**: Different models have different strengths. Getting a second opinion on architecture decisions or bug diagnosis is valuable. Provider fallback chains improve reliability. @@ -134,6 +151,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — useful differentiation #### C11. Agent Driver: Remote Execution Backends + **Inspired by**: Pi-Mono pi-ssh-remote, Hermes 6 terminal backends (Docker/SSH/Daytona/Modal), OpenFang Docker sandbox **What**: Pluggable execution backends that redirect tool execution to remote hosts (SSH), containers (Docker), or cloud sandboxes (Modal, Daytona). The agent thinks it's running locally but commands execute remotely. **Why**: Security (isolate agent actions), scale (run on powerful remote machines), compliance (execute in approved environments). 
@@ -145,6 +163,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI ### Actions (Host API operations extensions can perform) #### A1. Session Delegation (Parent-Child) + **Inspired by**: Hermes delegate_task (3 concurrent subagents), Claude Code subagent orchestration, OpenFang inter-agent tools **What**: Spawn child sessions with isolated context, restricted toolsets, and their own workspace. Results flow back to parent. Up to N concurrent children. Zero context cost to parent. **Why**: Single-agent context windows are finite. Complex tasks (refactor + test + review) benefit from specialized agents that don't pollute each other's context. Present in every major framework. @@ -152,6 +171,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **HIGH** — core orchestration capability #### A2. Session Fork with Context Handoff + **Inspired by**: Pi-Mono handoff extension, Claude Code worktree isolation **What**: Distill current conversation context, open editor for review, spawn new focused session with that context. Also: fork session into isolated git worktree for parallel work. **Why**: Long sessions accumulate noise. Handoff lets users start fresh without losing progress. Worktree isolation enables parallel agents on the same repo. @@ -159,6 +179,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — natural extension of session model #### A3. Session Checkpointing & Rewind + **Inspired by**: Pi-Mono pi-rewind, Git-based snapshots per turn **What**: Automatic git-based snapshots (stored as refs or stash entries) after file-modifying tools. `/rewind` command with checkpoint browser, diff preview, safe restore, redo stack. **Why**: AI agents make mistakes. Clean rewind of file changes while preserving conversation state is essential for confidence in agent-assisted coding. 
@@ -166,6 +187,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — strong UX improvement #### A4. MCP Server Mode (AGH as Capability Provider) + **Inspired by**: Hermes MCP server mode, OpenFang bidirectional MCP **What**: AGH exposes its Host API as MCP tools so external agents (Claude Code, Codex, Cursor) can use AGH's sessions, memory, skills, and observe as tools. **Why**: Makes AGH composable with other agent systems. Being only an MCP client limits AGH to consuming tools; being also a server makes it a building block. @@ -173,6 +195,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — composability multiplier #### A5. Workflow Engine (DAG Orchestration) + **Inspired by**: OpenFang WorkflowEngine (5 step modes), GoClaw pipelines, broader ecosystem patterns **What**: DAG-based task orchestration across sessions. Steps are pure data (prompt template + mode + error handling). Five modes: Sequential, FanOut, Collect, Conditional, Loop. Variable interpolation connects steps. **Why**: Complex tasks need structured multi-step orchestration. The workflow engine adds no execution capability — it only orchestrates when and how existing sessions run. @@ -180,6 +203,7 @@ Five parallel research agents analyzed the extension ecosystems of six major AI **Priority**: **MEDIUM** — depends on session delegation (A1) #### A6. A2A Protocol Gateway + **Inspired by**: Google A2A protocol (v0.3, Linux Foundation, 150+ orgs) **What**: Accept/delegate tasks from external agents via A2A protocol. Publish Agent Cards at `/.well-known/agent.json`. Task lifecycle with SSE streaming. **Why**: A2A maps directly to AGH's Phase 3 (agent network protocol). Complementary to MCP (MCP = agent-to-tool, A2A = agent-to-agent). 
@@ -191,37 +215,42 @@ Five parallel research agents analyzed the extension ecosystems of six major AI ## Cross-Cutting Patterns ### Pattern 1: Hook Lifecycle Coverage + Every framework converges on these essential hook points: -| Hook Point | Claude Code | Hermes | Pi-Mono | OpenClaw | Priority for AGH | -|---|---|---|---|---|---| -| Pre-tool-call | PreToolUse | pre_tool_call | tool_call (block) | before_tool_call | **CRITICAL** | -| Post-tool-call | PostToolUse | post_tool_call | tool_result | after_tool_call | **HIGH** | -| Pre-LLM-call | UserPromptSubmit | pre_llm_call | context | before_prompt_build | **HIGH** | -| Post-LLM-call | — | post_llm_call | — | — | **MEDIUM** | -| Session start | SessionStart | on_session_start | session_start | session_start | **HIGH** | -| Session end | Stop | on_session_end | session_shutdown | session_end | **HIGH** | -| Pre-compaction | — | — | session_before_compact | before_compaction | **MEDIUM** | -| Post-compaction | PostCompact | — | — | after_compaction | **MEDIUM** | +| Hook Point | Claude Code | Hermes | Pi-Mono | OpenClaw | Priority for AGH | +| --------------- | ---------------- | ---------------- | ---------------------- | ------------------- | ---------------- | +| Pre-tool-call | PreToolUse | pre_tool_call | tool_call (block) | before_tool_call | **CRITICAL** | +| Post-tool-call | PostToolUse | post_tool_call | tool_result | after_tool_call | **HIGH** | +| Pre-LLM-call | UserPromptSubmit | pre_llm_call | context | before_prompt_build | **HIGH** | +| Post-LLM-call | — | post_llm_call | — | — | **MEDIUM** | +| Session start | SessionStart | on_session_start | session_start | session_start | **HIGH** | +| Session end | Stop | on_session_end | session_shutdown | session_end | **HIGH** | +| Pre-compaction | — | — | session_before_compact | before_compaction | **MEDIUM** | +| Post-compaction | PostCompact | — | — | after_compaction | **MEDIUM** | AGH's hook system should implement **at minimum** the 5 hooks marked
CRITICAL/HIGH. ### Pattern 2: Three-Tier Skill System + All frameworks converge on three skill tiers: -| Tier | Description | Example | AGH Implementation | -|---|---|---|---| -| **Prompt-only** | Markdown instructions injected into context | SKILL.md with procedures | Already supported | -| **Subprocess** | Code executed via stdin/stdout JSON protocol | Python/Node/Shell scripts | Via extension subprocess | -| **Sandboxed** | Code in WASM sandbox with fuel metering | Untrusted community skills | Future (Extism/wazero) | +| Tier | Description | Example | AGH Implementation | +| --------------- | -------------------------------------------- | -------------------------- | ------------------------ | +| **Prompt-only** | Markdown instructions injected into context | SKILL.md with procedures | Already supported | +| **Subprocess** | Code executed via stdin/stdout JSON protocol | Python/Node/Shell scripts | Via extension subprocess | +| **Sandboxed** | Code in WASM sandbox with fuel metering | Untrusted community skills | Future (Extism/wazero) | ### Pattern 3: Progressive Disclosure for Token Budget + Every framework implements this: only inject skill name + description into the system prompt (~50 tokens per skill). Full skill content loads on-demand when the agent determines relevance. This enables unlimited skills without context bloat. ### Pattern 4: Security Scanning as Default + Hermes (65+ threat rules), GoClaw (GuardSkillContent), OpenFang (Ed25519 signed manifests), OpenClaw (VirusTotal scanning after ClawHavoc attack) — all gate community extensions through automated security scanning. AGH must build this in from the start. ### Pattern 5: MCP Tool Namespacing + Universal convention: `mcp__{server}__{tool}` (Claude Code uses `mcp__`, GoClaw uses `mcp_`). Prevents collisions when multiple servers expose the same tool name. AGH should adopt `mcp__{server}__{tool}`. 
--- @@ -230,45 +259,45 @@ Universal convention: `mcp__{server}__{tool}` (Claude Code uses `mcp__`, GoClaw ### Tier 1: Ship First (Critical for v1 extension ecosystem) -| # | Extension | Dimension | Why First | -|---|---|---|---| -| C1 | Permission Gate: Risk Classifier | Capability | Table stakes for production — every framework has this | -| C2 | Secret Redaction | Capability | Security baseline — run on every tool output | -| C4 | Context Pruning / Compaction | Capability | Essential for long sessions — #1 user pain point | -| C9 | Dynamic Context Injection | Capability | Enables memory injection, RAG, and skills | -| R1 | Agent Packages | Resource | Foundational for ecosystem — reduces setup friction | -| A1 | Session Delegation | Action | Core orchestration — enables complex workflows | +| # | Extension | Dimension | Why First | +| --- | -------------------------------- | ---------- | ------------------------------------------------------ | +| C1 | Permission Gate: Risk Classifier | Capability | Table stakes for production — every framework has this | +| C2 | Secret Redaction | Capability | Security baseline — run on every tool output | +| C4 | Context Pruning / Compaction | Capability | Essential for long sessions — #1 user pain point | +| C9 | Dynamic Context Injection | Capability | Enables memory injection, RAG, and skills | +| R1 | Agent Packages | Resource | Foundational for ecosystem — reduces setup friction | +| A1 | Session Delegation | Action | Core orchestration — enables complex workflows | ### Tier 2: Build Next (High ecosystem demand) -| # | Extension | Dimension | Why Next | -|---|---|---|---| -| C3 | Prompt Injection Scanner | Capability | Security for ecosystem growth | -| C5 | Tiered Memory Backend | Capability | Build on AGH's existing memory advantage | -| C8 | Cost Tracker | Capability | Top developer feature request | -| R2 | Cron/Scheduled Sessions | Resource | Makes agents proactive — present in all frameworks | -| A2 | Session Fork with 
Handoff | Action | Natural session model extension | -| A3 | Session Checkpointing | Action | Strong UX improvement for coding workflows | +| # | Extension | Dimension | Why Next | +| --- | ------------------------- | ---------- | -------------------------------------------------- | +| C3 | Prompt Injection Scanner | Capability | Security for ecosystem growth | +| C5 | Tiered Memory Backend | Capability | Build on AGH's existing memory advantage | +| C8 | Cost Tracker | Capability | Top developer feature request | +| R2 | Cron/Scheduled Sessions | Resource | Makes agents proactive — present in all frameworks | +| A2 | Session Fork with Handoff | Action | Natural session model extension | +| A3 | Session Checkpointing | Action | Strong UX improvement for coding workflows | ### Tier 3: Differentiate (Strategic value) -| # | Extension | Dimension | Why Strategic | -|---|---|---|---| -| C7 | OpenTelemetry Exporter | Capability | Production monitoring standard | -| C10 | Multi-Model Consultation | Capability | Unique UX differentiation | -| R4 | Skill Auto-Generation | Resource | Compounding institutional knowledge | -| A4 | MCP Server Mode | Action | Composability multiplier | -| A5 | Workflow Engine | Action | Structured multi-agent orchestration | +| # | Extension | Dimension | Why Strategic | +| --- | ------------------------ | ---------- | ------------------------------------ | +| C7 | OpenTelemetry Exporter | Capability | Production monitoring standard | +| C10 | Multi-Model Consultation | Capability | Unique UX differentiation | +| R4 | Skill Auto-Generation | Resource | Compounding institutional knowledge | +| A4 | MCP Server Mode | Action | Composability multiplier | +| A5 | Workflow Engine | Action | Structured multi-agent orchestration | ### Tier 4: Future (Phase 2-3) -| # | Extension | Dimension | Why Later | -|---|---|---|---| -| C6 | Graph Memory Backend | Capability | Valuable but can start vector-only | -| C11 | Remote Execution Backends | Capability | 
Important but niche initially | -| R3 | Webhook/Event Bridge | Resource | Depends on cron infrastructure | -| R5 | Channel Adapters | Resource | High effort, demand-dependent | -| A6 | A2A Protocol Gateway | Action | Phase 3 agent network protocol | +| # | Extension | Dimension | Why Later | +| --- | ------------------------- | ---------- | ---------------------------------- | +| C6 | Graph Memory Backend | Capability | Valuable but can start vector-only | +| C11 | Remote Execution Backends | Capability | Important but niche initially | +| R3 | Webhook/Event Bridge | Resource | Depends on cron infrastructure | +| R5 | Channel Adapters | Resource | High effort, demand-dependent | +| A6 | A2A Protocol Gateway | Action | Phase 3 agent network protocol | --- @@ -295,6 +324,7 @@ Universal convention: `mcp__{server}__{tool}` (Claude Code uses `mcp__`, GoClaw ## Sources Detailed per-project research files: + - [analysis_pi_mono.md](research/analysis_pi_mono.md) — 30+ extensions, lifecycle hooks, skill packages - [analysis_hermes.md](research/analysis_hermes.md) — 80+ community extensions, 8 memory backends, plugin system - [analysis_openclaw.md](research/analysis_openclaw.md) — 25+ channels, ClawHub marketplace, memory tiers @@ -302,4 +332,5 @@ Detailed per-project research files: - [analysis_ecosystem.md](research/analysis_ecosystem.md) — OpenFang, A2A, MCP ecosystem, developer requests Previous architectural analyses (from extension architecture task): + - `.compozy/tasks/_archived/20260411-014454-ext-architecture/analysis_*.md` diff --git a/docs/ideas/ext-ideas/research/analysis_claude_code.md b/docs/ideas/ext-ideas/research/analysis_claude_code.md new file mode 100644 index 000000000..21b1e37da --- /dev/null +++ b/docs/ideas/ext-ideas/research/analysis_claude_code.md @@ -0,0 +1,308 @@ +# Claude Code Extensibility Ecosystem -- Research Analysis for AGH + +**Date:** 2026-04-11 +**Scope:** MCP servers, hooks, skills/commands, plugins, Agent SDK patterns, CLAUDE.md 
conventions, workflow automations +**Purpose:** Identify concrete extension ideas adaptable to AGH's three-dimensional extension model (Resources, Capabilities, Actions) + +--- + +## Overview of Findings + +Claude Code has evolved from a standalone CLI agent into a full extensible platform with a maturing plugin marketplace (101 official plugins, thousands of community skills). The ecosystem is organized around five extension axes: + +1. **MCP Servers** -- external tool connections via the Model Context Protocol (3,000+ integrations, 251+ vendor-verified) +2. **Hooks** -- lifecycle event callbacks (12 events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, SessionStart, Notification, Setup, Elicitation, ElicitationResult, PostCompact, PermissionDenied, PostToolUseFailure) +3. **Skills & Commands** -- reusable slash-command instructions (`.claude/skills/*/SKILL.md` or `.claude/commands/*.md`) +4. **Plugins** -- bundled packages of skills + hooks + MCP servers + commands (official marketplace with `claude-plugins-official` repo) +5. **Agent SDK** -- programmatic agent building in Python/TypeScript with subagent orchestration, hooks, and tool control + +The most impactful patterns for AGH are: hook-based policy enforcement, MCP-driven tool federation, skill-as-instruction files, multi-agent delegation, and classifier-based permission gating. 
+ +--- + +## Extension Catalog + +### MCP Servers (Resources: MCP) + +| Name | Category | Description | AGH Mapping | +| --------------------------- | ------------- | ------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | +| **GitHub MCP** | DevOps | Full repo management: PRs, issues, code search, branches, commits via API | Resource: MCP server; Capability: agent.driver integration for PR-driven workflows | +| **Filesystem MCP** | Core | Read/write/organize local files with configurable access boundaries | Resource: MCP server (AGH already has file access; useful as a sandboxed alternative) | +| **PostgreSQL MCP** | Database | Natural language database queries and schema inspection | Resource: MCP server; Capability: memory.backend alternative | +| **Playwright MCP** | Testing | Browser automation, E2E testing, screenshot capture, UI interaction | Resource: MCP server; Action: session-level test execution | +| **Memory MCP** | Persistence | Persistent knowledge graph across sessions | Capability: memory.backend; maps directly to AGH's memory layer | +| **Notion MCP** | Productivity | Read/write Notion pages, databases, blocks | Resource: MCP server for knowledge management | +| **Figma MCP** | Design | Read Figma frames/components, design-to-code pipeline | Resource: MCP server; Capability: prompt.provider (design context) | +| **Brave Search MCP** | Research | Privacy-first web search with source citation | Resource: MCP server; Action: observe queries | +| **Supabase MCP** | Backend | Database, auth, edge functions, storage integration | Resource: MCP server | +| **Sequential Thinking MCP** | Reasoning | Enhanced problem-solving via structured thinking steps | Capability: message.transform (reasoning augmentation) | +| **Sentry MCP** | Monitoring | Real-time error fetching, debugging, report creation | Resource: MCP server; Capability: observe.exporter | 
+| **Linear MCP** | Project Mgmt | Create/update/query issues, sprint management | Resource: MCP server | +| **Slack MCP** | Communication | Send messages, search history, manage channels | Resource: MCP server; Action: notification fan-out | +| **Jira MCP** | Project Mgmt | JQL search, status transitions, comments, ticket creation | Resource: MCP server | +| **Neon MCP** | Database | Serverless Postgres with branching, migrations, query tuning | Resource: MCP server; Capability: memory.backend | + +### Hooks (Capabilities: permission.gate, content.validate, message.transform) + +| Hook / Pattern | Category | Description | AGH Mapping | +| --------------------------------- | ------------- | ---------------------------------------------------------------------------------------------- | ------------------------------------------ | +| **Dangerous command blocker** | Security | PreToolUse on Bash: block `rm -rf`, `DROP TABLE`, force-push commands | Capability: permission.gate | +| **Sensitive file protector** | Security | PreToolUse on Edit/Write: block changes to `.env`, `package-lock.json`, `.git/` | Capability: permission.gate | +| **Auto-formatter** | Quality | PostToolUse on Edit: run Prettier/Black/gofmt after every file edit | Capability: content.validate (post-action) | +| **Auto-test runner** | Quality | PostToolUse on Edit: run test suite on modified files for instant regression feedback | Capability: content.validate | +| **Auto-commit agent work** | DevOps | PostToolUse on Edit: create micro-commits to track agent changes | Action: session event recording | +| **Prompt logger** | Observability | UserPromptSubmit: log every prompt with timestamp to audit file | Capability: observe.exporter | +| **Context injector** | Augmentation | UserPromptSubmit: inject project context, environment info, or relevant docs before processing | Capability: prompt.provider | +| **Tool input modifier** | Transform | PreToolUse (v2.0.10+): transparently modify tool inputs (add 
dry-run flags, redact secrets) | Capability: message.transform | +| **Permission classifier** | Gating | Transcript classifier (Sonnet 4.6) evaluates tool calls against natural-language rules | Capability: permission.gate (AI-based) | +| **PermissionDenied retry** | Recovery | PermissionDenied hook: retry with modified parameters or defer decision | Capability: permission.gate | +| **PostCompact context preserver** | Memory | PostCompact: ensure critical context survives summarization | Capability: memory.backend | +| **Setup initialization** | Lifecycle | Setup hook: run maintenance scripts, environment checks on session init | Action: session lifecycle | + +### Skills & Commands (Resources: skills) + +| Skill / Command | Category | Description | AGH Mapping | +| ------------------------------- | ----------- | --------------------------------------------------------------------------------------------- | -------------------------------------------------- | +| **Frontend Design (Anthropic)** | UI | Design system + philosophy injection; bold aesthetics, typography, animations (277K installs) | Resource: skill; Capability: prompt.provider | +| **Taste** | UI | Collection improving AI frontend code quality (6.9K stars) | Resource: skill | +| **Apple HIG Designer** | UI | Interfaces following Apple Human Interface Guidelines | Resource: skill; Capability: prompt.provider | +| **Shannon (AI Pen Testing)** | Security | Autonomous pen testing, 96% exploit success rate, 50+ vulnerability types | Resource: skill; Capability: content.validate | +| **VibeSec** | Security | Secure code patterns and vulnerability prevention (496 stars) | Resource: skill | +| **Skill-Threat-Modeling** | Security | STRIDE threat modeling and security review workflows | Resource: skill | +| **Code Review** | Quality | Structured review: security, performance, style violations | Resource: skill; Capability: content.validate | +| **Test Planner/Executor** | Testing | Risk-based test scenario 
creation (E2E, integration, unit) + execution | Resource: skill | +| **Commit Helper** | DevOps | Conventional commits, co-author tags, force-push prevention | Resource: skill; hook integration | +| **Project Bootstrap** | Scaffolding | New project scaffolding with preferred stack, linting, CI config | Resource: skill | +| **cc-devops-skills** | DevOps | Comprehensive DevOps skill set: deploy, infrastructure, monitoring | Resource: skill | +| **Ship command** | Workflow | Review diff, run tests, commit, push -- all in one `/ship` | Resource: skill (compound workflow) | +| **Valyu (Research)** | Research | Web search + 36 specialized data sources (SEC, PubMed, FRED, etc.) | Resource: MCP + skill; Capability: prompt.provider | + +### Plugins (Resources: bundled packages) + +| Plugin | Category | Description | AGH Mapping | +| --------------------------------- | -------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| **Security Guidance (Anthropic)** | Security | Scans file edits for vulnerabilities before execution; blocks + explains | Resource: hook + skill bundle; Capability: content.validate, permission.gate | +| **Local-Review** | Quality | 5 parallel review agents, scores issues, only flags 80+ severity | Resource: agent orchestration; Action: multi-session coordination | +| **Superpowers** | Workflow | Structured lifecycle planning + skills for brainstorming, TDD, debugging, review | Resource: skill bundle | +| **Shipyard** | DevOps | Lifecycle mgmt + IaC validation (Terraform, Ansible, Docker, K8s, CloudFormation) + auditor agent | Resource: skill + agent bundle | +| **Claude-Mem** | Memory | Action capture, compression, context injection via SQLite + Chroma vector search | Capability: memory.backend; Resource: MCP | +| **Ralph Wiggum** | Testing | Visual testing by driving Xcode simulator for Swift apps | Resource: skill + 
MCP | +| **Figma Plugin** | Design | Read Figma files, generate code from frames/components | Resource: MCP + skill bundle | +| **Language Servers (12)** | IDE | Real-time code intelligence for specific programming languages | Capability: prompt.provider (context enrichment) | +| **Feature-dev (Anthropic)** | Workflow | Guided feature development workflow | Resource: skill | +| **Commit-commands (Anthropic)** | DevOps | Standardized commit workflows | Resource: skill | + +### Agent SDK Patterns (Actions: Host API) + +| Pattern | Category | Description | AGH Mapping | +| ------------------------------ | ------------- | ---------------------------------------------------------------------------------------- | -------------------------------------------------------------------- | +| **Subagent delegation** | Orchestration | Spawn specialized child agents with own context window and tool access | Action: session spawning via Host API | +| **Explore-Plan-Act** | Workflow | Sequential three-phase loop with escalating permissions | Action: session state machine (maps to AGH session lifecycle) | +| **Operator/Orchestrator** | Coordination | Central agent decomposes tasks, delegates to specialized sub-agents, synthesizes results | Action: multi-session coordination via Host API | +| **Split-and-Merge** | Parallelism | Multiple agents in isolated git worktrees working in parallel, merge results | Action: parallel session management | +| **Custom agents via Markdown** | Configuration | `.claude/agents/*.md` with YAML frontmatter defining name, tools, model, system prompt | Resource: agent definition (maps directly to AGH agent config) | +| **Research pipeline** | Workflow | Explore subagents gather info, then act on aggregated results | Action: session chaining | +| **Tool allowlist/blocklist** | Security | `allowed_tools` / `disallowed_tools` for fine-grained tool access per agent | Capability: permission.gate | +| **Context compaction** | Memory | Auto-summarize when 
context limit approaches, preserve critical info | Capability: memory.backend; Action: observe (context health metrics) | + +### CLAUDE.md / Configuration Patterns (Resources: skills; Capability: prompt.provider) + +| Pattern | Category | Description | AGH Mapping | +| ----------------------------- | ---------- | --------------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| **Hierarchical config files** | Config | Root CLAUDE.md + subdirectory overrides; auto-loaded based on working context | Resource: skill loading by workspace path | +| **Path-scoped rules** | Config | YAML frontmatter restricts rule activation to matching directories | Capability: prompt.provider (context-aware) | +| **Compaction instructions** | Memory | "When compacting, always preserve X" directives in CLAUDE.md | Capability: memory.backend (consolidation rules) | +| **Auto-memory (MEMORY.md)** | Memory | Agent auto-detects patterns and writes own notes (v2.1.32+) | Capability: memory.backend (maps to AGH dream/consolidation) | +| **Hook-enforced rules** | Governance | Critical rules as hooks (100% enforcement) vs. CLAUDE.md instructions (~70% compliance) | Capability: permission.gate vs. prompt.provider | +| **Custom command files** | Workflow | `.claude/commands/*.md` becoming slash commands with shell execution | Resource: skill with action execution | + +--- + +## Detailed Analysis of High-Impact Extensions + +### 1. Hook-Based Policy Enforcement (PreToolUse) + +**What it does:** Intercepts every tool call before execution. Inspects the tool name, arguments, and context. Can approve, deny (exit code 2), or modify the call. The most powerful control mechanism in Claude Code. + +**Why it matters:** CLAUDE.md instructions achieve ~70% compliance. Hooks achieve 100%. For security-critical rules (no force push, no production data deletion, no secrets in commits), this gap is unacceptable. 
+ +**AGH mapping:** This maps directly to AGH's `permission.gate` capability. AGH should implement a PreToolUse hook system where: + +- Hooks are registered per-agent or per-workspace +- Each hook receives the tool call as structured input (tool name, arguments, session context) +- Hooks return allow/deny/modify decisions +- Hooks can be shell scripts, Go plugins, or HTTP endpoints +- Multiple hooks chain with configurable precedence + +**Key insight from Claude Code:** The three-tier handler system (Command hooks for simple checks, Prompt hooks for semantic evaluation, Agent hooks for deep analysis) is a powerful graduated model. AGH could adopt this with shell-based hooks for speed and agent-based hooks for complex policy decisions. + +### 2. MCP Server Federation + +**What it does:** Connects Claude Code to 3,000+ external tools via a standardized protocol. Each MCP server is a subprocess exposing tools, resources, and prompts over JSON-RPC. Claude Code discovers tools on-demand via Tool Search (lazy loading), reducing context consumption by ~95%. + +**Why it matters:** No single agent can have all tools built in. MCP makes the tool surface area effectively infinite while keeping the runtime lean. + +**AGH mapping:** AGH already supports MCP as a resource type. Key lessons from Claude Code's implementation: + +- **Lazy tool discovery** is essential at scale (10+ servers). AGH should implement tool search / on-demand schema loading rather than dumping all tool definitions into agent context. +- **Three scope levels** (user/local/project) map to AGH's global/workspace scoping. Add a `.mcp.json` project-level config for team-shared MCP servers. +- **Skills + MCP composition**: Claude Code skills can orchestrate MCP tools into workflows. AGH's skill system should support MCP tool references in skill definitions. + +### 3. 
Skills as Instruction Files + +**What it does:** A `SKILL.md` file with YAML frontmatter (name, description, trigger conditions) + markdown body (instructions Claude follows). No compilation, no build step. Skills load on-demand via slash commands or auto-detection based on task context. + +**Why it matters:** This is the lowest-friction extension mechanism. Anyone who can write markdown can create a skill. It democratizes agent customization. + +**AGH mapping:** AGH's skill system should adopt this pattern: + +- Skills are markdown files with frontmatter metadata +- Stored in `~/.agh/skills/` (global) or `.agh/skills/` (workspace) +- Auto-discovered and lazy-loaded based on task context +- Can reference other skills, MCP tools, and hooks +- Budget-capped (1% of context window, ~8K chars fallback) to prevent context bloat +- Keep skills under 500 words / 2K tokens for optimal performance + +### 4. Multi-Agent Orchestration (Subagents) + +**What it does:** The operator pattern decomposes complex tasks and delegates to specialized sub-agents, each with their own context window, tool access, and instructions. Sub-agents can run in parallel in isolated git worktrees. + +**Why it matters:** Single-agent context windows are finite. Complex tasks (refactor + test + review + deploy) benefit from specialized agents that don't pollute each other's context. + +**AGH mapping:** This is core to AGH's architecture. Key patterns to adopt: + +- **Custom agents via markdown** (`.claude/agents/*.md`): AGH already has agent definitions in TOML config. Extend to support workspace-level agent definitions in markdown for quick customization. +- **Split-and-merge in worktrees**: AGH should support spawning sessions in isolated worktrees with automatic branch management and merge coordination. +- **Explore-Plan-Act lifecycle**: Map to AGH's session state machine. Three phases with escalating tool permissions. + +### 5. 
Classifier-Based Permission Gating + +**What it does:** A fast AI classifier (running on a smaller model) evaluates each tool call against natural-language rules before execution. Two-stage: fast single-token filter, then chain-of-thought only if flagged. Rules are written in prose, not regex. + +**Why it matters:** Traditional permission systems use regex or glob patterns. Prose rules ("don't modify infrastructure files unless the user explicitly asked for infrastructure changes") capture intent that patterns cannot. + +**AGH mapping:** This is a sophisticated `permission.gate` capability: + +- Use a smaller/faster model as a classifier for tool call evaluation +- Rules defined in natural language in config +- Two-stage evaluation for performance (fast filter + deep reasoning) +- Configurable per-agent and per-workspace +- Precedence: deny rules > allow exceptions > explicit user intent + +### 6. Plugin Marketplace Model + +**What it does:** Plugins bundle skills + hooks + MCP servers + commands into installable packages. Official marketplace (`claude-plugins-official`) with 101 plugins, plus community marketplaces. Install via `/plugin install name@registry`. + +**Why it matters:** Individual skills and hooks are useful but fragmented. Plugins provide complete, tested workflows. The marketplace model enables distribution and discovery. + +**AGH mapping:** AGH should plan for a plugin/extension registry: + +- Extensions bundle: agent definitions, skills, hooks, MCP server configs +- Registry format: Git repos with standardized manifest files +- Install via CLI: `agh plugin install name@registry` +- Scope control: user-level vs. workspace-level installation +- Enterprise: managed registries with approval workflows + +### 7. Auto-Memory and Dream Consolidation + +**What it does:** Claude Code v2.1.32 auto-generates MEMORY.md by observing user patterns, preferences, and project conventions. This is separate from CLAUDE.md (human-written project docs). 
The Claude-Mem plugin adds SQLite + Chroma vector search for hybrid memory retrieval. + +**Why it matters:** Memory that builds itself from observation is more complete and current than manually maintained docs. Vector search enables semantic retrieval of relevant context. + +**AGH mapping:** This maps directly to AGH's memory and dream consolidation layers: + +- Auto-memory: AGH's observe layer already captures events. The consolidation/dream system should synthesize these into persistent memory entries. +- Dual-scope: global memory (user preferences) + workspace memory (project conventions) -- AGH already has this. +- Hybrid retrieval: keyword + vector search over consolidated memories. +- Compaction rules: configurable instructions for what to preserve during context compaction. + +--- + +## Key Takeaways for AGH Extension Ideas + +### High-Priority Extensions to Build + +1. **Hook pipeline with PreToolUse/PostToolUse** -- The single highest-impact extension mechanism. Three handler tiers (command/prompt/agent) provide graduated complexity. Essential for permission.gate and content.validate capabilities. + +2. **Lazy MCP tool discovery** -- As AGH connects to more MCP servers, eager tool loading will bloat agent context. Implement on-demand tool search and schema fetching. + +3. **Skill files with auto-discovery** -- Markdown-based skill definitions with YAML frontmatter. Lowest friction for users. Budget-capped context injection. + +4. **Permission classifier** -- AI-based tool call evaluation using natural-language rules. More expressive than regex patterns. Essential for autonomous agent operation. + +5. **Plugin bundling format** -- Define a standard for packaging skills + hooks + MCP configs + agent definitions as installable extensions. + +### Medium-Priority Extensions + +6. **Subagent orchestration with worktree isolation** -- Spawn parallel agents in isolated git worktrees. Operator pattern for complex multi-phase tasks. + +7. 
**Auto-memory from observation** -- Agent-generated memory entries from event stream analysis, distinct from human-configured project docs. + +8. **Hierarchical config with path scoping** -- Config files that activate only when the agent works in matching directories. + +9. **PostCompact hooks** -- Ensure critical context survives memory consolidation. + +10. **CI/CD integration actions** -- GitHub Actions / GitLab CI integration for automated code review, security audit, release notes. + +### Design Principles Learned + +- **Deterministic enforcement via hooks, not instructions.** Instructions are probabilistic (~70%). Hooks are deterministic (100%). Use hooks for must-enforce rules, instructions for should-follow guidance. +- **Lazy loading is essential at scale.** Claude Code's Tool Search pattern (95% context reduction) is critical when connecting 10+ MCP servers. +- **Prose rules beat regex for intent.** Permission rules written as natural language capture nuance that glob patterns cannot. +- **Skills should be small.** Under 500 words / 2K tokens. Focused on one workflow. Include examples for better accuracy. +- **Three scope levels** (user/workspace/project-shared) cover all organizational needs. +- **Plugins are the distribution unit.** Individual skills/hooks are building blocks; plugins are the installable product. 
+ +### AGH Extension Model Mapping Summary + +| Claude Code Concept | AGH Dimension | AGH Component | +| -------------------------- | ------------- | ---------------------------------------- | +| MCP Server | Resource | MCP (already supported) | +| Skill / Command | Resource | Skills (already supported) | +| Hook (PreToolUse) | Capability | permission.gate, content.validate | +| Hook (PostToolUse) | Capability | content.validate, observe.exporter | +| Hook (UserPromptSubmit) | Capability | prompt.provider, message.transform | +| Hook (tool input modifier) | Capability | message.transform | +| Permission classifier | Capability | permission.gate (AI-based) | +| Auto-memory / MEMORY.md | Capability | memory.backend (dream consolidation) | +| Agent definition (.md) | Resource | Agent (extend TOML config with markdown) | +| Plugin bundle | Resource | New: composite extension package | +| Subagent delegation | Action | Host API: session spawning | +| Operator pattern | Action | Host API: multi-session coordination | +| Split-and-merge | Action | Host API: parallel session management | +| Context compaction | Action | Observe: context health metrics + memory | +| Tool Search | Action | Host API: lazy MCP tool discovery | + +--- + +## Sources + +- [Hooks reference - Claude Code Docs](https://code.claude.com/docs/en/hooks) +- [Extend Claude with skills - Claude Code Docs](https://code.claude.com/docs/en/skills) +- [Connect Claude Code to tools via MCP - Claude Code Docs](https://code.claude.com/docs/en/mcp) +- [Configure permissions - Claude Code Docs](https://code.claude.com/docs/en/permissions) +- [Agent SDK overview - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/overview) +- [Best Practices for Claude Code - Claude Code Docs](https://code.claude.com/docs/en/best-practices) +- [Discover and install prebuilt plugins through marketplaces - Claude Code Docs](https://code.claude.com/docs/en/discover-plugins) +- [Claude Code auto mode - 
Anthropic](https://www.anthropic.com/engineering/claude-code-auto-mode) +- [Using CLAUDE.MD files - Claude Blog](https://claude.com/blog/using-claude-md-files) +- [Building agents with the Claude Agent SDK - Claude Blog](https://claude.com/blog/building-agents-with-the-claude-agent-sdk) +- [awesome-claude-code - GitHub (hesreallyhim)](https://github.com/hesreallyhim/awesome-claude-code) +- [awesome-mcp-servers - GitHub (wong2)](https://github.com/wong2/awesome-mcp-servers) +- [awesome-claude-code-toolkit - GitHub (rohitg00)](https://github.com/rohitg00/awesome-claude-code-toolkit) +- [awesome-agent-skills - GitHub (VoltAgent)](https://github.com/VoltAgent/awesome-agent-skills) +- [claude-plugins-official - GitHub (anthropics)](https://github.com/anthropics/claude-plugins-official) +- [claude-code-hooks-mastery - GitHub (disler)](https://github.com/disler/claude-code-hooks-mastery) +- [Claude Code Hooks Reference: All 12 Events - Pixelmojo](https://www.pixelmojo.io/blogs/claude-code-hooks-production-quality-ci-cd-patterns) +- [Claude Code hooks: A practical guide - eesel AI](https://www.eesel.ai/blog/hooks-in-claude-code) +- [Claude Code Hooks: A Practical Guide - DataCamp](https://www.datacamp.com/tutorial/claude-code-hooks) +- [Claude Code Hook Examples - Steve Kinney](https://stevekinney.com/courses/ai-development/claude-code-hook-examples) +- [CLAUDE.md best practices - DEV Community](https://dev.to/cleverhoods/claudemd-best-practices-from-basic-to-adaptive-9lm) +- [Claude Code Skills vs MCP Servers - DEV Community](https://dev.to/williamwangai/claude-code-skills-vs-mcp-servers-what-to-use-how-to-install-and-the-best-ones-in-2026-548k) +- [Best Claude Code Skills & Plugins 2026 - DEV Community](https://dev.to/raxxostudios/best-claude-code-skills-plugins-2026-guide-4ak4) +- [10 Must-Have Skills for Claude 2026 - Medium](https://medium.com/@unicodeveloper/10-must-have-skills-for-claude-and-any-coding-agent-in-2026-b5451b013051) +- [Claude Code 2.0.13 Plugin 
Marketplace - Medium](https://alirezarezvani.medium.com/claude-code-2-0-13-be2c0a723856) +- [The Complete Guide to Building Agents with the Claude Agent SDK - Nader Dabit](https://nader.substack.com/p/the-complete-guide-to-building-agents) +- [Top 10 MCP Servers for Claude Code - Apidog](https://apidog.com/blog/top-10-mcp-servers-for-claude-code/) +- [10 Must-Have MCP Servers for Claude Code - Medium](https://roobia.medium.com/the-10-must-have-mcp-servers-for-claude-code-2025-developer-edition-43dc3c15c887) +- [Piebald-AI/claude-code-system-prompts - GitHub](https://github.com/Piebald-AI/claude-code-system-prompts) diff --git a/docs/ideas/ext-ideas/research/analysis_ecosystem.md b/docs/ideas/ext-ideas/research/analysis_ecosystem.md new file mode 100644 index 000000000..da5267dde --- /dev/null +++ b/docs/ideas/ext-ideas/research/analysis_ecosystem.md @@ -0,0 +1,475 @@ +# AI Agent Extension Ecosystem Research + +## Overview + +This document captures research into the AI agent extension ecosystem as of April 2026, with a focus on concrete extension ideas that could be adapted for AGH's three-dimensional extension model (Resources, Capabilities, Actions). The research covers OpenFang (a Rust-based agent OS), the MCP server ecosystem, extension patterns from major AI coding tools, emerging protocols (A2A), agent memory systems, workflow orchestration, permission/sandbox patterns, and developer feature requests. + +--- + +## 1. OpenFang: The Closest Comparable System + +OpenFang is an open-source Agent Operating System built in Rust -- the closest architectural analog to AGH. It compiles to a single ~32MB binary (137K LOC, 14 Rust crates) and runs agents as background daemons. 
+ +### 1.1 Built-in Tools (53 tools in openfang-runtime) + +OpenFang ships 53 tools in its `openfang-runtime` crate, spanning several categories: + +| Category | Tools | Description | +| -------------- | --------------------------------------------- | -------------------------------------------------------------------------------- | +| Web | web_search, browser_automation, web_fetch | Search engines, headless browser control, URL fetching | +| File | file_read, file_write, file_list, file_delete | Workspace-confined file operations with path traversal prevention | +| Code/Process | process_start, code_execute | Subprocess spawning with allowlist validation, env-clearing, timeout enforcement | +| Media | image_generation, tts (text-to-speech) | Image creation via AI models, voice synthesis | +| Data | knowledge_graph, data_analyze | Graph-based knowledge storage, structured data analysis | +| Infrastructure | docker_run, docker_build | Container management for isolated execution | +| Communication | email_send, notification_push | Outbound messaging capabilities | + +All tool code runs inside a WASM sandbox with dual metering (fuel + epoch interruption). File operations are workspace-confined. Subprocesses are env-cleared and timeout-enforced. + +### 1.2 Hands System (7 Bundled Agent Packages) + +"Hands" are OpenFang's core innovation -- self-contained autonomous capability packages that combine configuration, expert knowledge, operational procedures, and tool access into a single deployable unit. 
+ +Each Hand bundles: + +- `HAND.toml` manifest +- System prompt with multi-phase operational playbook +- `SKILL.md` expert knowledge +- Configurable settings +- Dashboard metrics + +| Hand | Domain | What It Does | +| ---------- | ------------- | ---------------------------------------------------------------------------------------- | +| Clip | Content | Transforms long-form video into short clips with captions, thumbnails, voice-overs | +| Lead | Sales | Discovers, enriches, scores, deduplicates qualified leads on schedule with ICP profiling | +| Collector | Intelligence | Monitors targets and gathers competitive intelligence | +| Predictor | Forecasting | Makes predictions with Brier score tracking for calibration | +| Researcher | Productivity | Cross-references sources, fact-checks (CRAAP evaluation), generates cited reports | +| Twitter | Communication | Manages X/Twitter accounts autonomously | +| Browser | Automation | Web automation for scraping, form-filling, and interaction | + +**AGH mapping**: Hands map directly to AGH's Resources (agents + skills bundled together). AGH could implement a similar concept where an "agent package" bundles an agent definition, skills, hooks, and MCP servers into a single deployable unit. + +### 1.3 Channel Adapters (40 Adapters) + +OpenFang connects to 40 messaging platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Teams, Google Chat, Feishu, DingTalk, Mastodon, Bluesky, LinkedIn, Reddit, IRC, WebChat, and 24+ more. + +Each adapter supports per-channel model overrides, DM/group policies, rate limiting, and output formatting. New adapters implement the `ChannelAdapter` trait. + +**AGH mapping**: These map to AGH's Capabilities dimension. A `channel.adapter` capability type would allow AGH to expose agent sessions across messaging platforms. The adapter pattern (trait/interface implementation) aligns with AGH's interface-based extension model. 
+ +### 1.4 Skills System (60 Bundled Skills) + +OpenFang ships 60 bundled skills compiled into the binary, using the `SKILL.md` format (YAML frontmatter + Markdown body). Categories include CI/CD, Ansible, Prometheus, Nginx, Kubernetes, Terraform, Helm, Docker, sysadmin, shell-scripting, Linux networking. + +Three skill types exist: + +1. **Prompt-only skills (SKILL.md)** -- inject expert domain knowledge into system prompt +2. **Python skills** -- run as subprocesses, communicate via JSON over stdin/stdout +3. **Rust/WASM skills** -- compiled to WASM, run in sandboxed environment with fuel metering + +Each skill has a `skill.toml` manifest with metadata, runtime config, tool declarations, and capability requirements. + +**AGH mapping**: AGH already has a skills system. OpenFang's `skill.toml` manifest pattern (declaring required capabilities like `NetConnect`) is worth adopting. The three-tier skill type system (prompt-only, subprocess, WASM) is a good model for AGH's skill extensibility. + +--- + +## 2. MCP Server Ecosystem + +As of March 2026, there are 5,000+ community MCP servers, with 440 curated in the best-of-mcp-servers list (930K total GitHub stars across 34 categories). 
+ +### 2.1 Most Popular MCP Servers by Category + +| Category | Server | Stars/Installs | What It Does | AGH Mapping | +| ----------------- | ------------------- | ------------------------------------ | --------------------------------------------------------------- | ----------------------------------------------- | +| **Documentation** | Context7 | 11K views, 690 installs (FastMCP #1) | Injects fresh, version-specific docs into prompts | Resource (MCP) + Capability (prompt.provider) | +| **Browser** | Playwright MCP | 30K stars, ~6K views | Structured browser automation via accessibility snapshots | Resource (MCP) + Capability (agent.driver tool) | +| **Git/GitHub** | GitHub MCP | Most-starred MCP server | PR management, issue triaging, code review automation | Resource (MCP) | +| **Database** | PostgreSQL MCP | High adoption | Natural language to SQL, schema introspection | Resource (MCP) | +| **Database** | Supabase MCP | Growing | Postgres + edge functions + schema management | Resource (MCP) | +| **Filesystem** | Filesystem MCP | Official reference | Secure file read/write/search within allowed directories | Resource (MCP) | +| **Memory** | Memory MCP | Official reference | Persistent knowledge graph across sessions | Resource (MCP) + Capability (memory.backend) | +| **Reasoning** | Sequential Thinking | Popular | Structured step-by-step reasoning | Capability (message.transform) | +| **Search** | Firecrawl MCP | Growing | Web scraping with JS rendering, anti-bot, clean markdown output | Resource (MCP) | +| **Cloud** | AWS MCP | 8.7K stars | Integration with AWS services and resources | Resource (MCP) | +| **Automation** | Zapier MCP | Growing | Connects to thousands of apps via Zapier workflows | Resource (MCP) | +| **Automation** | Pipedream MCP | Growing | 2,500 APIs, 8,000+ prebuilt tools | Resource (MCP) | +| **Cloud** | Cloudflare MCP | Growing | Workers/KV/R2/D1 management | Resource (MCP) | +| **Data** | MindsDB MCP | 39K stars | Unified data platform 
across databases | Resource (MCP) | +| **Search/RAG** | Pinecone MCP | Growing | Vector similarity search for RAG | Resource (MCP) + Capability (memory.backend) | + +### 2.2 MCP Apps (January 2026) + +Anthropic launched MCP Apps -- interactive UIs that render dashboards, forms, and charts directly inside Claude. Launch partners: Amplitude, Asana, Box, Clay, Hex, Salesforce. + +**AGH mapping**: AGH could support MCP Apps as a UI extension point, where MCP servers can provide rendered components in the web UI. + +### 2.3 Recommended Starting Stack for Developers + +1. Context7 (documentation injection) +2. Playwright (browser automation) +3. GitHub (PR/issue management) +4. PostgreSQL or Supabase (database) +5. Memory (persistent knowledge graph) + +**AGH mapping**: AGH should ship with built-in MCP server support and potentially bundle or recommend these servers as defaults for developer-focused use cases. + +--- + +## 3. AI Coding Agent Extension Patterns + +### 3.1 Extension Architectures Across Tools + +| Tool | Extension Mechanism | Key Pattern | AGH Relevance | +| ---------------- | ----------------------------------------------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | +| **Claude Code** | Skills (SKILL.md), hooks, custom slash commands | Markdown-based skills with YAML frontmatter; 12 lifecycle hook events; project/user scope | Directly applicable -- AGH already uses similar patterns | +| **Cursor** | Rules files, MCP, Composer | `.cursor/rules` for project context; multi-agent Composer (8 parallel agents) | Rules files map to AGH config; parallel agents map to session management | +| **Cline** | MCP Marketplace, subagents, CLI 2.0 | Client-side architecture; 5M+ installs; dedicated MCP marketplace for discovery | MCP marketplace concept; subagent spawning | +| **Roo Code** | Custom Modes, Mode Gallery | Specialized AI personas with 
scoped tool permissions per mode | Maps to AGH agent definitions with capability restrictions | +| **Continue.dev** | `.continuerc.json`, local indexing, @docs | Lancet protocol for local vector indexing; semantic codebase search | Maps to AGH memory.backend + prompt.provider | +| **Aider** | Git-native, BYOM | Terminal-first; git-aware diffs; repository map | Maps to AGH's CLI-first approach | +| **Goose** | MCP extensions, recipes, custom distributions | YAML recipe workflows; Extension Manager UI; custom distros | Recipes map to AGH skills; distros map to agent configs | +| **Windsurf** | Cascade, rules | Cascade learns project patterns; greenfield-optimized | Pattern learning maps to AGH memory system | + +### 3.2 Claude Code's Extension Model (Most Relevant to AGH) + +Claude Code's model is the most directly relevant since AGH manages Claude Code as a subprocess: + +**Skills**: Markdown files in `.claude/skills/<skill-name>/SKILL.md` with supporting scripts, templates, examples. Two invocation controls: + +- `disable-model-invocation: true` -- only human can invoke (for side-effect workflows) +- `user-invocable: false` -- only model can invoke (background knowledge) + +**Hooks (12 lifecycle events)**: + +- PreToolUse, PostToolUse, PostToolUseFailure +- SessionStart, SessionEnd, Stop +- SubagentStart, SubagentStop +- UserPromptSubmit, Notification +- PreCompact, PermissionRequest + +"Hooks guarantee behavior; prompts suggest it." This is a critical design principle. + +**AGH mapping**: AGH's hooks system should mirror these 12 events. The separation between "hooks guarantee" and "prompts suggest" maps perfectly to AGH's distinction between deterministic hooks and AI-driven skills. 
+ +### 3.3 Goose's Extension Model + +Goose (29K+ stars, Apache 2.0, now under Linux Foundation's AAIF) provides: + +- **MCP-native extensions**: Any MCP server becomes a Goose extension automatically +- **Recipes**: Reusable YAML workflow definitions packaging goals, required extensions, structured inputs, and sub-recipes +- **Custom Distributions**: Preconfigured provider + extension + branding bundles +- **Extension Manager UI**: Desktop app for browsing, toggling, configuring extensions +- **ACP integration**: Connects to VS Code, Cursor, Windsurf, JetBrains via Agent Client Protocol + +**AGH mapping**: Goose's recipe system maps to AGH's skills. Custom distributions map to workspace-level agent configurations. The Extension Manager UI concept could inform AGH's web UI design. + +### 3.4 Roo Code's Custom Modes + +Roo Code's differentiating feature is Custom Modes -- specialized AI personas with: + +- Tailored system instructions per mode +- Scoped tool permissions (e.g., security reviewer can read but not write) +- Community Mode Gallery for sharing configurations +- 5 built-in modes: Code, Architect, Ask, Debug, Custom + +**AGH mapping**: This maps to AGH agent definitions with per-agent capability restrictions. AGH could implement a "mode" concept as a lightweight agent configuration overlay. + +--- + +## 4. Emerging Protocols and Patterns + +### 4.1 A2A (Agent-to-Agent Protocol) + +Google's A2A protocol (introduced April 2025; at v0.3 as of July 2025) enables communication between opaque agent systems. It is now governed under the Linux Foundation, with 150+ supporting organizations. 
+ +**Core concepts**: + +- **Agent Cards**: JSON manifests at `/.well-known/agent.json` listing name, endpoint, skills, auth +- **Task lifecycle**: pending -> in-progress -> completed/failed, with SSE streaming +- **Transport**: HTTP, SSE, JSON-RPC (v0.3 adds gRPC) +- **Complementary to MCP**: MCP = agent-to-tool, A2A = agent-to-agent + +**AGH mapping**: A2A maps directly to AGH's agent network protocol (Phase 3). AGH sessions could publish Agent Cards, accept tasks from external agents via A2A, and delegate subtasks to remote agents. This is a natural fit for AGH's HTTP/SSE API. + +| A2A Concept | AGH Mapping | +| -------------------- | ---------------------------------------------------- | +| Agent Card | Agent definition + session metadata exposed via HTTP | +| Task submission | New session creation or message to existing session | +| Task streaming | SSE event stream (already implemented) | +| Capability discovery | Agent catalog + skills listing | + +### 4.2 Agent Memory Systems + +The memory landscape in 2026 spans three architectural categories: + +| Category | Examples | Description | AGH Mapping | +| ----------------------------- | ------------------------------- | ------------------------------------------ | --------------------------- | +| Extended attention | Infini-attention, recursive LMs | Scale the context window itself | Out of scope (model-level) | +| Memory-augmented transformers | Hybrid models | Learned memory modules in model | Out of scope (model-level) | +| External persistent memory | Mem0, Letta, LangChain Memory | Store/retrieve/manage memory outside model | Capability (memory.backend) | + +**Key frameworks**: + +| Framework | Approach | Key Feature | AGH Relevance | +| -------------------- | ------------------------------- | -------------------------------------------------------------------------------------------------- | --------------------------------------- | +| **Mem0** | Dedicated memory layer | Vector memory (semantic 
similarity) + graph memory (relationships) | High -- memory.backend implementation | +| **Letta** | Long-context agent architecture | Core memory blocks (persistent labeled context), archival memory (DB-backed), memory editing tools | High -- maps to AGH's dual-scope memory | +| **LangChain Memory** | Modular memory types | Conversation buffer, summary, entity, knowledge graph | Medium -- patterns for memory.backend | +| **ReMe** | Open-source memory kit | Multiple vector store backends, "remember me, refine me" | Medium -- reference implementation | + +**Advanced patterns emerging in 2026**: + +- **Conflict resolution**: When user preferences change, compress old memory into temporal reflection summaries rather than deleting +- **Multi-agent shared memory**: Strict access controls to prevent race conditions and cross-agent contamination +- **Graph memory in production**: For complex entity relationships (medical, enterprise hierarchies, technical systems) +- **Memory cost**: 1M-token context window costs ~15x more per turn than equivalent persistent memory retrieval + +**AGH mapping**: AGH's existing dual-scope memory (global + workspace) with dream consolidation is well-positioned. 
Extensions should add: + +- Vector-backed memory.backend (semantic search) +- Graph-backed memory.backend (relationship tracking) +- Memory conflict resolution (temporal reflection summaries) +- Cross-session memory sharing with access controls + +### 4.3 Workflow Orchestration Patterns + +Five dominant patterns have emerged: + +| Pattern | Description | Use Case | AGH Mapping | +| ------------------------ | ---------------------------------------------------- | -------------------------------------- | --------------------------------------------- | +| Sequential pipeline | Step-by-step, each stage builds on previous | Progressive refinement tasks | Session chaining via hooks | +| Hierarchical multi-agent | Manager-subordinate delegation | Complex multi-department tasks | Session spawning + parent-child relationships | +| Decentralized swarm | Peer agents collaborate without central control | Resilient, flexible problem-solving | A2A-connected sessions | +| Group chat | Shared conversation thread, chat manager facilitates | Consensus-building (limit to 3 agents) | Multi-agent session with turn management | +| DAG-based | Directed acyclic graphs define task dependencies | Complex pipelines with parallel steps | Workflow engine as new capability | + +**AGH mapping**: AGH's session model could be extended with a `workflow.engine` capability that supports DAG-based task orchestration across sessions. Parent sessions could spawn child sessions with defined dependencies. + +--- + +## 5. 
Permission, Sandbox, and Human-in-the-Loop Patterns + +### 5.1 Three Levels of Human Oversight + +| Level | Description | When to Use | +| --------------------- | --------------------------------------------------------- | --------------------------------- | +| Human-out-of-the-loop | Agent acts fully autonomously | Low-risk, well-defined tasks | +| Human-in-the-loop | Agent pauses for approval on specific actions | High-risk or destructive actions | +| Human-on-the-loop | Supervisor monitors overall flow, intervenes on anomalies | Medium-risk continuous operations | + +### 5.2 Permission Patterns + +| Pattern | Description | AGH Mapping | +| ------------------------------ | -------------------------------------------------------------------------------- | ---------------------------------------------- | +| Per-tool permission policies | Read vs. write access per tool | Capability (permission.gate) | +| Environment-scoped permissions | Allow destructive ops in staging only | Config-level permission rules | +| Approval vs. suspension | Gatekeeping (yes/no) vs. 
clarification (need more info) | Action (Host API) with two response types | +| Planning/execution separation | Planner proposes under broad permissions, executor acts under strict permissions | Two-phase session with different agent configs | +| Tool trust spectrum | Classify tools from harmless (search) to destructive (delete) | permission.gate with risk-level classification | +| Centralized governance UI | Dashboard for managing who/what/where/when | Web UI extension | + +### 5.3 Sandbox Strategies + +| Strategy | Description | AGH Mapping | +| ----------------------- | --------------------------------------- | ------------------------------------------ | +| WASM sandbox | Dual-metered execution (fuel + epoch) | Capability (could wrap tool execution) | +| MicroVMs | Firecracker/gVisor for full isolation | Heavy-weight, for untrusted code | +| Short-lived credentials | Temporary tokens scoped per task | Hook (PreToolUse) for credential injection | +| Zero-trust networking | All connections explicitly allowed | Config-level network policies | +| Workspace confinement | File operations restricted to workspace | Already in AGH's workspace model | + +**AGH mapping**: AGH's permission.gate capability should implement the tool trust spectrum. The planning/execution separation pattern maps to AGH's ability to configure different agent definitions for different phases of a workflow. + +--- + +## 6. Observability and Tracing + +### 6.1 OpenTelemetry as the Standard + +OpenTelemetry has emerged as the universal standard for AI agent observability. 
Key developments: + +| Project | What It Does | AGH Mapping | +| ------------------------- | -------------------------------------------------------------- | --------------------------------------- | +| Traceloop OTel MCP Server | AI agents query distributed traces for automated debugging | Resource (MCP) | +| FastMCP native OTel | Zero-config tracing for tool/prompt/resource operations | Capability (observe.exporter) | +| AG2 OTel Tracing | Structured hierarchical traces with GenAI semantic conventions | Capability (observe.exporter) reference | +| Grafana Cloud + OpenLIT | Pre-built dashboards for MCP observability | Reference architecture | + +**Key metrics to track**: Per-tool latency, error rates, call volume anomalies, end-to-end traces connecting agent reasoning to tool execution. + +**Proposed MCP protocol change**: Add standardized OTel trace spans directly into the MCP protocol, with trace context propagation via HTTP headers (SSE/Streamable HTTP) or explicit parameters (stdio). + +**AGH mapping**: AGH's observe.exporter capability should export OpenTelemetry-compatible traces. The GenAI semantic conventions (model name, provider, token usage, cost, temperature, tool call arguments/results) should be adopted for AGH's event recording. + +--- + +## 7. 
What Developers Most Want + +Based on GitHub issues, Reddit discussions, and developer surveys: + +### 7.1 Top Feature Requests + +| Request | Frequency | Description | AGH Mapping | +| ---------------------------------- | ----------- | ------------------------------------------------------------------- | --------------------------------------------------- | +| **Better large codebase handling** | Very high | Index whole repos, semantic search across files | Capability (prompt.provider) with codebase indexing | +| **Issue-to-PR automation** | High | Assign GitHub issue, agent implements + tests + deploys | Action (Host API) + workflow orchestration | +| **Multi-file agentic workflows** | High | Parallel agents working on different codebase areas | Session management with concurrent agents | +| **Bring Your Own Model (BYOM)** | High | Connect any LLM provider via API keys | Capability (agent.driver) with provider abstraction | +| **Fine-grained permissions** | High | Approval gates before destructive actions, per-task autonomy levels | Capability (permission.gate) | +| **MCP tool discovery** | Medium-high | Browse, install, configure MCP servers easily | Resource (MCP) with registry/marketplace | +| **Reusable workflows/recipes** | Medium | Save and share task automation patterns | Resource (skills) with workflow support | +| **Cost tracking and budgets** | Medium | Token usage monitoring, per-session cost limits | Capability (observe.exporter) + config | +| **Audit trails** | Medium | Complete record of every agent action for compliance | Already in AGH's observe system | +| **Local/offline model support** | Medium | Run with Ollama, Docker Model Runner | Capability (agent.driver) | +| **Custom agent personas** | Medium | Different "modes" for different tasks (code, review, plan) | Resource (agents) with mode overlays | +| **CI/CD integration** | Medium | Agents triggered by CI events, results fed back | Hook + Action (Host API) | + +### 7.2 Anti-Patterns to Avoid 
+ +- Using AI for architecture decisions (better for implementation) +- Infinite agent loops without cost/iteration limits +- Agents that rewrite entire files instead of surgical diffs +- Hardcoded model dependencies (vendor lock-in) +- Trust-all-tools security model + +--- + +## 8. Consolidated Extension Ideas for AGH + +### 8.1 High-Priority Extensions (Strong ecosystem demand, clear AGH mapping) + +| Extension | Type | Dimension | Description | +| ------------------------------------ | ---------- | ------------------------ | --------------------------------------------------------------------------- | +| **OTel Observe Exporter** | Capability | observe.exporter | Export AGH events as OpenTelemetry traces with GenAI semantic conventions | +| **Vector Memory Backend** | Capability | memory.backend | Semantic similarity search over agent memory using embeddings | +| **Graph Memory Backend** | Capability | memory.backend | Relationship-aware memory using knowledge graphs | +| **A2A Protocol Gateway** | Capability | agent.driver (extension) | Accept/delegate tasks via Google's Agent-to-Agent protocol | +| **Permission Gate: Risk Classifier** | Capability | permission.gate | Classify tool calls by risk level, require approval for destructive actions | +| **Codebase Indexer** | Capability | prompt.provider | Index workspace files for semantic search, inject relevant context | +| **GitHub MCP Bundle** | Resource | MCP | Pre-configured GitHub MCP server for PR/issue/code management | +| **Workflow Engine** | Capability | (new) | DAG-based task orchestration across sessions | +| **Agent Package (Hand-style)** | Resource | agents + skills + hooks | Bundled autonomous capability packages | +| **Channel Adapter Framework** | Capability | (new) | Expose sessions via messaging platforms (Slack, Discord, Telegram) | + +### 8.2 Medium-Priority Extensions (Growing demand, useful differentiation) + +| Extension | Type | Dimension | Description | +| 
------------------------------------- | ---------- | ----------------- | ----------------------------------------------------------------------- | +| **Cost/Budget Tracker** | Capability | observe.exporter | Track token usage, enforce per-session cost limits | +| **Content Validator: PII** | Capability | content.validate | Detect and mask personally identifiable information | +| **Content Validator: Secret Scanner** | Capability | content.validate | Prevent secrets/credentials from leaking into agent context | +| **Prompt Injection Scanner** | Capability | content.validate | Detect prompt injection attempts in skill/tool inputs | +| **Custom Distribution Builder** | Action | Host API | Package agent configs + skills + MCP into shareable bundles | +| **MCP Server Registry** | Action | Host API | Browse, install, configure MCP servers from a catalog | +| **Webhook/Event Bridge** | Resource | hooks | Trigger sessions from external events (CI/CD, webhooks, cron) | +| **Planning/Execution Splitter** | Capability | message.transform | Separate planning phase (broad tools) from execution (restricted tools) | + +### 8.3 Lower-Priority / Exploratory Extensions + +| Extension | Type | Dimension | Description | +| ---------------------------- | ---------- | ------------------- | ----------------------------------------------------------- | +| **Multi-Agent Group Chat** | Action | Host API | Multiple agents in shared conversation with turn management | +| **Agent Card Publisher** | Action | Host API | Publish `.well-known/agent.json` for A2A discovery | +| **Mode Gallery** | Resource | skills | Community marketplace for agent mode/persona configurations | +| **Recipe/Workflow YAML** | Resource | skills | Goose-style reusable workflow definitions | +| **Memory Conflict Resolver** | Capability | memory.backend | Temporal reflection summaries when knowledge changes | +| **Browser Automation Tool** | Capability | agent.driver (tool) | Playwright-based browser control for agents | 
+| **Local Model Provider** | Capability | agent.driver | Connect to Ollama/local models as agent backends | + +--- + +## 9. Key Takeaways + +### 9.1 The ecosystem is converging on three standards + +1. **MCP** (Model Context Protocol) for agent-to-tool communication -- 5,000+ servers, universal adoption +2. **A2A** (Agent-to-Agent) for agent-to-agent communication -- 150+ organizations, Linux Foundation governance +3. **OpenTelemetry** for agent observability -- emerging as the universal tracing standard for AI agents + +AGH should support all three natively. + +### 9.2 Skills/extensions are becoming the primary differentiator + +Every major tool (Claude Code, Goose, OpenFang, Cline, Roo Code) has a skills/extension system. The winning pattern is: + +- **Markdown-based** skill definitions (low barrier to authorship) +- **YAML manifests** for metadata and capability declarations +- **Three tiers**: prompt-only (cheapest), subprocess (flexible), sandboxed (secure) +- **Community marketplace** for discovery and sharing + +AGH's existing skills system is well-aligned. Priority: add manifest-based capability declarations and a registry. + +### 9.3 Memory is the next competitive frontier + +Persistent memory across sessions is transitioning from experimental to production-critical. The key patterns are: + +- **Dual-scope** (global + workspace) -- AGH already has this +- **Vector + graph** hybrid -- AGH should add both backends +- **Dream/consolidation** -- AGH already has this (rare advantage) +- **Cost optimization** -- persistent memory retrieval is ~15x cheaper per turn than a 1M-token context window + +### 9.4 Permission and safety are table stakes + +Every production agent system implements: + +- Per-tool, per-action permission policies +- Planning/execution separation +- Human-in-the-loop for destructive actions +- Audit trails +- Sandbox isolation + +AGH's permission.gate capability should implement risk-based classification with configurable approval thresholds. 
+ +### 9.5 Hooks/middleware are the deterministic control layer + +The universal pattern across all frameworks is "Hooks guarantee behavior; prompts suggest it." Claude Code's 12 hook events (PreToolUse, PostToolUse, SessionStart, etc.) represent the industry standard. AGH's hook system should match or exceed this coverage. + +### 9.6 Workflow orchestration is emerging but not yet standardized + +DAG-based workflows, hierarchical agent delegation, and pipeline patterns are common, but each framework implements them differently. AGH has an opportunity to provide a clean, Go-native workflow engine that leverages its session model. + +### 9.7 Channel adapters are a differentiator for non-IDE use cases + +OpenFang's 40 channel adapters demonstrate demand for agent access beyond CLI/IDE. Slack, Discord, and Telegram are the highest-demand channels. AGH could start with 3-5 high-value adapters. + +--- + +## Sources + +- [OpenFang -- The Agent Operating System](https://www.openfang.sh/) +- [OpenFang GitHub](https://github.com/RightNow-AI/openfang) +- [OpenFang Skill Development Docs](https://www.openfang.sh/docs/skill-development) +- [OpenFang Channel Adapters Docs](https://www.openfang.sh/docs/channel-adapters) +- [awesome-mcp-servers (GitHub)](https://github.com/wong2/awesome-mcp-servers) +- [best-of-mcp-servers (GitHub)](https://github.com/tolkonepiu/best-of-mcp-servers) +- [MCP Awesome Directory (1200+ servers)](https://mcp-awesome.com/) +- [Top 10 Most Popular MCP Servers -- FastMCP](https://fastmcp.me/blog/top-10-most-popular-mcp-servers) +- [Top 15 MCP Servers -- DEV Community](https://dev.to/jangwook_kim_e31e7291ad98/top-15-mcp-servers-every-developer-should-install-in-2026-n1h) +- [Agent2Agent Protocol (A2A) -- Google Blog](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/) +- [A2A Protocol Specification](https://a2a-protocol.org/latest/specification/) +- [A2A GitHub](https://github.com/a2aproject/A2A) +- [A2A Protocol Upgrade -- Google 
Cloud Blog](https://cloud.google.com/blog/products/ai-machine-learning/agent2agent-protocol-is-getting-an-upgrade) +- [Linux Foundation A2A Project](https://www.linuxfoundation.org/press/linux-foundation-launches-the-agent2agent-protocol-project-to-enable-secure-intelligent-communication-between-ai-agents) +- [AI Agent Memory Frameworks 2026 -- MachineLearningMastery](https://machinelearningmastery.com/the-6-best-ai-agent-memory-frameworks-you-should-try-in-2026/) +- [Memory for AI Agents -- The New Stack](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/) +- [State of AI Agent Memory 2026 -- Mem0](https://mem0.ai/blog/state-of-ai-agent-memory-2026) +- [Architecture of Memory Systems in AI Agents -- Analytics Vidhya](https://www.analyticsvidhya.com/blog/2026/04/memory-systems-in-ai-agents/) +- [Goose AI Agent -- GitHub](https://github.com/block/goose) +- [Goose Documentation](https://goose-docs.ai/) +- [Goose AI Review 2026](https://aitoolanalysis.com/goose-ai-review/) +- [Cline vs Roo Code vs Continue 2026 -- DevToolReviews](https://www.devtoolreviews.com/reviews/cline-vs-roo-code-vs-continue) +- [Roo Code GitHub](https://github.com/RooCodeInc/Roo-Code) +- [Claude Code Skills Documentation](https://code.claude.com/docs/en/skills) +- [Claude Code Hooks -- Dotzlaw Consulting](https://www.dotzlaw.com/insights/claude-hooks/) +- [Claude Agent SDK Hooks Lifecycle](https://pkg.go.dev/github.com/dotcommander/agent-sdk-go/examples/hooks-lifecycle) +- [OpenTelemetry MCP Server -- Traceloop](https://github.com/traceloop/opentelemetry-mcp-server) +- [MCP Observability with OTel -- SigNoz](https://signoz.io/blog/mcp-observability-with-otel/) +- [Distributed Tracing for Agentic Workflows -- Red Hat](https://developers.redhat.com/articles/2026/04/06/distributed-tracing-agentic-workflows-opentelemetry) +- [How to Sandbox AI Agents 2026 -- Northflank](https://northflank.com/blog/how-to-sandbox-ai-agents) +- [Human-in-the-Loop for AI Agents -- 
Permit.io](https://www.permit.io/blog/human-in-the-loop-for-ai-agents-best-practices-frameworks-use-cases-and-demo) +- [AI Agent Security Guide 2026 -- MintMCP](https://www.mintmcp.com/blog/ai-agent-security) +- [2026 Guide to Agentic Workflow Architectures -- StackAI](https://www.stackai.com/blog/the-2026-guide-to-agentic-workflow-architectures) +- [Best AI Coding Agents 2026 -- Faros](https://www.faros.ai/blog/best-ai-coding-agents-2026) +- [Best AI for Coding Reddit 2026](https://www.aitooldiscovery.com/guides/best-ai-for-coding-reddit) +- [10 Things Developers Want from Agentic IDEs -- RedMonk](https://redmonk.com/kholterhoff/2025/12/22/10-things-developers-want-from-their-agentic-ides-in-2025/) diff --git a/docs/ideas/ext-ideas/research/analysis_hermes.md b/docs/ideas/ext-ideas/research/analysis_hermes.md new file mode 100644 index 000000000..255442efe --- /dev/null +++ b/docs/ideas/ext-ideas/research/analysis_hermes.md @@ -0,0 +1,308 @@ +# Hermes Agent (hermes-agent) -- Extension & Plugin Research for AGH + +## Overview + +Hermes Agent is an open-source, self-improving AI agent framework built by Nous Research, released February 2026, written in Python. It has ~23k GitHub stars, 142 contributors, and ships as a single CLI binary with a multi-platform messaging gateway. Hermes is the closest comparable project to AGH in spirit: a daemon-like agent harness with persistent memory, session management, tool orchestration, and a rich extension model. + +Key architectural parallels to AGH: + +- **Single-binary daemon** with CLI and gateway modes +- **SQLite-backed persistence** with FTS5 for session search +- **Plugin/extension model** spanning tools, hooks, memory backends, and skills +- **MCP integration** as both client and server +- **Subagent delegation** for parallel workstreams +- **Multi-channel communication** (Telegram, Discord, Slack, WhatsApp, Signal, CLI) + +Hermes is three months old and already has 80+ community extensions. 
This analysis extracts concrete extension ideas for AGH's three-dimensional model: Resources, Capabilities, and Actions. + +--- + +## Table of Extensions and Tools + +### Built-in Tools (47 registered tools across 20 toolsets) + +| Name / Toolset | Category | Description | AGH Mapping | +| ---------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `terminal` | Execution | Six backends: local, Docker, SSH, Daytona, Singularity, Modal. Background process management (list, poll, wait, log, kill, write). PTY mode for interactive CLIs. | **Capability: agent.driver** -- AGH's ACP driver spawns subprocesses; terminal backends map to driver variants. Add Docker/SSH/serverless driver backends. | +| `web` | Search/Extract | Web search, page extraction, URL safety checking, website policy compliance | **Resource: tool** -- Web search as a built-in tool exposed via ACP. Could be an MCP server integration. | +| `browser` | Automation | Full browser automation via CDP (navigate, click, type, screenshot). Backends: Browserbase cloud, Browser Use cloud, local Chrome, local Chromium. | **Capability: agent.driver** or **Resource: MCP** -- Browser automation as an MCP server or specialized driver. | +| `file` | Filesystem | File read/write/edit with persistent context | **Resource: tool** -- Already covered by ACP agent file tools. | +| `vision` | Multimodal | Image analysis via vision-capable models. Clipboard paste support. | **Capability: content.validate** or **message.transform** -- Vision as a content processing capability. | +| `image_gen` | Creative | Text-to-image via FAL.ai FLUX 2 Pro with auto-upscaling | **Resource: MCP** -- Image generation as an MCP server. 
| +| `tts` | Voice | Text-to-speech with 5 backends (Edge TTS, NeuTTS, ElevenLabs, etc.). Markdown stripping for natural speech. | **Resource: MCP** -- TTS as an MCP tool server. | +| `transcription` | Voice | STT via faster-whisper (local), Groq, or OpenAI. Hallucination filtering (26 known phrases). | **Resource: MCP** -- Transcription as an MCP tool server. | +| `cronjob` | Scheduling | Built-in cron scheduler with natural language. Jobs attach skills, deliver results to any platform. Pause/resume/edit. | **Resource: hook** + **Action: session** -- Cron as a hook trigger that creates scheduled sessions. High value for AGH. | +| `delegation` | Orchestration | Spawn isolated subagents (up to 3 concurrent) with own conversation, terminal, and restricted toolsets. Zero-context-cost via RPC. | **Action: session** -- Subagent delegation maps directly to AGH session spawning. Critical capability. | +| `code_execution` | Execution | Sandboxed Python execution with RPC access to all Hermes tools. 300s timeout, 50 tool calls max, 50KB stdout cap. | **Capability: agent.driver** -- Code execution sandbox as a driver variant or tool. | +| `memory` | Persistence | Dual-file memory (MEMORY.md + USER.md) injected into system prompt. 8 pluggable backends. | **Capability: memory.backend** -- Direct mapping. AGH already has this dimension. | +| `session_search` | Recall | SQLite FTS5 full-text search over all past sessions with LLM summarization | **Action: observe** -- Session search as an observe/query capability. | +| `skills` | Knowledge | On-demand knowledge documents with progressive disclosure. Auto-creation from experience. | **Resource: skill** -- Direct mapping. AGH already has skills. | +| `todo` | Planning | Task/todo management within agent sessions | **Resource: tool** -- Simple tool, low priority. | +| `moa` | Routing | Multi-model orchestration/routing (Mixture of Agents) | **Capability: prompt.provider** -- Model routing as a prompt/provider capability. 
| +| `homeassistant` | IoT | Smart home control: list entities, control devices, watch state changes. Auto-enabled via HASS_TOKEN. | **Resource: MCP** -- Home Assistant as an MCP server integration. | +| `rl` | Training | RL training pipeline with Atropos (trajectory API), Tinker (training service), and custom environments. GRPO with LoRA. | **Capability: observe.exporter** -- Training data export. Unique to Hermes; not directly applicable to AGH v1. | +| `voice_mode` | Interface | Push-to-talk terminal, voice messages in messengers, Discord VC join/listen/speak | **Resource: hook** -- Voice as a communication channel hook. | +| `clarify` | UX | Ask user for clarification when instructions are ambiguous | **Capability: permission.gate** -- Clarification as a gating mechanism. | +| `send_message` | Communication | Send messages across all connected platforms (Telegram, Discord, Slack, etc.) | **Resource: MCP** -- Messaging as an MCP server (Hermes already does this as MCP server mode). | + +### Plugin System + +| Plugin / Feature | Category | Description | AGH Mapping | +| ---------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| `pre_llm_call` hook | Lifecycle | Fires before each LLM call. Can inject context into ephemeral system prompt. Used by memory plugins to inject recalled context. | **Resource: hook** -- Pre-processing hook. Maps to AGH's hook system. Critical for memory injection. | +| `post_llm_call` hook | Lifecycle | Fires after each LLM response. Used by memory plugins to retain conversation turns. | **Resource: hook** -- Post-processing hook. Maps to AGH's hook system. | +| `pre_tool_call` hook | Lifecycle | Fires before tool execution. Can intercept/modify tool calls. | **Resource: hook** -- Tool interception hook. 
Maps to AGH's hook system. | +| `post_tool_call` hook | Lifecycle | Fires after tool execution. Can process/modify tool results. | **Resource: hook** -- Tool result processing hook. | +| `on_session_start` hook | Lifecycle | Fires when a session begins. Used for initialization, context loading. | **Resource: hook** -- Session lifecycle hook. AGH already has session state machine events. | +| `on_session_end` hook | Lifecycle | Fires when a session ends. Used for cleanup, memory extraction. | **Resource: hook** -- Session lifecycle hook. | +| CLI subcommand registration | Extension | Plugins can register new CLI subcommands via the plugin context API. | **Resource: hook** (CLI extension) -- AGH could allow extensions to register CLI commands. | +| Request-scoped API hooks | Extension | Hooks receive correlation IDs for request tracing. | **Capability: observe.exporter** -- Observability enhancement. | +| Env var prompting on install | UX | Plugins prompt for required env vars during installation. | **Resource: hook** (install lifecycle) -- Plugin installation UX. | +| Plugin discovery (3 sources) | Architecture | `~/.hermes/plugins/` (user), `.hermes/plugins/` (project), pip entry points | **Architecture** -- AGH could support user-dir, project-dir, and Go plugin discovery. | + +### Memory Providers (8 pluggable backends) + +| Provider | Category | Description | AGH Mapping | +| ---------------------------------- | ----------------- | ---------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| **Built-in** (MEMORY.md + USER.md) | Local | Two curated markdown files injected into system prompt. Agent-editable. | **Capability: memory.backend** -- Default backend. AGH's current memory system. | +| **Honcho** | Cloud/Self-hosted | Dialectic reasoning and deep user modeling. 
Builds model of how user thinks, not just what they said. AGPL v3.0. | **Capability: memory.backend** -- Advanced user modeling backend. High value concept. | +| **Hindsight** | Local/Cloud | Best recall accuracy (91.4% on LongMemEval). Async prefetch + retain. Semantic, graph, temporal retrieval. | **Capability: memory.backend** -- High-accuracy retrieval backend. | +| **Holographic** | Local SQLite | HRR (Holographic Reduced Representations). Sub-millisecond retrieval. Zero deps. Trust scoring with decay. | **Capability: memory.backend** -- Lightweight local backend. Interesting for AGH's SQLite approach. | +| **RetainDB** | Cloud (paid) | Hybrid search (Vector + BM25 + Reranking). 7 memory types. Delta compression. | **Capability: memory.backend** -- Cloud backend option. | +| **Mem0** | Cloud | Fastest setup, free tier. Simple extraction. | **Capability: memory.backend** -- Easy onboarding backend. | +| **ByteRover** | Local Markdown | Human-readable, inspectable memory stored as Markdown files. | **Capability: memory.backend** -- Debug-friendly backend. | +| **OpenViking** | Local | Tiered memory loading (L0/L1/L2) for token efficiency. | **Capability: memory.backend** -- Tiered loading is a smart optimization. | + +### Skills System + +| Skill Category | Examples | Description | AGH Mapping | +| ------------------- | ----------------------------------------------------- | ---------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| Apple/macOS | iMessage, Reminders, Notes, FindMy | macOS-specific automation. Platform-gated (only loads on macOS). | **Resource: skill** -- Platform-conditional skills. AGH could gate skills by OS/platform. | +| Agent Orchestration | Multi-agent workflows, coding agent spawning | Skills for delegating to and coordinating with other agents. | **Resource: skill** + **Action: session** -- Multi-agent coordination skills. 
| +| Data Science | Jupyter, data analysis, visualization | Interactive exploration and notebook-based workflows. | **Resource: skill** -- Domain knowledge skills. | +| Creative | ASCII art, hand-drawn diagrams, visual design | Creative output skills. | **Resource: skill** -- Domain knowledge skills. | +| DevOps | Infrastructure automation | CI/CD, deployment, infrastructure skills. | **Resource: skill** -- Domain knowledge skills. | +| Media | YouTube transcripts, GIF search, music gen, audio viz | Media processing and generation. | **Resource: skill** -- Domain knowledge skills. | +| MLOps | Model hub, GPU cloud, eval benchmarks, quantization | ML workflow automation. | **Resource: skill** -- Domain knowledge skills. | +| Smart Home | Light/switch/sensor control | Home automation skills. | **Resource: skill** -- Domain knowledge skills. | +| Social Platforms | Posting, reading, monitoring | Social media automation. | **Resource: skill** -- Domain knowledge skills. | + +### Community Extensions (Selected from 80+) + +| Extension | Author | Status | Description | AGH Mapping | +| ---------------------------------- | ------------------ | ------------ | --------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------- | +| **hermes-workspace** | outsourc-e | Production | Web-based GUI: chat, terminal, memory browser, skills manager, inspector | **Resource: MCP** / Web UI -- AGH already has web UI via HTTP/SSE. Workspace concept maps to AGH's web layer. | +| **mission-control** | builderz-labs | Production | Agent fleet orchestration dashboard. Dispatch tasks, track costs, coordinate multi-agent workflows. 3.7k stars. | **Action: session** + **observe** -- Fleet management is a natural AGH extension for multi-session orchestration. 
| +| **hermes-payguard** | nativ3ai | Experimental | USDC/x402 payment plugin with spending limits and approval flows | **Capability: permission.gate** -- Payment gating/approval as a permission gate. | +| **hindsight** (plugin) | Vectorize | Production | Long-term memory layer. retain/recall/reflect workflows. 8.3k stars. | **Capability: memory.backend** -- Memory backend plugin. | +| **hermes-web-search-plus** | robbyczgw-cla | Beta | Multi-provider web search with intelligent routing (Serper, Tavily, Exa) | **Resource: MCP** -- Search aggregation as an MCP server. | +| **lintlang** | roli-lpci | Beta | Static linter for agent configs/prompts. HERM v1.1 scoring. | **Resource: tool** -- Config validation tool. Could be a pre-session hook. | +| **hermes-plugins** (4-pack) | 42-evey | Beta | Goal management, inter-agent bridge, model selection, cost control | **Multiple** -- Each maps to different AGH dimensions. | +| **hermes-skill-factory** | community | Beta | Auto-generates SKILL.md files from successful workflows | **Resource: skill** -- Skill auto-generation. High value for AGH's skills system. | +| **hermes-weather-plugin** | FahrenheitResearch | Beta | Professional weather with NWS model imagery, NEXRAD radar | **Resource: MCP** -- Domain-specific MCP server. | +| **hermes-agent-acp-skill** | Rainhoole | Beta | Multi-agent delegation bridging Hermes, Codex, and Claude Code | **Resource: skill** + **Capability: agent.driver** -- Cross-agent delegation. Directly relevant to AGH's ACP model. | +| **Anthropic-Cybersecurity-Skills** | community | Production | 734+ security skills mapped to MITRE ATT&CK. 3.6k stars. | **Resource: skill** -- Security skill library. | +| **autonovel** | NousResearch | Production | Autonomous novel-writing pipeline (100k+ words) | **Resource: skill** -- Long-running workflow skill. 
| +| **hermes-agent-self-evolution** | NousResearch | Research | Evolutionary self-improvement via DSPy and GEPA | **Capability: observe.exporter** -- Self-improvement pipeline. Research-grade. | +| **HermesHub** | amanning3390 | Production | Curated skills marketplace with security scanning (65+ threat rules), creator marketplace, x402 payments | **Architecture** -- Skills marketplace concept for AGH. | +| **vessel-browser** | unmodeled-tyler | Experimental | AI-native Linux browser with MCP control | **Resource: MCP** -- Browser as MCP server. | +| **orahermes-agent** | jasperan | Production | Oracle AI Agent Harness with OCI GenAI integration | **Capability: agent.driver** -- Enterprise driver variant. | +| **portable-hermes-agent** | rookiemann | Beta | Windows desktop app bundling 100 tools, GUI, local models | **Architecture** -- Desktop packaging concept. | + +### MCP Integration Details + +| Feature | Description | AGH Mapping | +| ----------------------- | ------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------- | +| MCP Client (native-mcp) | Auto-discovers MCP servers, registers tools, supports stdio + HTTP transports | **Resource: MCP** -- AGH already supports MCP. Enhance with auto-discovery. | +| MCP Server Mode | Hermes exposes its messaging capabilities as an MCP server (list conversations, read history, send messages) | **Resource: MCP** -- AGH could expose session/memory/observe as MCP server. | +| Dynamic tool updates | Handles `notifications/tools/list_changed` for runtime tool registry updates | **Resource: MCP** -- Dynamic tool refresh. | +| Security filtering | Allow/block lists and attribute-based rules for MCP tools | **Capability: permission.gate** -- MCP tool filtering as a permission gate. | +| OAuth 2.1 PKCE | Full OAuth flow for MCP server authentication | **Resource: MCP** -- Auth for MCP servers. 
| +| OSV Malware Scanning | Automatic vulnerability scanning of MCP extension packages | **Capability: content.validate** -- Security scanning for extensions. | +| IDE Integration | VS Code, Zed, JetBrains can register MCP servers that Hermes picks up | **Resource: MCP** -- IDE-sourced MCP server discovery. | + +--- + +## Detailed Analysis of High-Impact Extensions + +### 1. Pluggable Memory Backends + +**What Hermes does:** Memory is abstracted behind a provider ABC (Abstract Base Class). Eight backends implement it, from local SQLite (Holographic) to cloud services (Hindsight, Honcho). Memory providers hook into `pre_llm_call` to inject recalled context and `post_llm_call` to retain new information. The agent sees a unified interface regardless of backend. + +**Why it matters for AGH:** AGH already has `memory.backend` as a Capability dimension. Hermes proves that the community will build diverse memory backends if the interface is clean. The key insight is that memory providers need lifecycle hooks (pre/post LLM call) to be truly useful -- simple CRUD is not enough. + +**AGH adaptation:** + +- Define a `MemoryBackend` interface in `internal/memory/` with `Recall(ctx, query) ([]Memory, error)` and `Retain(ctx, turn) error` methods +- Wire it into the session lifecycle via the existing hook system +- Ship Holographic-style SQLite backend as default (zero deps, local-first) +- Allow registration of additional backends via the plugin system + +### 2. Cron/Scheduled Sessions + +**What Hermes does:** A built-in cron scheduler lets users define recurring tasks in natural language. Jobs can attach skills, use specific tools, and deliver results to any messaging platform. Jobs support pause/resume/edit. + +**Why it matters for AGH:** AGH sessions are currently user-initiated. Scheduled sessions enable autonomous operation: nightly code reviews, morning briefings, periodic health checks, automated testing runs. This is a natural extension of AGH's session lifecycle. 
+ +**AGH adaptation:** + +- Add a `scheduler` package under `internal/` with cron expression parsing +- Integrate with `internal/session` to create sessions on schedule +- Persist job definitions in `globaldb` +- Expose via UDS API for CLI management and HTTP API for web UI +- Map to **Resource: hook** (cron trigger) + **Action: session.create** + +### 3. Subagent Delegation with Isolated Context + +**What Hermes does:** The `delegate_task` tool spawns child agent instances with isolated context, restricted toolsets, and their own terminal sessions. Up to 3 concurrent subagents. Results are collected with zero context cost to the parent. + +**Why it matters for AGH:** AGH already manages sessions, but parent-child session relationships and context isolation are not yet modeled. Delegation enables complex workflows: a parent session spawns specialized child sessions for parallel tasks, collects results, and synthesizes. + +**AGH adaptation:** + +- Add parent-child session relationships in `internal/session` +- Add a `delegate` action to the Host API +- Child sessions inherit parent's workspace but get restricted tool access +- Results flow back via the observe system +- Map to **Action: session.delegate** + **Capability: agent.driver** (child driver selection) + +### 4. Skill Auto-Generation (Skill Factory) + +**What Hermes does:** After completing a task successfully, the agent analyzes its steps, identifies reusable patterns, and writes a SKILL.md file capturing the workflow. Next time a similar task arises, it loads the skill. Every 15 tasks, the agent evaluates and refines skills. + +**Why it matters for AGH:** This is the "self-improving" core of Hermes. For AGH, it means agents can build institutional knowledge over time. A DevOps agent that deploys 50 times creates a deployment skill that captures all edge cases. 
+ +**AGH adaptation:** + +- Add skill generation to `internal/skills/` triggered by session completion hooks +- Use the observe system to capture successful session trajectories +- LLM-based skill extraction as a post-session hook +- Store generated skills in workspace-scoped skill directory +- Map to **Resource: skill** (auto-generated) + **Resource: hook** (session.end trigger) + +### 5. MCP Server Mode (Exposing Agent Capabilities) + +**What Hermes does:** Hermes can act as an MCP server, exposing its messaging capabilities to other MCP clients. Other agents (Claude Code, Codex, Cursor) can use Hermes's messaging, conversation history, and platform delivery as tools. + +**Why it matters for AGH:** AGH could expose its session management, memory, skills, and observe capabilities as MCP tools. This makes AGH a "capability provider" for any MCP-compatible agent, not just a harness for running agents. + +**AGH adaptation:** + +- Add MCP server mode to `internal/api/` alongside HTTP and UDS servers +- Expose key Host API actions as MCP tools: `agh_create_session`, `agh_query_memory`, `agh_list_skills`, `agh_get_events` +- This makes AGH composable with other agent systems +- Map to **Resource: MCP** (server role) + **Action: all Host API actions** + +### 6. Security Scanning for Extensions (Skills Guard) + +**What Hermes does:** All hub-installed skills pass through a security scanner checking 65+ threat rules across 8 categories: data exfiltration, prompt injection, destructive commands, obfuscation, hardcoded secrets, network abuse, env abuse, supply-chain signals. Critical findings block installation. + +**Why it matters for AGH:** As AGH's extension ecosystem grows, untrusted extensions become a risk vector. A validation layer for skills, hooks, and MCP servers prevents malicious or buggy extensions from compromising the agent or host system. 
+ +**AGH adaptation:** + +- Add a `validate` package under `internal/skills/` or a general `internal/security/` +- Implement pattern-based scanning for skill content before loading +- Gate MCP server connections through permission checks +- Map to **Capability: content.validate** + **Capability: permission.gate** + +### 7. Multi-Platform Messaging Gateway + +**What Hermes does:** A single gateway process handles 14 platform adapters (Telegram, Discord, Slack, WhatsApp, Signal, Feishu/Lark, WeCom, DingTalk, SMS/Twilio, Mattermost, Matrix, Webhook, Home Assistant, CLI). Cross-platform conversation continuity -- start on Telegram, continue on Discord. + +**Why it matters for AGH:** AGH currently exposes HTTP/SSE (web) and UDS (CLI). Adding messaging platform adapters would make AGH accessible from anywhere, enabling always-on agent availability. + +**AGH adaptation:** + +- Add a `gateway` package under `internal/api/` with adapter interface +- Each platform adapter implements message receive/send +- Route incoming messages to session creation/resumption +- Map to **Resource: hook** (message adapters) + **Action: session** + +### 8. Credential Pool with Rotation + +**What Hermes does:** Same-Provider Credential Pools let you configure multiple API keys for the same provider. Thread-safe least-used strategy distributes load. 401 failures trigger automatic rotation. + +**Why it matters for AGH:** Multi-key management is essential for production deployments where rate limits and key rotation are concerns. AGH agents making many API calls benefit from automatic key distribution. + +**AGH adaptation:** + +- Add credential pool support in `internal/config/` +- Thread-safe rotation with `sync.RWMutex` +- Automatic failover on auth errors +- Map to **Capability: agent.driver** (provider credential management) + +--- + +## Key Takeaways for AGH Extension Ideas + +### Highest-Priority Extensions (Immediate Value) + +1. 
**Pluggable memory backends** -- AGH already has the `memory.backend` dimension. Ship a clean interface and one or two backends (SQLite-based local, plus one cloud option). The pre/post LLM call hook pattern is essential. + +2. **Cron/scheduled sessions** -- Natural extension of AGH's session lifecycle. Enables autonomous operation without user initiation. Relatively straightforward to implement with AGH's existing session manager. + +3. **Subagent delegation** -- AGH manages sessions; parent-child relationships and context isolation unlock complex multi-step workflows. This is a differentiator. + +4. **MCP server mode** -- Expose AGH's Host API as MCP tools so other agents can use AGH as a capability provider. Composability multiplier. + +### Medium-Priority Extensions (Ecosystem Growth) + +5. **Skill auto-generation** -- Self-improving skills from session trajectories. Requires observe system maturity but delivers compounding value. + +6. **Security scanning for extensions** -- Content validation for skills and MCP servers. Important as the extension ecosystem grows. + +7. **Credential pool/rotation** -- Production-grade key management. Important for reliability. + +8. **Platform-conditional resource loading** -- Skills/tools that only load on specific OS/platforms (like Hermes's macOS-only skills). + +### Lower-Priority but Interesting (Future Phases) + +9. **Multi-platform messaging gateway** -- Telegram/Discord/Slack adapters. High effort, niche demand initially. + +10. **RL training pipeline** -- Trajectory generation and model fine-tuning from agent interactions. Research-grade, relevant for Phase 3. + +11. **Voice mode** -- STT/TTS pipeline. Niche but differentiating for certain use cases. + +12. **Skills marketplace** -- Community skill distribution with security scanning. Requires ecosystem scale. 
+ +### Architectural Lessons from Hermes + +- **Hook-based extension is king.** Hermes's four lifecycle hooks (`pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`) plus two tool hooks (`pre_tool_call`, `post_tool_call`) enable the vast majority of extensions without touching core code. AGH's hook system should prioritize these six hook points. + +- **Memory providers need lifecycle integration, not just CRUD.** The ability to inject context before LLM calls and retain information after is what makes memory backends actually useful. Simple read/write APIs are insufficient. + +- **Skills as markdown documents with progressive disclosure** is the winning pattern. Low barrier to create (just write markdown), easy to share, and the progressive disclosure pattern minimizes token waste. + +- **Plugin discovery from three sources** (user home, project directory, package registry) covers all use cases: personal customization, project-specific tools, and community distribution. + +- **MCP dual-role (client + server)** makes the agent composable. Being only an MCP client limits the agent to consuming tools; being also an MCP server makes it a building block for larger systems. + +- **Security scanning is not optional.** Hermes learned this early and gates all community extensions through 65+ threat rules. AGH should build this in from the start, not bolt it on later. 
+ +--- + +## Sources + +- [NousResearch/hermes-agent GitHub](https://github.com/nousresearch/hermes-agent) +- [Hermes Agent Documentation](https://hermes-agent.nousresearch.com/docs/) +- [Tools & Toolsets Reference](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) +- [Skills System](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills/) +- [MCP Integration](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) +- [Memory Providers](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory-providers) +- [RL Training](https://hermes-agent.nousresearch.com/docs/user-guide/features/rl-training) +- [Voice Mode](https://hermes-agent.nousresearch.com/docs/user-guide/features/voice-mode/) +- [Home Assistant Integration](https://hermes-agent.nousresearch.com/docs/user-guide/messaging/homeassistant) +- [Plugin Guide](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin/) +- [awesome-hermes-agent](https://github.com/0xNyk/awesome-hermes-agent) +- [Hermes Agent Ecosystem Map](https://hermes-ecosystem.vercel.app/) +- [HermesHub Skills Marketplace](https://github.com/amanning3390/hermeshub) +- [Hindsight Memory Provider](https://hindsight.vectorize.io/sdks/integrations/hermes) +- [Hermes Agent v0.5.0 Release](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.3.28) +- [Hermes Agent v0.7.0 Release](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.3) +- [Bundled Skills Catalog](https://hermes-agent.nousresearch.com/docs/reference/skills-catalog) +- [Creating Skills](https://hermes-agent.nousresearch.com/docs/developer-guide/creating-skills/) +- [Architecture](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture/) +- [Hermes Agent on DEV Community](https://dev.to/arshtechpro/hermes-agent-a-self-improving-ai-agent-that-runs-anywhere-2b7d) +- [Hermes Agent Memory Explained (Vectorize)](https://vectorize.io/articles/hermes-agent-memory-explained) +- 
[Memory Providers Compared (Vectorize)](https://vectorize.io/articles/hermes-agent-memory-providers-compared) diff --git a/.compozy/tasks/ext-ideas/research/analysis_openclaw.md b/docs/ideas/ext-ideas/research/analysis_openclaw.md similarity index 54% rename from .compozy/tasks/ext-ideas/research/analysis_openclaw.md rename to docs/ideas/ext-ideas/research/analysis_openclaw.md index a1350968e..4cc87dd80 100644 --- a/.compozy/tasks/ext-ideas/research/analysis_openclaw.md +++ b/docs/ideas/ext-ideas/research/analysis_openclaw.md @@ -12,110 +12,110 @@ OpenClaw's architecture is relevant to AGH because both share a local-first, sin ### Channels (Messaging Platform Adapters) -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| WhatsApp (Baileys) | Channel | Bidirectional WhatsApp messaging via Baileys library | Resource: channel adapter | -| Telegram (grammY) | Channel | Telegram bot integration, most popular after web browsing | Resource: channel adapter | -| Slack (Bolt) | Channel | Workspace messaging with Bolt SDK, approval buttons | Resource: channel adapter | -| Discord (discord.js) | Channel | Rich interaction support with buttons, slash commands | Resource: channel adapter | -| Google Chat | Channel | Google Workspace chat integration via Chat API | Resource: channel adapter | -| Signal (signal-cli) | Channel | Privacy-focused Signal messenger support | Resource: channel adapter | -| iMessage (BlueBubbles) | Channel | macOS-exclusive iMessage bridge | Resource: channel adapter | -| Microsoft Teams | Channel | Plugin-only as of 2026.1.15 (`@openclaw/msteams`) | Resource: channel adapter | -| Matrix | Channel | Decentralized Matrix protocol support | Resource: channel adapter | -| IRC | Channel | Legacy IRC channel support | Resource: channel adapter | -| WebChat | Channel | Built-in web UI channel | Resource: channel adapter (maps to AGH HTTP/SSE) | -| LINE | Channel | LINE messenger (Asia-Pacific markets) 
| Resource: channel adapter | -| Mattermost | Channel | Self-hosted team chat | Resource: channel adapter | -| Feishu | Channel | Lark/Feishu for China market | Resource: channel adapter | -| WeChat | Channel | WeChat via Tencent plugin (`@tencent-weixin/openclaw-weixin`) | Resource: channel adapter | -| Nostr | Channel | Decentralized social protocol | Resource: channel adapter | -| Twitch | Channel | Twitch chat integration for streaming | Resource: channel adapter | -| Nextcloud Talk | Channel | Self-hosted Nextcloud chat | Resource: channel adapter | +| Extension | Category | Description | AGH Mapping | +| ---------------------- | -------- | ------------------------------------------------------------- | ------------------------------------------------ | +| WhatsApp (Baileys) | Channel | Bidirectional WhatsApp messaging via Baileys library | Resource: channel adapter | +| Telegram (grammY) | Channel | Telegram bot integration, most popular after web browsing | Resource: channel adapter | +| Slack (Bolt) | Channel | Workspace messaging with Bolt SDK, approval buttons | Resource: channel adapter | +| Discord (discord.js) | Channel | Rich interaction support with buttons, slash commands | Resource: channel adapter | +| Google Chat | Channel | Google Workspace chat integration via Chat API | Resource: channel adapter | +| Signal (signal-cli) | Channel | Privacy-focused Signal messenger support | Resource: channel adapter | +| iMessage (BlueBubbles) | Channel | macOS-exclusive iMessage bridge | Resource: channel adapter | +| Microsoft Teams | Channel | Plugin-only as of 2026.1.15 (`@openclaw/msteams`) | Resource: channel adapter | +| Matrix | Channel | Decentralized Matrix protocol support | Resource: channel adapter | +| IRC | Channel | Legacy IRC channel support | Resource: channel adapter | +| WebChat | Channel | Built-in web UI channel | Resource: channel adapter (maps to AGH HTTP/SSE) | +| LINE | Channel | LINE messenger (Asia-Pacific markets) | Resource: 
channel adapter | +| Mattermost | Channel | Self-hosted team chat | Resource: channel adapter | +| Feishu | Channel | Lark/Feishu for China market | Resource: channel adapter | +| WeChat | Channel | WeChat via Tencent plugin (`@tencent-weixin/openclaw-weixin`) | Resource: channel adapter | +| Nostr | Channel | Decentralized social protocol | Resource: channel adapter | +| Twitch | Channel | Twitch chat integration for streaming | Resource: channel adapter | +| Nextcloud Talk | Channel | Self-hosted Nextcloud chat | Resource: channel adapter | ### LLM Providers -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| Anthropic (Claude) | Provider | Claude Sonnet 4 recommended as primary | Capability: agent.driver | -| OpenAI (GPT) | Provider | GPT-5.4 with forward-compat fallback | Capability: agent.driver | -| Google (Gemini) | Provider | Gemini API + Vertex AI support | Capability: agent.driver | -| Ollama (Local) | Provider | Auto-detected local models at 127.0.0.1:11434 | Capability: agent.driver | -| OpenRouter | Provider | Aggregation platform for multiple providers | Capability: agent.driver | -| Together AI | Provider | Aggregation/inference provider | Capability: agent.driver | -| Qwen (via OAuth plugin) | Provider | Alibaba Qwen models | Capability: agent.driver | -| Copilot Proxy | Provider | Microsoft Copilot model proxy | Capability: agent.driver | +| Extension | Category | Description | AGH Mapping | +| ----------------------- | -------- | --------------------------------------------- | ------------------------ | +| Anthropic (Claude) | Provider | Claude Sonnet 4 recommended as primary | Capability: agent.driver | +| OpenAI (GPT) | Provider | GPT-5.4 with forward-compat fallback | Capability: agent.driver | +| Google (Gemini) | Provider | Gemini API + Vertex AI support | Capability: agent.driver | +| Ollama (Local) | Provider | Auto-detected local models at 127.0.0.1:11434 | Capability: agent.driver 
| +| OpenRouter | Provider | Aggregation platform for multiple providers | Capability: agent.driver | +| Together AI | Provider | Aggregation/inference provider | Capability: agent.driver | +| Qwen (via OAuth plugin) | Provider | Alibaba Qwen models | Capability: agent.driver | +| Copilot Proxy | Provider | Microsoft Copilot model proxy | Capability: agent.driver | ### Built-in Tools -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| Browser | Tool | Dedicated Chromium instance with snapshots, actions, form fill, scraping | Action: session tool | -| Canvas (A2UI) | Tool | Agent-driven visual workspace with live push/reset/eval/snapshot | Action: session tool | -| Cron | Tool | Schedule agent actions at specific times (e.g., daily summary at 9 AM) | Resource: hook (time-based) | -| Webhooks | Tool | External trigger points for agent actions (e.g., Gmail push) | Resource: hook (event-based) | -| Sessions | Tool | List sessions, inspect transcripts, send cross-session messages | Action: session management | -| Nodes | Tool | Camera snap/clip, screen record, location.get, notifications | Action: device capability | -| Shell Execution | Tool | Run shell commands on host machine | Action: session tool | -| File System | Tool | Read/write/manage files | Action: session tool | -| MCPorter | Tool | Discover, configure, authenticate, and call MCP servers via NL | Resource: MCP management | +| Extension | Category | Description | AGH Mapping | +| --------------- | -------- | ------------------------------------------------------------------------ | ---------------------------- | +| Browser | Tool | Dedicated Chromium instance with snapshots, actions, form fill, scraping | Action: session tool | +| Canvas (A2UI) | Tool | Agent-driven visual workspace with live push/reset/eval/snapshot | Action: session tool | +| Cron | Tool | Schedule agent actions at specific times (e.g., daily summary at 9 AM) | Resource: hook 
(time-based) | +| Webhooks | Tool | External trigger points for agent actions (e.g., Gmail push) | Resource: hook (event-based) | +| Sessions | Tool | List sessions, inspect transcripts, send cross-session messages | Action: session management | +| Nodes | Tool | Camera snap/clip, screen record, location.get, notifications | Action: device capability | +| Shell Execution | Tool | Run shell commands on host machine | Action: session tool | +| File System | Tool | Read/write/manage files | Action: session tool | +| MCPorter | Tool | Discover, configure, authenticate, and call MCP servers via NL | Resource: MCP management | ### Memory Plugins -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| Memory Core | Memory | Markdown files + SQLite FTS5 + sqlite-vec (1536-dim embeddings) | Capability: memory.backend | -| Memory LanceDB | Memory | LanceDB vector storage with auto-recall/auto-capture | Capability: memory.backend | -| Memory LanceDB Pro | Memory | Hybrid Vector+BM25, cross-encoder rerank, multi-scope isolation, Weibull decay, smart extraction into 6 categories, three-tier lifecycle (Peripheral/Working/Core) | Capability: memory.backend | -| Dreaming/Consolidation | Memory | Background consolidation pass: collects short-term signals, scores candidates, promotes to long-term MEMORY.md. Opt-in, scheduled via cron, thresholded. 
| Maps to AGH internal/memory/consolidation | +| Extension | Category | Description | AGH Mapping | +| ---------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------- | +| Memory Core | Memory | Markdown files + SQLite FTS5 + sqlite-vec (1536-dim embeddings) | Capability: memory.backend | +| Memory LanceDB | Memory | LanceDB vector storage with auto-recall/auto-capture | Capability: memory.backend | +| Memory LanceDB Pro | Memory | Hybrid Vector+BM25, cross-encoder rerank, multi-scope isolation, Weibull decay, smart extraction into 6 categories, three-tier lifecycle (Peripheral/Working/Core) | Capability: memory.backend | +| Dreaming/Consolidation | Memory | Background consolidation pass: collects short-term signals, scores candidates, promotes to long-term MEMORY.md. Opt-in, scheduled via cron, thresholded. | Maps to AGH internal/memory/consolidation | ### Popular ClawHub Skills (Community) -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| Web Browsing | Skill | Navigate pages, extract content, follow links (180K+ installs) | Resource: skill | -| Telegram Integration | Skill | Connect agent to Telegram (145K+ installs) | Resource: channel skill | -| Capability Evolver | Skill | Self-improving agent capability evolution (35K+ downloads) | Resource: skill + Capability: agent meta-skill | -| Self-Improving Agent | Skill | Agent that improves its own performance (132 stars, highest rated) | Resource: skill | -| GOG (Google Workspace) | Skill | Gmail, Calendar, Drive, Contacts, Sheets, Docs in one skill | Resource: skill | -| Agent Browser | Skill | Full browser automation for web workflows (43 stars) | Resource: skill | -| Tavily Search | Skill | AI-optimized web search for agents | Resource: skill / MCP server | -| n8n Workflow 
Automation | Skill | Trigger and manage n8n automation workflows | Resource: skill / hook | -| MCP Integration | Skill | General MCP server connectivity wrapper | Resource: MCP | +| Extension | Category | Description | AGH Mapping | +| ----------------------- | -------- | ------------------------------------------------------------------ | ---------------------------------------------- | +| Web Browsing | Skill | Navigate pages, extract content, follow links (180K+ installs) | Resource: skill | +| Telegram Integration | Skill | Connect agent to Telegram (145K+ installs) | Resource: channel skill | +| Capability Evolver | Skill | Self-improving agent capability evolution (35K+ downloads) | Resource: skill + Capability: agent meta-skill | +| Self-Improving Agent | Skill | Agent that improves its own performance (132 stars, highest rated) | Resource: skill | +| GOG (Google Workspace) | Skill | Gmail, Calendar, Drive, Contacts, Sheets, Docs in one skill | Resource: skill | +| Agent Browser | Skill | Full browser automation for web workflows (43 stars) | Resource: skill | +| Tavily Search | Skill | AI-optimized web search for agents | Resource: skill / MCP server | +| n8n Workflow Automation | Skill | Trigger and manage n8n automation workflows | Resource: skill / hook | +| MCP Integration | Skill | General MCP server connectivity wrapper | Resource: MCP | ### Native Apps & Companions -| Extension | Category | Description | AGH Mapping | -|-----------|----------|-------------|-------------| -| macOS Menu Bar App | Native | Menu bar control plane, Voice Wake/PTT, Talk Mode overlay, WebChat, debug tools | N/A (AGH is CLI-first) | -| iOS App | Native | Canvas, screen snapshot, camera, location, talk mode, voice wake | N/A (potential future node) | -| Android App | Native | Customizable wake words ("Hey Claw", "Jarvis", "Computer"), offline Vosk detection, device control | N/A (potential future node) | -| Apple Watch | Native | Inbox UI and notification relay | N/A | -| 
QuickClaw (iOS) | Native (community) | Clean native iOS interface with notifications, haptics | N/A | -| MacClaw | Native (community) | All-in-one macOS app integrating WhatsApp, Slack, Teams, iMessage | N/A | +| Extension | Category | Description | AGH Mapping | +| ------------------ | ------------------ | -------------------------------------------------------------------------------------------------- | --------------------------- | +| macOS Menu Bar App | Native | Menu bar control plane, Voice Wake/PTT, Talk Mode overlay, WebChat, debug tools | N/A (AGH is CLI-first) | +| iOS App | Native | Canvas, screen snapshot, camera, location, talk mode, voice wake | N/A (potential future node) | +| Android App | Native | Customizable wake words ("Hey Claw", "Jarvis", "Computer"), offline Vosk detection, device control | N/A (potential future node) | +| Apple Watch | Native | Inbox UI and notification relay | N/A | +| QuickClaw (iOS) | Native (community) | Clean native iOS interface with notifications, haptics | N/A | +| MacClaw | Native (community) | All-in-one macOS app integrating WhatsApp, Slack, Teams, iMessage | N/A | ### Plugin Hook System -| Hook | Phase | Description | AGH Mapping | -|------|-------|-------------|-------------| -| `gateway_start` / `gateway_stop` | Lifecycle | Gateway process start/stop | Daemon boot/shutdown hooks | -| `session_start` / `session_end` | Session | Session lifecycle boundaries | Resource: hook | -| `before_agent_start` / `agent_end` | Agent | Agent lifecycle (deprecated in favor of before_prompt_build) | Resource: hook | -| `before_prompt_build` | Prompt | Intercept and modify system prompt before sending to LLM | Capability: prompt.provider | -| `before_tool_call` / `after_tool_call` | Tool | Guard or transform tool invocations (block/allow/requireApproval) | Capability: permission.gate + content.validate | -| `before_compaction` / `after_compaction` | Memory | Context window compaction events | Capability: message.transform | -| 
`message_received` / `message_sending` / `message_sent` | Message | Message pipeline interception (cancel support) | Capability: message.transform | -| `tool_result_persist` | Persistence | Transform tool results before transcript write | Capability: observe.exporter | -| `before_install` | Plugin | Guard plugin installation (block support) | Resource: hook | +| Hook | Phase | Description | AGH Mapping | +| ------------------------------------------------------- | ----------- | ----------------------------------------------------------------- | ---------------------------------------------- | +| `gateway_start` / `gateway_stop` | Lifecycle | Gateway process start/stop | Daemon boot/shutdown hooks | +| `session_start` / `session_end` | Session | Session lifecycle boundaries | Resource: hook | +| `before_agent_start` / `agent_end` | Agent | Agent lifecycle (deprecated in favor of before_prompt_build) | Resource: hook | +| `before_prompt_build` | Prompt | Intercept and modify system prompt before sending to LLM | Capability: prompt.provider | +| `before_tool_call` / `after_tool_call` | Tool | Guard or transform tool invocations (block/allow/requireApproval) | Capability: permission.gate + content.validate | +| `before_compaction` / `after_compaction` | Memory | Context window compaction events | Capability: message.transform | +| `message_received` / `message_sending` / `message_sent` | Message | Message pipeline interception (cancel support) | Capability: message.transform | +| `tool_result_persist` | Persistence | Transform tool results before transcript write | Capability: observe.exporter | +| `before_install` | Plugin | Guard plugin installation (block support) | Resource: hook | ### Tool Access Control Profiles -| Profile | Description | AGH Mapping | -|---------|-------------|-------------| -| `minimal` | Read-only tools only, for observer agents | Capability: permission.gate | -| `coding` | File ops + execution tools for dev agents | Capability: 
permission.gate | -| `messaging` | Adds cross-platform messaging on top of coding | Capability: permission.gate | -| `full` | All tools enabled, for trusted agents only | Capability: permission.gate | +| Profile | Description | AGH Mapping | +| ----------- | ------------------------------------------------------------ | --------------------------- | +| `minimal` | Read-only tools only, for observer agents | Capability: permission.gate | +| `coding` | File ops + execution tools for dev agents | Capability: permission.gate | +| `messaging` | Adds cross-platform messaging on top of coding | Capability: permission.gate | +| `full` | All tools enabled, for trusted agents only | Capability: permission.gate | | Tool Groups | Named collections (`group:runtime`, `group:fs`, `group:web`) | Capability: permission.gate | --- diff --git a/.compozy/tasks/ext-ideas/research/analysis_pi_mono.md b/docs/ideas/ext-ideas/research/analysis_pi_mono.md similarity index 56% rename from .compozy/tasks/ext-ideas/research/analysis_pi_mono.md rename to docs/ideas/ext-ideas/research/analysis_pi_mono.md index f1cecbbf2..200d7760a 100644 --- a/.compozy/tasks/ext-ideas/research/analysis_pi_mono.md +++ b/docs/ideas/ext-ideas/research/analysis_pi_mono.md @@ -14,6 +14,7 @@ Pi-Mono is a TypeScript monorepo that provides a minimal AI coding agent with an aggressively extensible architecture. Its philosophy is "what you leave out matters more than what you put in" -- the core ships with only 4 tools (read, write, edit, bash) and no built-in MCP, sub-agents, plan mode, or permission popups. Instead, all of these are buildable via extensions. Pi's extension model has four dimensions: + 1. **Extensions** -- TypeScript modules that hook into lifecycle events, register tools, add commands, customize UI 2. **Skills** -- Directory-based capability packages with a `SKILL.md` file, loaded on-demand (progressive disclosure) 3. 
**Prompt Templates** -- Reusable prompt shortcuts stored as Markdown @@ -29,70 +30,70 @@ The ecosystem is young but active, with 30+ community extensions, a curated awes ### Official Example Extensions (packages/coding-agent/examples/extensions/) -| Name | Category | Description | AGH Mapping | -|------|----------|-------------|-------------| -| `hello.ts` | Tool + Event | Minimal example: registers a "greet" tool, subscribes to `tool_call` events to block dangerous `rm -rf` commands, adds a `/hello` command | Resource (hook) + Capability (content.validate) | -| `todo.ts` | Stateful Tool | Persistent to-do list with state management across sessions | Resource (skill) + Action (session state) | -| `tool-override.ts` | Tool Override | Overrides the built-in `read` tool with custom behavior while inheriting default rendering | Capability (agent.driver tool override) | -| `dynamic-tools.ts` | Dynamic Tools | Registers/unregisters tools at runtime via `pi.setActiveTools()` | Capability (agent.driver dynamic registration) | -| `question.ts` | Interactive Tool | Tool that prompts the user for input via `ctx.ui` | Action (session interaction) | -| `questionnaire.ts` | Multi-step Wizard | Multi-step interactive tool with sequential user prompts | Action (session interaction) | -| `truncated-tool.ts` | Output Control | Demonstrates output truncation (50KB / 2000 lines limit) | Capability (content.validate) | -| `doom-overlay.ts` | TUI Overlay | Plays DOOM as a WebAssembly overlay in the terminal at 35 FPS | Resource (UI extension) | +| Name | Category | Description | AGH Mapping | +| ------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | +| `hello.ts` | Tool + Event | Minimal example: registers a "greet" tool, subscribes to `tool_call` events to block dangerous `rm -rf` commands, adds a `/hello` 
command | Resource (hook) + Capability (content.validate) | +| `todo.ts` | Stateful Tool | Persistent to-do list with state management across sessions | Resource (skill) + Action (session state) | +| `tool-override.ts` | Tool Override | Overrides the built-in `read` tool with custom behavior while inheriting default rendering | Capability (agent.driver tool override) | +| `dynamic-tools.ts` | Dynamic Tools | Registers/unregisters tools at runtime via `pi.setActiveTools()` | Capability (agent.driver dynamic registration) | +| `question.ts` | Interactive Tool | Tool that prompts the user for input via `ctx.ui` | Action (session interaction) | +| `questionnaire.ts` | Multi-step Wizard | Multi-step interactive tool with sequential user prompts | Action (session interaction) | +| `truncated-tool.ts` | Output Control | Demonstrates output truncation (50KB / 2000 lines limit) | Capability (content.validate) | +| `doom-overlay.ts` | TUI Overlay | Plays DOOM as a WebAssembly overlay in the terminal at 35 FPS | Resource (UI extension) | ### Community Extensions (from awesome-pi-agent + npm) -| Name | Author | Category | Description | AGH Mapping | -|------|--------|----------|-------------|-------------| -| **filter-output** | community | Security | Redacts sensitive data (API keys, tokens, passwords) from tool results before the LLM sees them | Capability: **content.validate** -- sanitize output before it reaches the model | -| **security** | community | Permission Gate | Blocks dangerous bash commands and protects sensitive file paths from writes | Capability: **permission.gate** -- pre-execution validation | -| **safe-git** | community | Permission Gate | Requires user approval before dangerous git operations (force push, reset --hard, etc.) 
| Capability: **permission.gate** -- git-specific guard | -| **plan-mode** | community | Tool Override | Read-only exploration mode that restricts the agent to non-destructive tools only | Capability: **permission.gate** + **agent.driver** tool filtering | -| **oracle** | community | Multi-Model | Gets a second opinion from an alternative AI model without switching context | Capability: **agent.driver** (multi-model dispatch) | -| **handoff** | jayshah5696 | Session Management | Transfers context to a new focused session with editable handoff prompt | Action: **session** (fork/spawn with context transfer) | -| **memory-mode** | community | Persistent Memory | Saves instructions to AGENTS.md with AI-assisted integration | Capability: **memory.backend** -- instruction persistence | -| **cost-tracker** | hjanuschka | Observability | Session spending analysis from pi logs | Capability: **observe.exporter** -- cost metrics | -| **usage-bar** | hjanuschka | Observability | Persistent provider status indicator showing token/cost stats in footer | Capability: **observe.exporter** -- real-time metrics display | -| **pi-rewind** | arpagon | Checkpointing | Git-based file checkpoints per turn, `/rewind` command with diff preview, redo stack | Action: **session** (checkpoint/restore) + Resource (hook) | -| **pi-powerline-footer** | nicobailon | UI Customization | Powerline-style status bar with git integration, model switcher, editor stash (Alt+S), welcome overlay, "working vibes" (themed loading messages) | Resource (UI extension) | -| **pi-canvas** | community | UI Component | Interactive TUI canvases (calendar, document, flights) rendered inline | Resource (UI extension) | -| **background-notify** | community | Notifications | Audio beep + terminal focus when tasks complete | Capability: **observe.exporter** (notification channel) | -| **session-emoji** | community | UI | AI-powered emoji in footer representing conversation context | Resource (UI extension) | -| 
**session-color** | community | UI | Colored band in footer to distinguish active sessions | Resource (UI extension) | -| **pi-ssh-remote** | community | Remote Execution | Redirects all file operations and commands to a remote host via SSH | Capability: **agent.driver** (remote execution backend) | -| **pi-dcp** | community | Context Management | Dynamic context pruning for intelligent conversation optimization | Capability: **message.transform** | -| **pi-rtk** | sherif-fanous | Token Optimization | Routes bash commands through rtk for LLM token savings | Capability: **message.transform** (output compression) | -| **ultrathink** | community | UI Effect | Rainbow animated effect with Knight Rider shimmer during thinking | Resource (UI extension / theme) | -| **pi-gui** | community | UI | GUI extension providing visual interface for the pi agent | Resource (UI extension) | -| **pi-screenshots-picker** | community | Tool | Screenshot picker extension for better screenshot selections | Resource (tool) | -| **pi-super-curl** | community | Tool | Empowers curl requests with coding agent capabilities | Resource (tool) | -| **go-to-bed** | mitsuhiko | Safety Guard | Late-night safety guard with explicit confirmation after midnight | Capability: **permission.gate** (time-based) | -| **multi-edit** | mitsuhiko | Tool Override | Replaces built-in edit tool with batch multi-edits and Codex-style patch support with preflight validation | Capability: **agent.driver** (tool replacement) | -| **loop** | mitsuhiko | Workflow | Prompt loop for rapid iterative coding with optional auto-continue | Action: **session** (iteration control) | -| **context** | mitsuhiko | Observability | Context breakdown showing extensions, skills, AGENTS.md/CLAUDE.md with token usage | Capability: **observe.exporter** | -| **files** | mitsuhiko | Tool | Unified file browser with git status, session references, reveal/open/edit/diff actions | Resource (tool) | +| Name | Author | Category | Description | 
AGH Mapping | +| ------------------------- | ------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| **filter-output** | community | Security | Redacts sensitive data (API keys, tokens, passwords) from tool results before the LLM sees them | Capability: **content.validate** -- sanitize output before it reaches the model | +| **security** | community | Permission Gate | Blocks dangerous bash commands and protects sensitive file paths from writes | Capability: **permission.gate** -- pre-execution validation | +| **safe-git** | community | Permission Gate | Requires user approval before dangerous git operations (force push, reset --hard, etc.) | Capability: **permission.gate** -- git-specific guard | +| **plan-mode** | community | Tool Override | Read-only exploration mode that restricts the agent to non-destructive tools only | Capability: **permission.gate** + **agent.driver** tool filtering | +| **oracle** | community | Multi-Model | Gets a second opinion from an alternative AI model without switching context | Capability: **agent.driver** (multi-model dispatch) | +| **handoff** | jayshah5696 | Session Management | Transfers context to a new focused session with editable handoff prompt | Action: **session** (fork/spawn with context transfer) | +| **memory-mode** | community | Persistent Memory | Saves instructions to AGENTS.md with AI-assisted integration | Capability: **memory.backend** -- instruction persistence | +| **cost-tracker** | hjanuschka | Observability | Session spending analysis from pi logs | Capability: **observe.exporter** -- cost metrics | +| **usage-bar** | hjanuschka | Observability | Persistent provider status indicator showing token/cost stats in footer | Capability: **observe.exporter** -- real-time metrics display | +| **pi-rewind** 
| arpagon | Checkpointing | Git-based file checkpoints per turn, `/rewind` command with diff preview, redo stack | Action: **session** (checkpoint/restore) + Resource (hook) | +| **pi-powerline-footer** | nicobailon | UI Customization | Powerline-style status bar with git integration, model switcher, editor stash (Alt+S), welcome overlay, "working vibes" (themed loading messages) | Resource (UI extension) | +| **pi-canvas** | community | UI Component | Interactive TUI canvases (calendar, document, flights) rendered inline | Resource (UI extension) | +| **background-notify** | community | Notifications | Audio beep + terminal focus when tasks complete | Capability: **observe.exporter** (notification channel) | +| **session-emoji** | community | UI | AI-powered emoji in footer representing conversation context | Resource (UI extension) | +| **session-color** | community | UI | Colored band in footer to distinguish active sessions | Resource (UI extension) | +| **pi-ssh-remote** | community | Remote Execution | Redirects all file operations and commands to a remote host via SSH | Capability: **agent.driver** (remote execution backend) | +| **pi-dcp** | community | Context Management | Dynamic context pruning for intelligent conversation optimization | Capability: **message.transform** | +| **pi-rtk** | sherif-fanous | Token Optimization | Routes bash commands through rtk for LLM token savings | Capability: **message.transform** (output compression) | +| **ultrathink** | community | UI Effect | Rainbow animated effect with Knight Rider shimmer during thinking | Resource (UI extension / theme) | +| **pi-gui** | community | UI | GUI extension providing visual interface for the pi agent | Resource (UI extension) | +| **pi-screenshots-picker** | community | Tool | Screenshot picker extension for better screenshot selections | Resource (tool) | +| **pi-super-curl** | community | Tool | Empowers curl requests with coding agent capabilities | Resource (tool) | +| 
**go-to-bed** | mitsuhiko | Safety Guard | Late-night safety guard with explicit confirmation after midnight | Capability: **permission.gate** (time-based) | +| **multi-edit** | mitsuhiko | Tool Override | Replaces built-in edit tool with batch multi-edits and Codex-style patch support with preflight validation | Capability: **agent.driver** (tool replacement) | +| **loop** | mitsuhiko | Workflow | Prompt loop for rapid iterative coding with optional auto-continue | Action: **session** (iteration control) | +| **context** | mitsuhiko | Observability | Context breakdown showing extensions, skills, AGENTS.md/CLAUDE.md with token usage | Capability: **observe.exporter** | +| **files** | mitsuhiko | Tool | Unified file browser with git status, session references, reveal/open/edit/diff actions | Resource (tool) | ### Orchestration Tools (built on Pi SDK) -| Name | Category | Description | AGH Mapping | -|------|----------|-------------|-------------| -| **Grove** | Multi-Agent | Reads a structured markdown plan, parses work streams with dependencies, orchestrates parallel AI agents via web dashboard | Action: **session** (multi-agent orchestration) | -| **PiSwarm** | Multi-Agent | Parallel GitHub issue and PR processing using pi agent and git worktrees | Action: **session** (parallel agent dispatch) | -| **task-factory** | Queue Orchestration | Queue-first work orchestrator with planning, execution skills, and web UI | Action: **session** + **skills** (task queue) | -| **Gondolin** | Sandboxing | Linux micro-VM sandbox with programmable network/filesystem and Pi integration | Capability: **permission.gate** (sandboxed execution) | -| **pi-mobile** | Mobile Client | Android client for Pi coding agent with session management over Tailscale | Resource (client transport) | +| Name | Category | Description | AGH Mapping | +| ---------------- | ------------------- | 
-------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- | +| **Grove** | Multi-Agent | Reads a structured markdown plan, parses work streams with dependencies, orchestrates parallel AI agents via web dashboard | Action: **session** (multi-agent orchestration) | +| **PiSwarm** | Multi-Agent | Parallel GitHub issue and PR processing using pi agent and git worktrees | Action: **session** (parallel agent dispatch) | +| **task-factory** | Queue Orchestration | Queue-first work orchestrator with planning, execution skills, and web UI | Action: **session** + **skills** (task queue) | +| **Gondolin** | Sandboxing | Linux micro-VM sandbox with programmable network/filesystem and Pi integration | Capability: **permission.gate** (sandboxed execution) | +| **pi-mobile** | Mobile Client | Android client for Pi coding agent with session management over Tailscale | Resource (client transport) | ### Skills Packages -| Name | Author | Skills Included | AGH Mapping | -|------|--------|-----------------|-------------| -| **agent-stuff (mitsupi)** | mitsuhiko (Armin Ronacher) | commit, changelog, github, web-browser, tmux, sentry, ghidra, google-workspace, mermaid, native-web-search, openscad, pi-share, summarize, uv, frontend-design, librarian | Resource: **skills** catalog | -| **pi-amplike** | community | Web search and webpage extraction via Jina APIs | Resource: **skills** (search/extraction) | +| Name | Author | Skills Included | AGH Mapping | +| ------------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------- | +| **agent-stuff (mitsupi)** | mitsuhiko (Armin Ronacher) | commit, changelog, github, web-browser, tmux, sentry, ghidra, google-workspace, 
mermaid, native-web-search, openscad, pi-share, summarize, uv, frontend-design, librarian | Resource: **skills** catalog | +| **pi-amplike** | community | Web search and webpage extraction via Jina APIs | Resource: **skills** (search/extraction) | ### Themes -| Name | Description | AGH Mapping | -|------|-------------|-------------| +| Name | Description | AGH Mapping | +| ---------------- | -------------------------------------------- | ---------------------------------------- | | **pi-rose-pine** | Rose Pine themes (main, moon, dawn variants) | Resource: theme (if AGH adds UI theming) | --- @@ -120,6 +121,7 @@ The ecosystem is young but active, with 30+ community extensions, a curated awes **AGH mapping**: Capability: `permission.gate` -- this is a direct match. AGH can define a `PermissionGate` interface that receives a tool call and returns allow/deny/prompt-user. **Implementation ideas**: + - Ship a default `permission.gate` with configurable deny-lists for bash patterns and file paths - Allow stacking multiple gates (security + safe-git both fire) - Gates should be orderable (priority field) so more specific gates can override general ones @@ -129,6 +131,7 @@ The ecosystem is young but active, with 30+ community extensions, a curated awes ### 3. OpenClaw's Context Pruning + Compaction Safeguards **What it does**: OpenClaw uses two Pi extensions for context management: + - `context-pruning.ts`: Cache-TTL based pruning that trims old tool results using a time-decay model (recent = full, old = head+tail, ancient = removed) - `compaction-safeguard.ts`: Multi-stage compaction pipeline that preserves file operation history and tool failure data, with adaptive token budgeting @@ -139,6 +142,7 @@ The four-layer strategy: message count limit, token count limit, TTL time decay, **AGH mapping**: Capability: `message.transform` (for the `context` event equivalent) + a new compaction strategy extension point. 
**Implementation ideas**: + - Define a `CompactionStrategy` interface in AGH that receives the message history and returns a compacted version - Ship a default strategy but allow override via configuration - The `context` event pattern (rewrite messages before each LLM call) maps to `message.transform` @@ -179,6 +183,7 @@ The four-layer strategy: message count limit, token count limit, TTL time decay, **AGH mapping**: Resource: `hook` (on tool completion) + Action: `session` (checkpoint/restore). **Implementation ideas**: + - AGH hook that fires after file-modifying tools complete - Store checkpoints as git stash entries or lightweight refs - Expose checkpoint browse/restore via UDS API for CLI access @@ -239,6 +244,7 @@ The four-layer strategy: message count limit, token count limit, TTL time decay, ### Pattern 1: Event-Driven Lifecycle Hooks Pi's event system provides these hook points: + - `session_start` / `session_switch` / `session_shutdown` -- session lifecycle - `before_agent_start` -- inject/modify system prompt before each agent turn - `tool_call` -- intercept, gate, or modify tool calls before execution @@ -249,6 +255,7 @@ Pi's event system provides these hook points: - `input` -- transform user input before skill/template expansion **AGH equivalent**: These map to AGH's hook system. The most valuable hooks for AGH: + 1. Pre-tool-execution gate (permission.gate) 2. Pre-LLM message transform (message.transform) 3. 
Pre-compaction override diff --git a/docs/ideas/ext-ideas/research/integrations.md b/docs/ideas/ext-ideas/research/integrations.md new file mode 100644 index 000000000..2b9947b73 --- /dev/null +++ b/docs/ideas/ext-ideas/research/integrations.md @@ -0,0 +1,346 @@ +# AGH Extension Ideas — Third-Party Integrations + +**Date**: 2026-04-11 +**Sources**: 4 parallel research agents covering DevOps/CI, Communication/Productivity, Data/AI/Search, and Browser/Media/Specialized integrations +**Purpose**: Catalog concrete third-party integrations that could be built as AGH extensions + +--- + +## Executive Summary + +Four parallel research agents surveyed the MCP ecosystem (21,000+ servers on Glama.ai) and mapped **120+ third-party integrations** across 12 categories. The key finding: **~80% of integrations have existing MCP servers** that AGH can wrap as subprocess extensions with minimal effort. The remaining ~20% need custom extensions built from REST APIs. + +AGH's differentiator over standalone MCP servers is the **Host API** — extensions can combine external tool access with session memory, skills, observe events, and cross-tool orchestration to create stateful, context-aware workflows.
+ +--- + +## Priority Summary — Top 30 Integrations + +### Tier 1: Ship First (highest impact, production-ready MCP servers) + +| # | Integration | Category | MCP Status | Use Case | +| --- | ------------------ | -------------- | -------------------- | --------------------------------------------------------- | +| 1 | **GitHub** | DevOps | Official | PR lifecycle, issue management, code review automation | +| 2 | **Slack** | Communication | Official (47 tools) | Team Q&A bot, incident coordination, deploy notifications | +| 3 | **Linear** | Project Mgmt | Community | Ticket-to-PR automation, sprint ops, bug triage | +| 4 | **Notion** | Knowledge Base | Official | Living docs, research compilation, sprint planning | +| 5 | **Sentry** | Monitoring | Official | Error alert → investigate → fix → PR pipeline | +| 6 | **Playwright** | Browser | Official (Microsoft) | E2E testing, web scraping, form automation | +| 7 | **Supabase** | Database | Official (20+ tools) | Full BaaS: DB, auth, storage, edge functions | +| 8 | **Firecrawl** | Web Scraping | Official | Web-to-markdown, site crawling, content extraction | +| 9 | **GitHub Actions** | CI/CD | Community | CI monitoring, failure diagnosis, workflow optimization | +| 10 | **Stripe** | Finance | Official (25 tools) | Billing ops, subscription mgmt, revenue reports | + +### Tier 2: Build Next (strong value, mature ecosystem) + +| # | Integration | Category | MCP Status | Use Case | +| --- | --------------------- | -------------- | ----------------------- | -------------------------------------------------- | +| 11 | **Datadog** | Monitoring | Official (GA) | Observability investigation, latency diagnosis | +| 12 | **Google Workspace** | Productivity | Community (100+ tools) | Email, calendar, docs, sheets automation | +| 13 | **Figma** | Design | Official (Code Connect) | Design-to-code, component sync, design review | +| 14 | **Jira + Confluence** | Project Mgmt | Official (Atlassian) | Enterprise issue tracking, 
knowledge management | +| 15 | **Neon** | Database | Official | Branch-safe migrations, query tuning | +| 16 | **Terraform** | Infrastructure | Official (HashiCorp) | IaC provisioning, plan/apply workflows | +| 17 | **Kubernetes** | Infrastructure | Multiple | Pod debugging, deployment management, log analysis | +| 18 | **Snyk** | Security | Official (11 tools) | SAST, SCA, container scanning, SBOM | +| 19 | **SonarQube** | Security | Official (423 stars) | Code quality gates, tech debt tracking | +| 20 | **Brave Search** | Web Search | Official | Privacy-first research, error investigation | + +### Tier 3: Differentiate (strategic value, growing demand) + +| # | Integration | Category | MCP Status | Use Case | +| --- | ------------------ | ------------- | ---------- | ---------------------------------------------- | +| 21 | **Grafana** | Monitoring | Official | Dashboard-driven diagnosis, anomaly detection | +| 22 | **PagerDuty** | Monitoring | Community | On-call copilot, incident lifecycle management | +| 23 | **Ollama** | AI/ML | Community | Local model inference, privacy-sensitive ops | +| 24 | **OpenRouter** | AI/ML | Community | Multi-model gateway, cost-optimized inference | +| 25 | **n8n** | Automation | Community | Self-hosted workflow automation (1,396 nodes) | +| 26 | **Home Assistant** | IoT | Official | Smart home control, energy monitoring | +| 27 | **AWS S3** | Cloud Storage | Official | File management, data pipeline triggers | +| 28 | **Exa** | Web Search | Community | Neural semantic search for research | +| 29 | **PostHog** | Analytics | Official | Product analytics, feature flags, experiments | +| 30 | **Twitter/X** | Social Media | Community | Social media management, brand monitoring | + +--- + +## Detailed Integration Catalog + +### 1. 
DevOps & Developer Tools + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ----------------------- | ----------------------------- | --------------------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------- | +| **GitHub** | Official + community | Full API | PRs, issues, code search, branches, Actions | Agent receives Linear ticket → researches codebase → implements fix → opens PR → monitors CI → responds to review comments | +| **GitLab** | Official | MR, pipelines, issues | Merge requests, CI pipelines, code browsing | Pipeline fails → agent reads logs → correlates with recent MR → auto-fixes or creates issue with root cause | +| **GitHub Actions** | Community | Workflow mgmt | Trigger, cancel, rerun workflows, read logs | Agent monitors builds → diagnoses flaky tests → detects failure patterns → suggests workflow optimizations | +| **CircleCI** | Official | Failure diagnosis | Error summaries, flaky test detection, rollbacks | Build fails → agent diagnoses → correlates with commits → creates fix PR or triggers rollback | +| **Jenkins** | Official plugin | CI/CD automation | Job management, build logs, pipeline control | Enterprise CI management with complex multi-stage pipelines | +| **ArgoCD** | K8s MCP Toolkit | GitOps | App sync, deployment status, rollback | Agent monitors sync status → detects drift → checks pod health → applies fix or rolls back | +| **Vercel** | Official handler | Deployments | Deploy, rollback, environment mgmt | Agent deploys to staging → runs smoke tests → promotes to production → posts summary | +| **Railway** | Official | Service mgmt | Deploy, scale, configure environments | Agent manages the lifecycle of Railway services | +| **SonarQube** | Official (423 stars) | Quality gates | Bugs, vulnerabilities, code smells, tech debt | PR created → agent runs analysis → auto-fixes simple
issues → blocks merge if quality gate fails | +| **Snyk** | Official (11 tools) | Security scanning | SAST, SCA, IaC, container, SBOM, AI-BOM | Nightly scans → triage by severity → auto-PR for critical vulns → SBOM for compliance | +| **Semgrep** | Built into binary | Static analysis | Custom rules, vulnerability detection | Pre-commit scanning → inline PR comments with fix suggestions | +| **Terraform** | Official (HashiCorp) | IaC | Registry, plan, apply, workspace mgmt | Agent generates HCL → runs plan → presents for approval → applies → updates docs | +| **Pulumi** | Official | IaC (code-based) | Infrastructure in Go/TS/Python | Agent writes Pulumi Go code for infrastructure changes | +| **Kubernetes** | Multiple (kubectl, k8m, Lens) | Cluster mgmt | Pods, logs, events, helm, istio | Alert → agent checks pods → reads logs → identifies OOM kill → scales up → creates post-mortem | +| **AWS** | Official (60+ servers) | Cloud ops | Lambda, ECS, S3, EC2, RDS, CloudWatch | Agent monitors cloud costs → identifies unused resources → proposes cleanup with savings | +| **Docker** | Community | Container mgmt | Build, run, manage containers | Agent builds images → runs tests in containers → pushes to registry | +| **Dependabot/Renovate** | **None (build opportunity)** | — | Dependency update management | Agent monitors deps → creates grouped PRs → runs security scans → auto-merges safe patches | + +### 2. 
Communication & Messaging + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ------------------- | -------------------------------- | ----------------- | --------------------------------------------- | --------------------------------------------------------------------------------------------- | +| **Slack** | Official (47 tools, GA Feb 2026) | Full workspace | Channels, messages, threads, canvases, search | Agent monitors #help-engineering → researches codebase → posts threaded answer with code refs | +| **Discord** | Community (multiple) | Server mgmt | Channels, messages, forums, reactions | Community support bot → searches docs + past issues → provides answers | +| **Microsoft Teams** | Official (Work IQ) | Chat/channels | Create chats, post messages, manage channels | Meeting prep agent → pulls docs from SharePoint → posts briefing to Teams channel | +| **Telegram** | Community (multiple) | Bot API + MTProto | Messaging, media, groups | Ops notification pipeline → deployment status, health alerts, CI results | +| **WhatsApp** | Community (beta) | Web API | Send/receive messages | Customer response agent → looks up CRM → drafts contextual replies | +| **Email (Gmail)** | Community (100+ tools) | Full Gmail API | Send, read, search, label, filter | Email triage → categorize by urgency → draft routine replies → escalate important ones | +| **Email (Outlook)** | Official (Work IQ) | Graph API | Messages, calendar, files | Report distributor → generates status reports → formats as email → sends to stakeholders | +| **Matrix** | **None (build opportunity)** | — | Decentralized messaging | Self-hosted comms agent for privacy-focused organizations | + +### 3. 
Productivity & Project Management + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| -------------------- | ------------------------- | -------------- | ---------------------------------------- | ------------------------------------------------------------------------------------------- | +| **Notion** | Official | Full API | Pages, databases, blocks, search | Code changes → agent auto-updates Notion docs → cross-references existing pages | +| **Obsidian** | Community (60+ servers) | Vault access | Read, write, search, tags, backlinks | Personal knowledge agent → auto-creates notes from conversations → links related concepts | +| **Google Workspace** | Community (100+ tools) | Full suite | Gmail, Calendar, Docs, Sheets, Drive | Meeting notes agent → records action items → creates Doc → assigns tasks → sends follow-ups | +| **Microsoft 365** | Official + community | Full suite | Word, Excel, SharePoint, OneDrive, Teams | Onboarding automator → sets up accounts → creates folders → sends welcome email | +| **Linear** | Community | Full API | Issues, projects, cycles, teams | Auto-ticket from TODO/FIXME → sprint reporter → bug-to-fix pipeline → PR-to-ticket linker | +| **Jira** | Official (Atlassian Rovo) | OAuth 2.1 | JQL, epics, sprints, transitions | Ticket auto-population → enriches with codebase context → cross-system sync | +| **Confluence** | Official (Atlassian Rovo) | OAuth 2.1 | Pages, spaces, search | Runbook maintainer → architecture doc generator → post-mortem writer | +| **Asana** | Official (mcp.asana.com) | Full API | Tasks, projects, sections, custom fields | Task breakdown agent → high-level description → subtasks with estimates and dependencies | +| **Monday.com** | Official | GraphQL API | Boards, items, updates, documents | Board automator → external events create/update items automatically | +| **ClickUp** | Community | Broad coverage | Tasks, docs, goals, OKRs, chat | OKR tracker → monitors key results → weekly 
updates → flags at-risk objectives | +| **Shortcut** | Official (hosted) | OAuth | Stories, Epics, Docs, iterations | Story enricher → researches codebase → adds technical details and acceptance criteria | +| **Figma** | Official (Code Connect) | Design data | Nodes, auto-layout, variants, tokens | Design-to-code → reads frame → maps to codebase components → generates production React | +| **Miro** | Official (beta) | Board mgmt | Elements, boards, collaboration | Architecture diagrammer → creates system diagrams from codebase analysis | +| **Excalidraw** | Official + community | Canvas toolkit | Elements, real-time sync, WebSocket | Architecture sketch → generates diagrams from natural language descriptions | + +### 4. Databases & Data + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ---------------------- | ----------------------------------- | -------------------- | ---------------------------------------------------- | ------------------------------------------------------------------------------------- | +| **PostgreSQL** | Multiple (official, Google Toolbox) | SQL access | Read-only queries, schema inspection | Bug investigation → query production DB → find anomaly → write migration + code patch | +| **MySQL** | Community + Google Toolbox | SQL access | Query, schema browsing | Legacy system analysis → understand schema → generate Go structs | +| **MongoDB** | Official | Document CRUD | Aggregation pipelines, Atlas mgmt | Identify slow aggregations → propose index optimizations → validate with explain() | +| **Redis** | Official (Dec 2025) | Data mgmt | Keys, TTLs, pub/sub, search | Debug cache stampede → inspect TTLs → implement jittered expiration fix | +| **Supabase** | Official (20+ tools, OAuth) | Full BaaS | DB, auth, storage, edge functions | Bootstrap entire backend through natural language in single session | +| **Neon** | Official (29 tools) | Serverless Postgres | Branch-based migrations, query tuning | Create 
branch → test migration → validate queries → merge or rollback | +| **PlanetScale** | Official | MySQL branching | Branches, deploy requests | Feature branch → apply migrations → run integration tests → deploy request | +| **Turso** | Official (--mcp flag) | Edge SQLite | Schema design, data ops | Set up edge database → design schema → generate client code → deploy replicas | +| **DynamoDB** | Official (AWS Labs) | NoSQL modeling | Design patterns, cost analysis, code gen | Analyze MySQL schema → design DynamoDB single-table model → generate Go SDK code | +| **Google MCP Toolbox** | Official (Google) | **40+ data sources** | Postgres, MySQL, MongoDB, Redis, Neo4j, Snowflake... | Single extension → access any database → join data across engines | + +### 5. Vector Stores & Search + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ----------------- | ----------------------------- | --------------------- | ---------------------------------------- | ----------------------------------------------------------------------------------- | +| **LanceDB** | Community (multiple) | Embedded vectors | Zero-config, disk-based, semantic search | AGH semantic memory backend → index memories + skills → retrieve during sessions | +| **Qdrant** | Official | Vector search | HNSW, filtered search, code search | Index codebase → semantic code search during debugging → find similar patterns | +| **Milvus** | Official (5 search tools) | Industrial-scale | Billion-vector, hybrid search | Large codebase indexing → semantic function search → incident similarity matching | +| **Pinecone** | Via unified (weave-mcp) | Managed vectors | Billion-scale managed | Documentation indexing → semantic search during coding sessions | +| **Elasticsearch** | Community + Google Toolbox | Full-text + analytics | Index mgmt, document ops | Search application logs → diagnose production errors → generate root cause analysis | +| **Algolia** | Official (Go + Node + hosted) | 
Enterprise search | Synonyms, ranking, analytics | Configure search indexes → set up ranking rules → test quality → deploy | +| **Meilisearch** | Official | Dev-friendly search | Typo-tolerant, fast | Index knowledge base → instant typo-tolerant search → build UI component | +| **Firecrawl** | Official (98K stars) | Web-to-markdown | Crawl, scrape, media parse | Crawl competitor docs → convert to markdown → index for semantic search | +| **Brave Search** | Official | Privacy-first search | Independent index, no tracking | Web research for debugging → search error messages, Stack Overflow, GitHub issues | +| **Exa** | Community | Neural search | Semantic understanding | Semantic code search → "Go implementation of event sourcing with SQLite" | +| **Tavily** | Community | RAG-optimized | Fact-checked, concise results | Research tasks → find API docs, known issues, best practices | + +### 6. AI/ML Platforms + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ---------------- | -------------- | -------------------- | --------------------------------- | --------------------------------------------------------------------------------------------------------- | +| **Ollama** | Community | Local inference | Run Llama, Mistral, etc. 
locally | Local embedding generation → privacy-sensitive code analysis → offline operation | +| **OpenRouter** | Community | 100+ models | Multi-model gateway, cost routing | Dynamic model selection → fast model for quick questions → reasoning model for complex analysis | +| **Hugging Face** | Community | Model discovery | Hub access, model search | Search for embedding model → download via Ollama → benchmark on domain data → configure as memory backend | +| **Replicate** | Community | Hosted inference | Run any OSS model via API | Image generation for mockups → specialized NLP models for code analysis | +| **Groq** | Via OpenRouter | Ultra-fast inference | Custom LPU hardware | Rapid code analysis → real-time review suggestions during pair programming | + +### 7. Monitoring & Observability + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ------------- | ---------------------- | ------------------ | ----------------------------------------- | ------------------------------------------------------------------------------------------------------- | +| **Sentry** | Official | Error tracking | Stack traces, error frequency, releases | Critical error → query Sentry → search codebase → create fix PR → verify error rate drops | +| **Datadog** | Official (GA Mar 2026) | Full observability | Logs, metrics, traces, APM, SLOs | Latency spike → query traces → correlate with deployment → identify commit → revert PR | +| **Grafana** | Official | Dashboard access | Data sources, incidents, metrics | Query dashboards for anomalies → correlate with deployments → generate incident summaries | +| **PagerDuty** | Community | Incident mgmt | Acknowledge, resolve, reassign, analytics | Alert → acknowledge → gather context from Datadog/Sentry → diagnose → remediate → resolve → post-mortem | + +### 8. 
Automation & Workflow + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ------------- | ---------- | ----------------- | ----------------------------------- | -------------------------------------------------------------------------------------- | +| **Zapier** | Official | 8,000+ apps | Cross-app automation, auth handling | Code review → create Jira ticket + Slack summary + Google Sheet update in one session | +| **n8n** | Community | 1,396 nodes | Self-hosted, privacy-first | Design workflow → monitor GitHub for issues → classify with AI → assign → track | +| **Inngest** | Official | Durable execution | Go SDK, event-driven, AgentKit | Orchestrate deployment pipeline → test → build → stage → smoke test → promote → notify | +| **Temporal** | Community | Durable workflows | Retry logic, long-running processes | Data pipeline orchestration → retry handlers → compensation → monitoring | +| **Pipedream** | Community | 2,700+ apps | Code-first (Python/Node/Go/Bash) | Webhook listener → process alerts → trigger remediation | + +### 9. 
Finance + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ----------------- | ---------------------------- | ----------------- | ------------------------------------------- | ------------------------------------------------------------------------------------ | +| **Stripe** | Official (25 tools) | Payment lifecycle | Customers, subscriptions, invoices, refunds | Monitor churn → generate invoices → create discount codes → revenue reports | +| **Coinbase** | Official | Crypto ops | Wallet mgmt, onramps, stablecoins | Portfolio management → track balances → execute trades → tax reporting | +| **Yahoo Finance** | Community | Market data | Prices, fundamentals, earnings | Stock screening → earnings analysis → peer comparison → research notes | +| **Plaid** | **None (build opportunity)** | — | Bank account aggregation | Connect bank accounts → categorize transactions → spending patterns → budget reports | + +### 10. Browser & Media + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ----------------- | ---------------------- | ---------------- | ----------------------------------- | ---------------------------------------------------------------------------------- | +| **Playwright** | Official (Microsoft) | Full browser | Click, fill, navigate, screenshot | E2E testing → navigate pages → assert content → report results via observe API | +| **Browserbase** | Official | Cloud browsers | Bot evasion, managed sessions | Competitive intelligence → scrape JS-heavy pricing pages → extract structured data | +| **YouTube** | Community (490+ stars) | Transcripts | Transcript extraction, search | Research playlist → extract transcripts → summarize → build knowledge base | +| **DALL-E / Flux** | Community | Image generation | Text-to-image | Generate diagrams, illustrations, hero images for documentation | +| **ElevenLabs** | Community | TTS | Voice synthesis | Convert blog posts to podcast-style audio narration | +| 
**Spotify** | Community (93 tools) | Music control | Playback, playlists, catalog search | Curate workout playlists → analyze track features → learn preferences over time | + +### 11. Social Media + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| -------------- | ---------------------- | ---------------- | ------------------------------------ | -------------------------------------------------------------------------------------------- | +| **Twitter/X** | Community (8+ servers) | Posting, search | Tweets, threads, mentions, analytics | Draft tweets from product updates → schedule threads → monitor engagement → weekly analytics | +| **Bluesky** | Community (57 tools) | Full AT Protocol | Posting, firehose, social graph | Cross-post content → monitor brand mentions → audience analytics | +| **LinkedIn** | Via aggregators | Posting | Articles, engagement tracking | Draft LinkedIn articles from internal knowledge → optimize posting times | +| **recast-mcp** | Community | Multi-platform | URL → platform-specific content | Blog post → LinkedIn article + Twitter thread + Reddit post + newsletter | + +### 12. 
Specialized & Niche + +| Integration | MCP Server | Tools | Key Capability | Example AGH Workflow | +| ------------------- | --------------------- | ----------------- | ----------------------------------------- | ------------------------------------------------------------------------------------------------- | +| **Home Assistant** | Official (built-in) | IoT control | Devices, automations, energy | Manage daily routines → lighting, HVAC, security → energy reports | +| **AWS S3** | Official (multiple) | Object storage | Buckets, objects, presigned URLs | Monitor for new files → process CSVs → generate presigned URLs → manage lifecycle | +| **Google Maps** | Official (18+ tools) | Geolocation | Geocoding, routing, POI search | Optimize multi-stop delivery routes → calculate ETAs → generate static maps | +| **Mapbox** | Official | Geospatial | Routing, isochrones, map matching | Real estate analysis → isochrone maps → commute times → nearby amenities | +| **SendGrid** | Community (14+ tools) | Email marketing | Campaigns, templates, deliverability | Create campaigns from briefs → manage segments → A/B test subjects | +| **Meta-MCP (Magg)** | Community | Self-extending | Discover + install MCP servers at runtime | Agent lacks a tool → discovers and installs appropriate MCP server → uses it → permanently learns | +| **Agoragentic** | Community | Agent marketplace | Agent-to-agent services + crypto payments | Agent hires specialized agents for subtasks → pays via USDC on Base L2 | + +--- + +## Build-From-Scratch Opportunities + +These integrations have no existing MCP server and represent differentiation for AGH: + +| Integration | What to Build | Why It Matters | +| -------------------------- | ------------------------------------------------- | --------------------------------------------------- | +| **Dependabot/Renovate** | Dependency update mgmt with security scanning | Combining updates + security + auto-merge is unique | +| **GitHub Security Alerts** | 
Dependabot alerts, secret scanning, code scanning | Notable gap — GitHub's security features lack MCP | +| **Plaid** | Banking data aggregation | Personal finance agent enabler | +| **Matrix** | Decentralized messaging | Serves privacy-focused organizations | +| **MQTT (standalone)** | IoT device communication | Industrial monitoring beyond Home Assistant | +| **Remotion** | Programmatic video in React | Data visualization videos | +| **ConvertKit** | Creator email platform | Creator economy automation | +| **LaunchDarkly** | Feature flag management | Agent-controlled progressive rollouts | +| **Incident.io** | Modern incident management | Growing platform with no MCP | + +--- + +## Recommended Extension Bundles + +### Bundle 1: Development Lifecycle + +**Goal**: Ticket → Code → PR → Merged, fully autonomous + +- GitHub MCP (version control, PRs) +- Linear or Jira MCP (issue tracking) +- GitHub Actions MCP (CI monitoring) +- SonarQube + Snyk MCP (quality + security gates) +- Slack MCP (team notifications) + +### Bundle 2: Incident Response + +**Goal**: Alert → Diagnose → Fix → Resolve, with cross-tool investigation + +- Sentry MCP (error tracking) +- Datadog MCP (metrics, traces, logs) +- PagerDuty MCP (incident lifecycle) +- Grafana MCP (dashboards) +- Kubernetes MCP (infrastructure) +- Slack MCP (coordination) +- GitHub MCP (fix PRs) + +### Bundle 3: Infrastructure Operations + +**Goal**: Provision → Deploy → Monitor → Optimize + +- Terraform MCP (IaC) +- AWS/GCP/Azure MCP (cloud resources) +- Kubernetes + ArgoCD MCP (orchestration) +- Datadog/Grafana MCP (monitoring) +- Slack MCP (notifications) + +### Bundle 4: Knowledge Worker + +**Goal**: Research → Document → Share → Keep Updated + +- Notion/Confluence MCP (knowledge base) +- Google Workspace MCP (email, docs, sheets) +- Firecrawl MCP (web research) +- Brave/Exa MCP (search) +- Figma MCP (design context) +- Obsidian MCP (personal knowledge) + +### Bundle 5: Data & Analytics + +**Goal**: Query → Analyze → 
Report → Automate + +- Google MCP Toolbox (40+ data sources) +- Supabase/Neon MCP (primary databases) +- LanceDB/Qdrant MCP (vector search) +- PostHog MCP (product analytics) +- n8n/Inngest MCP (workflow automation) + +--- + +## Architecture Recommendations + +### 1. Thin Wrapper Pattern (Default) + +Most integrations have existing MCP servers. AGH extensions wrap them as subprocesses, adding: + +- Session context (memory, workspace awareness) +- Observe event emission (audit trail) +- Credential management (TOML config) +- Cross-tool orchestration (compose multiple MCP servers in one workflow) + +### 2. Unified Gateway Pattern (For Categories) + +For categories with many providers (databases, search, vector stores), use a single extension that supports multiple backends: + +- **Google MCP Toolbox** covers 40+ data sources +- **weave-mcp** covers 11 vector databases +- **MCP Omnisearch** covers 7 search providers +- **Composio** covers thousands of APIs + +### 3. Security Boundaries + +- 43% of public MCP servers have command injection vulnerabilities +- 7.6% of ClawHub skills contain dangerous patterns +- AGH extensions must enforce permission boundaries, rate limiting, and audit logging +- Read-only by default; write access requires explicit opt-in +- Credentials managed via TOML config, never hardcoded + +### 4. 
AGH Differentiator + +Unlike standalone MCP servers, AGH extensions can: + +- **Remember** — Store findings in session memory for future reference +- **Learn** — Generate skills from successful workflows +- **Orchestrate** — Compose multiple tools across services in a single session +- **Observe** — Record full audit trail of cross-system operations + +--- + +## Sources + +Detailed per-category research files: + +- [integrations_devops.md](research/integrations_devops.md) — 37 integrations across DevOps/CI/CD +- [integrations_communication.md](research/integrations_communication.md) — 35 integrations across comms/productivity +- [integrations_data_ai.md](research/integrations_data_ai.md) — 50+ integrations across data/AI/search +- [integrations_specialized.md](research/integrations_specialized.md) — 50 integrations across browser/media/finance/IoT/niche diff --git a/.compozy/tasks/ext-ideas/research/integrations_communication.md b/docs/ideas/ext-ideas/research/integrations_communication.md similarity index 86% rename from .compozy/tasks/ext-ideas/research/integrations_communication.md rename to docs/ideas/ext-ideas/research/integrations_communication.md index ec5ea782e..0d1a9f93a 100644 --- a/.compozy/tasks/ext-ideas/research/integrations_communication.md +++ b/docs/ideas/ext-ideas/research/integrations_communication.md @@ -8,40 +8,40 @@ This document catalogs communication, productivity, and business tool integratio ## Master Integration Table -| Category | Integration | MCP Server Exists? 
| Server Source | Maturity | AGH Priority | -|---|---|---|---|---|---| -| **Communication** | Slack | Yes (Official + Community) | Slack official (47 tools), korotovsky/slack-mcp-server | Production | High | -| | Discord | Yes (Community) | Multiple community servers | Stable | Medium | -| | Microsoft Teams | Yes (Official) | Microsoft Work IQ Teams MCP | Production | Medium | -| | Telegram | Yes (Community) | overpod, sparfenyuk, kfastov servers | Stable | Medium | -| | WhatsApp | Yes (Community) | lharries/whatsapp-mcp, Sinch MCP | Beta | Low | -| | Email (Gmail) | Yes (Multiple) | taylorwilsdon/google_workspace_mcp | Production | High | -| | Email (Outlook) | Yes (Official + Community) | Microsoft Work IQ Mail, Softeria ms-365-mcp | Production | High | -| | Matrix | No | Needs to be built | N/A | Low | -| **Productivity** | Google Workspace | Yes (Multiple) | taylorwilsdon/google_workspace_mcp (100+ tools) | Production | High | -| | Microsoft 365 | Yes (Official + Community) | Work IQ servers, Softeria, PnP CLI | Production | High | -| | Notion | Yes (Official) | Notion official MCP | Production | High | -| | Obsidian | Yes (Community) | mcpvault, cyanheads, MarkusPfundstein (60+ servers) | Stable | Medium | -| | Roam Research | No | Needs to be built | N/A | Low | -| **Project Mgmt** | Linear | Yes (Community) | jerhadf/linear-mcp-server, DX Heroes | Stable | High | -| | Jira | Yes (Official + Community) | Atlassian Rovo MCP (OAuth 2.1), 74+ servers | Production | High | -| | Asana | Yes (Official) | Official at mcp.asana.com, roychri community | Production | Medium | -| | Monday.com | Yes (Official) | mondaycom/mcp (GraphQL API) | Production | Medium | -| | Trello | Yes (Community) | delorenj/mcp-server-trello, Composio | Stable | Low | -| | ClickUp | Yes (Community) | taazkareem/clickup-mcp-server | Stable | Medium | -| | Basecamp | Yes (Community) | georgeantonopoulos/Basecamp-MCP-Server (75 tools) | Stable | Low | -| | Shortcut | Yes (Official) | Official 
hosted server (OAuth) | Production | Medium | -| **Knowledge Base** | Confluence | Yes (Official) | Atlassian Rovo MCP | Production | High | -| | GitBook | Yes (Auto-generated) | Auto-generated per docs site, MCPBook | Production | Medium | -| | ReadMe | Yes | MCP server + llms.txt | Stable | Low | -| | Mintlify | Yes (Auto-generated) | Auto-generated per docs site (free) | Production | Low | -| **CRM/Sales** | Salesforce | Yes (Official) | Salesforce Agentforce MCP (60+ tools) | Production | Medium | -| | HubSpot | Yes (Community) | Community server (116 stars, FAISS search) | Stable | Medium | -| | Pipedrive | Yes (Community) | iamsamuelfraga/mcp-pipedrive | Stable | Low | -| | Attio | Yes (Official) | Official at mcp.attio.com + kesslerio community | Production | Low | -| **Design** | Figma | Yes (Official) | Official at mcp.figma.com (Code Connect) | Production | High | -| | Miro | Yes (Official) | Official Miro MCP Server | Beta | Medium | -| | Excalidraw | Yes (Official + Community) | excalidraw/excalidraw-mcp, yctimlin | Stable | Medium | +| Category | Integration | MCP Server Exists? 
| Server Source | Maturity | AGH Priority | +| ------------------ | ---------------- | -------------------------- | ------------------------------------------------------ | ---------- | ------------ | +| **Communication** | Slack | Yes (Official + Community) | Slack official (47 tools), korotovsky/slack-mcp-server | Production | High | +| | Discord | Yes (Community) | Multiple community servers | Stable | Medium | +| | Microsoft Teams | Yes (Official) | Microsoft Work IQ Teams MCP | Production | Medium | +| | Telegram | Yes (Community) | overpod, sparfenyuk, kfastov servers | Stable | Medium | +| | WhatsApp | Yes (Community) | lharries/whatsapp-mcp, Sinch MCP | Beta | Low | +| | Email (Gmail) | Yes (Multiple) | taylorwilsdon/google_workspace_mcp | Production | High | +| | Email (Outlook) | Yes (Official + Community) | Microsoft Work IQ Mail, Softeria ms-365-mcp | Production | High | +| | Matrix | No | Needs to be built | N/A | Low | +| **Productivity** | Google Workspace | Yes (Multiple) | taylorwilsdon/google_workspace_mcp (100+ tools) | Production | High | +| | Microsoft 365 | Yes (Official + Community) | Work IQ servers, Softeria, PnP CLI | Production | High | +| | Notion | Yes (Official) | Notion official MCP | Production | High | +| | Obsidian | Yes (Community) | mcpvault, cyanheads, MarkusPfundstein (60+ servers) | Stable | Medium | +| | Roam Research | No | Needs to be built | N/A | Low | +| **Project Mgmt** | Linear | Yes (Community) | jerhadf/linear-mcp-server, DX Heroes | Stable | High | +| | Jira | Yes (Official + Community) | Atlassian Rovo MCP (OAuth 2.1), 74+ servers | Production | High | +| | Asana | Yes (Official) | Official at mcp.asana.com, roychri community | Production | Medium | +| | Monday.com | Yes (Official) | mondaycom/mcp (GraphQL API) | Production | Medium | +| | Trello | Yes (Community) | delorenj/mcp-server-trello, Composio | Stable | Low | +| | ClickUp | Yes (Community) | taazkareem/clickup-mcp-server | Stable | Medium | +| | Basecamp 
| Yes (Community) | georgeantonopoulos/Basecamp-MCP-Server (75 tools) | Stable | Low | +| | Shortcut | Yes (Official) | Official hosted server (OAuth) | Production | Medium | +| **Knowledge Base** | Confluence | Yes (Official) | Atlassian Rovo MCP | Production | High | +| | GitBook | Yes (Auto-generated) | Auto-generated per docs site, MCPBook | Production | Medium | +| | ReadMe | Yes | MCP server + llms.txt | Stable | Low | +| | Mintlify | Yes (Auto-generated) | Auto-generated per docs site (free) | Production | Low | +| **CRM/Sales** | Salesforce | Yes (Official) | Salesforce Agentforce MCP (60+ tools) | Production | Medium | +| | HubSpot | Yes (Community) | Community server (116 stars, FAISS search) | Stable | Medium | +| | Pipedrive | Yes (Community) | iamsamuelfraga/mcp-pipedrive | Stable | Low | +| | Attio | Yes (Official) | Official at mcp.attio.com + kesslerio community | Production | Low | +| **Design** | Figma | Yes (Official) | Official at mcp.figma.com (Code Connect) | Production | High | +| | Miro | Yes (Official) | Official Miro MCP Server | Beta | Medium | +| | Excalidraw | Yes (Official + Community) | excalidraw/excalidraw-mcp, yctimlin | Stable | Medium | --- @@ -56,10 +56,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full Slack workspace integration -- read channels, search message history, send messages, manage canvases, thread conversations, and manage users. **MCP servers available:** + - **Official Slack MCP Server** (GA February 2026): 47 tools, OAuth authentication, enterprise-grade security. Supports DMs, group DMs, channels, threads, and canvases. - **korotovsky/slack-mcp-server** (Community): Supports Stdio/SSE/HTTP transports, proxy settings, DMs, Group DMs, smart history fetch. 9,000+ active users, 30,000+ monthly visitors. 
**AGH agent use cases:** + - **Codebase Q&A bot:** Agent monitors a `#help-engineering` channel, detects questions about the codebase, researches the answer using workspace memory and file access, then posts a threaded reply with code references. - **Standup summarizer:** Agent reads daily standup messages from a channel, consolidates blockers and progress across team members, writes a summary to Notion or Confluence. - **Incident coordinator:** Agent detects urgent messages in `#incidents`, creates a Linear/Jira ticket, spins up a dedicated channel, posts runbook steps from the knowledge base. @@ -74,10 +76,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Discord server management, channel operations, message sending/reading, forum post creation, and reaction handling. **MCP servers available:** + - Multiple community servers providing channel management, message operations, and server administration. - Unified notification servers that span Slack, Discord, and Telegram from a single interface. **AGH agent use cases:** + - **Community support bot:** Agent monitors Discord support channels, searches documentation and past issues, provides answers to community questions. - **Release announcer:** Agent publishes release notes to Discord announcement channels when a new version is tagged. - **Meeting scheduler:** Agent coordinates availability across Discord threads and creates calendar events. @@ -91,10 +95,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Search messages, manage chats/channels, send messages, create group chats, handle user/team operations. Uses Microsoft Graph API with device code authentication. **MCP servers available:** + - **Microsoft Work IQ Teams MCP** (Official): Create, update, delete chats; add members; post messages; channel operations. Requires Microsoft 365 Copilot license for full features. 
- **Community servers** on mcpservers.org (InditexTech). **AGH agent use cases:** + - **Meeting prep agent:** Before a scheduled meeting, agent pulls relevant documents from SharePoint, recent email threads from Outlook, and Jira ticket updates, then posts a briefing to the Teams channel. - **Cross-platform sync:** Agent mirrors important decisions from Teams channels to Slack or Linear for engineering teams that use different tools. @@ -107,12 +113,14 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full Telegram access via MTProto protocol or Bot API -- messaging, chat management, media tools, file sending/downloading. **MCP servers available:** + - **overpod/telegram** (MTProto): Full account access, messaging, chat management, media tools. - **sparfenyuk/mcp-telegram**: User client API integration with AI assistants. - **kfastov/telegram-mcp-server**: Bot API integration with 35 tools. - **qpd-v/telegram-communicator**: Multi-account support, tiered permissions (March 2026). **AGH agent use cases:** + - **Ops notification pipeline:** Agent sends deployment status, health alerts, and CI/CD results to a Telegram ops group. - **Personal assistant:** Agent receives natural language commands via Telegram DM, executes tasks (file lookups, scheduling, code searches), and replies with results. @@ -125,11 +133,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Send and receive WhatsApp messages via WhatsApp Web multi-device API or business APIs. **MCP servers available:** + - **lharries/whatsapp-mcp**: WhatsApp Web integration for personal accounts. - **Sinch MCP** (February 2026): Unified messaging across SMS, WhatsApp, RCS, and email. - **Unipile MCP**: Unified access across LinkedIn, WhatsApp, Instagram, Messenger. 
**AGH agent use cases:** + - **Customer response agent:** Agent receives WhatsApp business messages, looks up customer info in CRM, drafts contextual replies. - **Appointment reminder:** Agent sends appointment reminders via WhatsApp based on calendar events. @@ -142,12 +152,14 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Send, read, search, label, filter, forward, reply to emails. Manage attachments, calendar invites, and threading. **MCP servers available:** + - **Gmail via Google Workspace MCP** (taylorwilsdon): OAuth 2.1, multi-user, 100+ tools across Gmail/Calendar/Drive/Docs/Sheets. - **Microsoft Work IQ Mail** (Official): Create, update, delete messages; reply/reply-all; semantic search. - **Softeria ms-365-mcp-server**: Graph API integration for Outlook mail, calendar, files. - **Generic IMAP/SMTP MCP**: Works with any email provider -- Gmail, Outlook, Yahoo, Fastmail. **AGH agent use cases:** + - **Email triage agent:** Agent reads incoming emails, categorizes by urgency and topic, drafts replies for routine queries, escalates important ones to Slack. - **Follow-up tracker:** Agent monitors sent emails, detects when no reply is received after N days, creates follow-up tasks in Linear. - **Report distributor:** Agent generates weekly status reports from project management data, formats as email, sends to stakeholder distribution list. @@ -163,6 +175,7 @@ This document catalogs communication, productivity, and business tool integratio **MCP servers available:** None found in current directories. **AGH agent use cases:** + - **Bridge bot:** Agent monitors Matrix rooms and cross-posts to Slack/Discord channels. - **Self-hosted comms agent:** For organizations using Matrix for privacy, agent provides the same Q&A and automation capabilities as Slack integrations. 
@@ -177,12 +190,14 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full CRUD across Gmail, Calendar, Docs, Sheets, Slides, Drive, Forms, Tasks, Chat, and Search. OAuth 2.1 authentication with multi-user support. **MCP servers available:** + - **taylorwilsdon/google_workspace_mcp** (workspacemcp.com): The most complete server -- 12 services, 100+ tools, OAuth 2.1, remote multi-user auth, DXT installer. v1.15.0 on PyPI (March 2026). - **aaronsb/google-workspace-mcp**: Gmail, Calendar, Drive with authenticated access. - **goncaloreis/google-workspace-mcp**: Docs, Sheets, Gmail tools for Claude Desktop. - **Google official MCP** (December 2025): Official support for Google and Google Cloud services. **AGH agent use cases:** + - **Meeting notes agent:** Agent joins a Google Meet (via Calendar), records action items, creates a Google Doc summary, assigns tasks in Linear, and sends follow-up emails. - **Spreadsheet analyst:** Agent reads data from Google Sheets, performs analysis, writes insights back, and creates a Slides presentation with charts. - **Calendar optimizer:** Agent analyzes calendar for meeting overload, suggests consolidated meetings, blocks focus time, and manages RSVPs. @@ -197,12 +212,14 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full read/write across Outlook, Teams, SharePoint, OneDrive, Word, Excel, PowerPoint. Enterprise authentication via Microsoft Graph API. **MCP servers available:** + - **Microsoft Work IQ** (Official): Mail, Calendar, SharePoint, OneDrive, Teams servers. Requires M365 Copilot license. - **Softeria/ms-365-mcp-server**: Graph API integration for mail, files, calendar, Excel, OneNote, To Do, Planner, Contacts. - **PnP/cli-microsoft365-mcp-server**: Natural language to CLI commands across Entra ID, OneDrive, Outlook, Planner, Power Apps, SharePoint, Teams. 
- **Arcade.dev Office 365 MCP**: Five servers for Word, Excel, PowerPoint, OneDrive, SharePoint with full read/write. **AGH agent use cases:** + - **Onboarding automator:** Agent sets up new hire in SharePoint, creates OneDrive folders, sends welcome email via Outlook, schedules intro meetings in Calendar. - **Report generator:** Agent pulls data from multiple sources, generates formatted Excel report, creates PowerPoint summary, distributes via email. - **Document reviewer:** Agent reads Word documents from SharePoint, provides feedback, tracks changes, and notifies authors. @@ -216,12 +233,14 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Search pages, create/update notes, manage databases, organize knowledge bases, query structured data. **MCP servers available:** + - **Notion official MCP**: Direct integration documented at developers.notion.com. - **Grey-Iris community server**: Markdown-first integration using 92% fewer tokens than official server. - **WayStation**: Unified server connecting Notion, Monday, Asana, Slack. - **Pipedream**: Hosted MCP for 2,500+ APIs including Notion. **AGH agent use cases:** + - **Knowledge base maintainer:** Agent monitors codebase changes, automatically updates relevant Notion docs with new API signatures, configuration changes, or architectural decisions. - **Sprint planning assistant:** Agent reads Notion sprint board, cross-references with Linear tickets, identifies gaps, and creates missing tasks. - **Research compiler:** Agent collects information from web searches, codebase analysis, and memory, then compiles structured research pages in Notion. @@ -235,6 +254,7 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Read, write, search, and manage notes in Obsidian vaults. Frontmatter extraction, tag management, full-text search, bidirectional link support. 
**MCP servers available:** + - **bitbonsai/mcpvault** (v0.11.0, March 2026): Vault access with path traversal protection, tag scanning, symlink safety. - **cyanheads/obsidian-mcp-server**: Comprehensive tools via Obsidian Local REST API plugin. - **MarkusPfundstein/mcp-obsidian**: REST API integration. @@ -242,6 +262,7 @@ This document catalogs communication, productivity, and business tool integratio - 60+ servers tracked on PulseMCP. **AGH agent use cases:** + - **Personal knowledge agent:** Agent maintains an Obsidian vault as a developer's second brain -- automatically creates notes from conversations, links related concepts, and surfaces relevant notes during coding sessions. - **Daily journal generator:** Agent compiles daily activity (commits, PRs, meetings, messages) into a structured Obsidian daily note. - **Research vault:** Agent stores research findings with proper tags and backlinks, making them searchable across future sessions. @@ -257,6 +278,7 @@ This document catalogs communication, productivity, and business tool integratio **MCP servers available:** None found. **AGH agent use cases:** + - **Graph-aware knowledge agent:** Agent creates and links blocks in Roam's graph structure based on coding sessions and research. **Status:** Needs to be built. Roam's user base is smaller and more niche than Obsidian. @@ -270,10 +292,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Create, update, search, and comment on issues. Manage projects, cycles, and team workflows. **MCP servers available:** + - **jerhadf/linear-mcp-server**: Full Linear API integration for issue management. - **DX Heroes**: Combined Jira + Linear integration. **AGH agent use cases:** + - **Auto-ticket from code:** Agent detects TODOs and FIXMEs in codebase, creates corresponding Linear issues with proper labels and assignees. 
- **Sprint reporter:** Agent generates sprint retrospective summaries from completed/moved issues, posts to Slack. - **Bug-to-fix pipeline:** Agent receives a bug report (via Slack or email), creates a Linear issue, researches the codebase for likely root cause, and adds analysis as comments. @@ -288,11 +312,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Execute JQL queries, create/update/transition tickets, manage epics and sprints, add comments and attachments. **MCP servers available:** + - **Atlassian Rovo MCP** (Official, February 2026): OAuth 2.1 authorization, enterprise security. Covers Jira + Confluence. - **Community servers**: 74+ Jira MCP servers tracked on PulseMCP. KS-GEN-AI/jira-mcp-server (TypeScript, JQL support). - **DX Heroes**: Combined Jira + Linear integration. **AGH agent use cases:** + - **Ticket auto-population:** Agent takes a brief description, enriches it with codebase context (affected files, related PRs, similar past issues), and creates a fully detailed Jira ticket. - **Cross-system sync:** Agent keeps Jira tickets in sync with GitHub issues or Linear tickets. - **Sprint velocity tracker:** Agent calculates velocity metrics from completed sprints, identifies trends, posts analysis to Confluence. @@ -306,10 +332,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Manage tasks, projects, workspaces, sections, tags, and custom fields. Full CRUD with permission controls. **MCP servers available:** + - **Official Asana MCP** (mcp.asana.com): Beta server with SSE endpoint. Note: v1 deprecated May 2026, migrating to v2. - **roychri/mcp-server-asana** (Community): Broad tool coverage, extensible, supports disabling write operations for safe testing. 
**AGH agent use cases:** + - **Task breakdown agent:** Agent takes a high-level project description, breaks it into subtasks with estimates, and creates them in Asana with proper dependencies. - **Status reporter:** Agent reads Asana project status, compiles progress across multiple projects, generates executive summary. @@ -322,10 +350,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full access to Monday.com's GraphQL API -- boards, items, sub-items, updates, documents, and automation rules. **MCP servers available:** + - **Official Monday.com MCP** (mondaycom/mcp): Plug-and-play server with dynamic API tools for full GraphQL surface. - **Jovan Sakovic** (Community, Python): Boards, items, updates, documents. 8,900+ downloads. **AGH agent use cases:** + - **Board automator:** Agent monitors external events (deploys, alerts) and creates/updates Monday.com items automatically. - **Cross-tool reporter:** Agent aggregates data from Monday.com boards and generates reports in Google Sheets or email. @@ -338,11 +368,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Manage boards, lists, cards, comments, attachments, and labels. **MCP servers available:** + - **delorenj/mcp-server-trello**: TypeScript, rate limiting, type-safe, in official MCP Registry. - **Composio Trello MCP**: AI agent integration for card/list/board management. - **mcp-trello** (PyPI): Python-based board management. **AGH agent use cases:** + - **Kanban manager:** Agent moves cards based on PR status, adds comments with build results, archives completed cards. - **Card creator:** Agent converts meeting notes or Slack messages into Trello cards with proper labels. 
@@ -355,9 +387,11 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Tasks, checklists, sprints, comments, tags, spaces, lists, folders, files, docs, chat, time tracking, goals, and OKRs. **MCP servers available:** + - **taazkareem/clickup-mcp-server**: High-performance server with document management, chat, goals/KRs, OAuth. Supports remote MCP connections. **AGH agent use cases:** + - **OKR tracker:** Agent monitors key results progress, sends weekly updates to stakeholders, flags at-risk objectives. - **Doc-to-task converter:** Agent reads ClickUp docs and creates structured task hierarchies from requirements. @@ -370,10 +404,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Manage projects, to-do lists, messages, schedules, and team members. **MCP servers available:** + - **georgeantonopoulos/Basecamp-MCP-Server**: FastMCP-based, 75 tools, compatible with Cursor/Codex/Claude Desktop. - **mcp-basecamp** (PyPI): Projects, to-dos, messages, schedules. **AGH agent use cases:** + - **Bulk task creator:** Agent takes a pasted list of tasks, creates them in the correct Basecamp project and to-do list. - **Daily digest:** Agent summarizes new messages, completed to-dos, and upcoming deadlines across all projects. @@ -386,9 +422,11 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Stories, Epics, Docs, iteration operations, team workflows, objective tracking, and search. **MCP servers available:** + - **Official Shortcut MCP Server**: Hosted server with OAuth authentication. Find, create, and update Stories, Epics, and Docs. **AGH agent use cases:** + - **Sprint planner:** Agent analyzes velocity and backlog, recommends stories for the next iteration, and moves them into the sprint. 
- **Story enricher:** Agent reads a brief story description, researches codebase for context, and adds technical details and acceptance criteria. @@ -403,9 +441,11 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Read, create, and update Confluence pages. Search across spaces. Integrated with Jira for issue-linked documentation. **MCP servers available:** + - **Atlassian Rovo MCP** (Official): Unified Jira + Confluence MCP server with OAuth 2.1 and enterprise security. **AGH agent use cases:** + - **Runbook maintainer:** Agent detects infrastructure changes and updates relevant Confluence runbooks automatically. - **Architecture doc generator:** Agent analyzes codebase structure and generates/updates architecture documentation in Confluence. - **Post-mortem writer:** After an incident, agent compiles timeline from Slack messages, PagerDuty alerts, and code changes into a structured Confluence post-mortem. @@ -419,11 +459,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Access and search published documentation. GitBook auto-generates an MCP server for any published docs site. **MCP servers available:** + - **Auto-generated per site**: Every GitBook published site automatically exposes an MCP server. - **MCPBook** (Community): Scrapes and indexes GitBook docs for searchable MCP access. - GitBook also auto-generates llms.txt and llms-full.txt files. Any page can be fetched as Markdown by appending .md to the URL. **AGH agent use cases:** + - **Documentation search agent:** Agent queries GitBook docs for API references, configuration options, and guides during coding sessions. - **Doc freshness checker:** Agent compares GitBook docs against current codebase and flags outdated content. 
@@ -436,9 +478,11 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** API documentation with interactive references, changelogs, forums, and engagement analytics. MCP server + llms.txt support. **MCP servers available:** + - ReadMe provides MCP server access, llms.txt support, and AI-powered doc linting via Agent Owlbert. **AGH agent use cases:** + - **API integration helper:** Agent queries ReadMe docs for third-party API specifications, generates client code, and validates against live endpoints. **Status:** Stable. Useful for consuming external API documentation. @@ -450,10 +494,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Developer documentation platform. Auto-generates MCP servers, llms.txt, llms-full.txt, and skill.md files for every docs site. Free on all tiers. **MCP servers available:** + - **Auto-generated per site**: Every Mintlify docs site exposes an MCP server with zero configuration. - AI traffic analytics show which agents visit docs and which MCP queries they run. **AGH agent use cases:** + - **SDK documentation agent:** Agent queries Mintlify-hosted SDK docs during coding to get accurate API signatures and examples. **Status:** Production-ready. Auto-generation with analytics is the most AI-native approach. @@ -467,10 +513,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full CRM data access -- accounts, leads, contacts, opportunities, conversations. Create, update, and delete records. **MCP servers available:** + - **Salesforce Agentforce MCP** (Official, July 2025 pilot): Native MCP client and server. Enterprise-grade policy enforcement, rate-limiting, access controls. 60+ tools. - **Community servers**: 312 GitHub stars for the most popular third-party implementation. 
**AGH agent use cases:** + - **Lead enrichment agent:** Agent takes a new lead, researches the company and contact using web search, enriches the Salesforce record with context. - **Deal updater:** Agent monitors email threads and Slack channels for deal-related conversations, updates opportunity stages and notes in Salesforce. - **Forecast reporter:** Agent pulls pipeline data, calculates forecasts, generates reports, and posts summaries to Slack. @@ -484,10 +532,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Read-only CRM data access for analysis, reporting, and insights. FAISS semantic search for intelligent querying. **MCP servers available:** + - **Community server** (116 GitHub stars): Read-only CRM access with FAISS semantic search. - No official HubSpot MCP server yet -- community fills the gap. **AGH agent use cases:** + - **Sales intelligence agent:** Agent queries HubSpot data to prepare for sales calls -- pulls contact history, recent interactions, and deal context. - **Pipeline analyzer:** Agent analyzes deal pipeline, identifies bottlenecks, generates coaching recommendations for sales managers. @@ -500,11 +550,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Full CRM data management -- deals, contacts, activities, organizations, pipelines. **MCP servers available:** + - **iamsamuelfraga/mcp-pipedrive** (December 2025): Described as the most complete Pipedrive MCP implementation. - **Pipedream MCP** (mcp.pipedream.com): Static URL with per-user authentication. - **Coupler.io MCP**: AI-powered sales data analysis. **AGH agent use cases:** + - **Deal progression agent:** Agent monitors deal stages, sends reminders for stale deals, suggests next actions based on historical patterns. - **Activity logger:** Agent creates Pipedrive activities from email threads and meeting notes automatically. 
@@ -517,10 +569,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Modern CRM with flexible data models. Full CRUD for companies, people, deals, tasks, lists, notes. **MCP servers available:** + - **Official Attio MCP** (mcp.attio.com): OAuth authentication, natural language CRM management. - **kesslerio/attio-mcp-server** (Community): 14 universal tools, batch ops, 10 MCP prompts, 3 Claude Skills, OAuth. 1,291 commits. **AGH agent use cases:** + - **Startup CRM agent:** Agent manages the entire sales pipeline for small teams -- creates contacts from emails, tracks deals, sends follow-ups. - **Investor tracking:** Agent maintains an Attio database of investor contacts, tracks communications, and prepares meeting briefings. @@ -535,10 +589,12 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Read design data (node tree, auto-layout, variants, design tokens, component properties). Write to canvas (open beta). Code Connect maps design components to code components. **MCP servers available:** + - **Official Figma MCP** (mcp.figma.com): First-party product built by Figma in partnership with Anthropic, Cursor, and VS Code. Open beta with write-to-canvas (March 2026). Local desktop server at 127.0.0.1:3845. - **Framelink Figma Connector**: Layout information for AI coding agents. **AGH agent use cases:** + - **Design-to-code agent:** Agent reads a Figma frame, uses Code Connect to map design components to actual codebase components, generates production React code using the team's real component library. - **Design review agent:** Agent compares implemented UI (via screenshots) against Figma designs and identifies visual discrepancies. - **Component library sync:** Agent monitors Figma component changes and creates PRs to update the codebase component library. 
@@ -552,9 +608,11 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** AI tools connect with Miro boards for visual workspace interaction -- create/modify elements, manage boards. **MCP servers available:** + - **Official Miro MCP Server** (February 2026): Gateway between AI tools and Miro boards. Documented at developers.miro.com. **AGH agent use cases:** + - **Architecture diagrammer:** Agent creates system architecture diagrams on Miro boards based on codebase analysis. - **Retrospective facilitator:** Agent sets up retro board templates, collects team input from Slack, and organizes sticky notes. - **Brainstorm visualizer:** Agent takes brainstorming session notes and creates organized mind maps on Miro. @@ -568,11 +626,13 @@ This document catalogs communication, productivity, and business tool integratio **What it does:** Create and manipulate hand-drawn style diagrams. Real-time canvas with WebSocket sync, element creation/modification, and AI-assisted diagramming. **MCP servers available:** + - **excalidraw/excalidraw-mcp** (Official, February 2026): Streams hand-drawn diagrams with viewport control. Works with Claude, ChatGPT, VS Code, Goose. - **yctimlin/excalidraw** (Community, November 2025): Node.js API for element creation, modification, and organization. - **excalidraw-mcp** (PyPI): Dual-language server with live canvas, real-time sync, WebSocket updates. **AGH agent use cases:** + - **Architecture sketch agent:** Agent generates architecture diagrams from natural language descriptions using technology-aware styling for 50+ technologies. - **Incident timeline visualizer:** Agent creates visual incident timelines from log data and alert sequences. - **Data flow diagrammer:** Agent analyzes codebase data flows and generates Excalidraw diagrams showing system interactions. 
@@ -588,6 +648,7 @@ This document catalogs communication, productivity, and business tool integratio ClawHub hosts 5,700+ to 31,000+ community skills covering email management, browser automation, and more. However, security is a concern: 7.6% of publicly available skills contain dangerous patterns. **Relevant patterns for AGH:** + - Skills follow the AgentSkills SKILL.md format, which has converged across OpenClaw and Hermes. - Plugin examples include QQ messaging (OneBot 11), wallet tools, cognitive memory systems, and event visualization. - AGH should monitor ClawHub for popular skill categories that indicate user demand. @@ -597,17 +658,20 @@ ClawHub hosts 5,700+ to 31,000+ community skills covering email management, brow Hermes ships 48 built-in tools across 40 toolsets and supports MCP via `hermes mcp serve`. The ecosystem map (hermes-ecosystem.vercel.app) catalogs 80+ tools, skills, and integrations. **Notable integrations:** + - Multi-platform gateway: Telegram, Discord, Slack, WhatsApp, Signal, Feishu/Lark, WeCom. - Memory systems: hindsight (8,362 stars), autocontext (711 stars), ClawMem (86 stars). - Multi-agent orchestration: mission-control (3,700+ stars). **Relevant patterns for AGH:** + - Hermes's multi-platform gateway pattern (single agent, multiple communication channels) is directly applicable to AGH extensions. - The `hermes mcp serve` pattern of exposing agent sessions to MCP clients is similar to AGH's architecture. #### 7.3 awesome-mcp-servers (wong2/awesome-mcp-servers) The primary community curation point. Categories most relevant to AGH: + - Communication (Slack, Discord, Telegram, Email) - Project Management (Linear, Jira, Asana, Monday, Trello, ClickUp) - Knowledge & Memory (Notion, Obsidian, Confluence) @@ -617,14 +681,14 @@ The primary community curation point. 
Categories most relevant to AGH: These platforms bundle many integrations under a single MCP interface: -| Platform | Integrations | Notes | -|---|---|---| -| **Composio** | Unified MCP for all hosted apps | Single setup, no per-app npx commands | -| **Pipedream** | 2,500+ APIs (Slack, GitHub, Notion, etc.) | Hosted MCP servers or self-deploy | -| **Knit MCP** | 10,000+ tools (HRIS, ATS, CRM, etc.) | Broadest coverage | -| **WayStation** | Notion, Monday, AirTable, etc. | Productivity-focused hub | -| **Activepieces** | Calendar, Notion, advanced flows | Dynamic server with app composition | -| **Arcade.dev** | Microsoft 365, Google Workspace | 30+ tools per suite | +| Platform | Integrations | Notes | +| ---------------- | ----------------------------------------- | ------------------------------------- | +| **Composio** | Unified MCP for all hosted apps | Single setup, no per-app npx commands | +| **Pipedream** | 2,500+ APIs (Slack, GitHub, Notion, etc.) | Hosted MCP servers or self-deploy | +| **Knit MCP** | 10,000+ tools (HRIS, ATS, CRM, etc.) | Broadest coverage | +| **WayStation** | Notion, Monday, Airtable, etc.
| Productivity-focused hub | +| **Activepieces** | Calendar, Notion, advanced flows | Dynamic server with app composition | +| **Arcade.dev** | Microsoft 365, Google Workspace | 30+ tools per suite | --- diff --git a/.compozy/tasks/ext-ideas/research/integrations_data_ai.md b/docs/ideas/ext-ideas/research/integrations_data_ai.md similarity index 77% rename from .compozy/tasks/ext-ideas/research/integrations_data_ai.md rename to docs/ideas/ext-ideas/research/integrations_data_ai.md index ae2eff5d1..36d9bc469 100644 --- a/.compozy/tasks/ext-ideas/research/integrations_data_ai.md +++ b/docs/ideas/ext-ideas/research/integrations_data_ai.md @@ -15,58 +15,58 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ## Master Integration Table -| # | Integration | Category | MCP Server Exists | Server Source | Priority | AGH Value | -|---|---|---|---|---|---|---| -| 1 | PostgreSQL | Database | Yes | Official (Anthropic) | HIGH | Core data access | -| 2 | MySQL | Database | Yes | Community | MED | Legacy system access | -| 3 | MongoDB | Database | Yes | Official (MongoDB) | MED | Document store ops | -| 4 | Redis | Database | Yes | Official (Redis) | HIGH | Cache/state mgmt | -| 5 | Supabase | Database | Yes | Official (hosted+OSS) | HIGH | Full BaaS agent | -| 6 | Neon | Database | Yes | Official | HIGH | Branch-safe migrations | -| 7 | PlanetScale | Database | Yes | Official | MED | MySQL branching | -| 8 | Turso | Database | Yes | Official (`--mcp`) | MED | Edge SQLite | -| 9 | CockroachDB | Database | Yes | Community | LOW | Distributed SQL | -| 10 | DynamoDB | Database | Yes | Official (AWS Labs) | MED | NoSQL modeling | -| 11 | Pinecone | Vector Store | Via unified | weave-mcp / MindsDB | MED | Managed vector search | -| 12 | Qdrant | Vector Store | Yes | Official | HIGH | OSS vector search | -| 13 | Weaviate | Vector Store | Via unified | weave-mcp / MindsDB | MED | Hybrid search | -| 14 | Chroma | Vector Store | Via unified | weave-mcp | 
LOW | Prototyping | -| 15 | Milvus | Vector Store | Yes | Official | MED | Industrial-scale vectors | -| 16 | LanceDB | Vector Store | Yes | Community (multiple) | HIGH | Embedded, zero-config | -| 17 | Elasticsearch | Search | Yes | Community | MED | Full-text + analytics | -| 18 | Algolia | Search | Yes | Official (hosted+OSS) | HIGH | Enterprise search | -| 19 | Typesense | Search | Yes | Community | LOW | Lightweight search | -| 20 | Meilisearch | Search | Yes | Official | MED | Dev-friendly search | -| 21 | BigQuery | Data Warehouse | Yes | Via Dot/Alkemi | MED | Google analytics DW | -| 22 | Snowflake | Data Warehouse | Yes | Via Dot/Alkemi | MED | Enterprise DW | -| 23 | ClickHouse | Data Warehouse | Yes | Official | MED | Real-time analytics | -| 24 | Databricks | Data Warehouse | Yes | Community | MED | Lakehouse + ML | -| 25 | Hugging Face | AI/ML | Yes | Community | HIGH | Model discovery | -| 26 | Replicate | AI/ML | Yes | Community | MED | Hosted model inference | -| 27 | Modal | AI/ML | Indirect | Host MCP on Modal | MED | Serverless GPU | -| 28 | Together AI | AI/ML | Via OpenRouter | OpenRouter proxy | LOW | OSS model inference | -| 29 | Groq | AI/ML | Via OpenRouter | OpenRouter proxy | MED | Ultra-fast inference | -| 30 | OpenRouter | AI/ML | Yes | Community | HIGH | Multi-model gateway | -| 31 | Ollama | AI/ML | Yes | Community | HIGH | Local model inference | -| 32 | Firecrawl | Web/Scraping | Yes | Official | HIGH | Web-to-markdown | -| 33 | Tavily | Web/Search | Yes | Community | MED | RAG-optimized search | -| 34 | Exa | Web/Search | Yes | Community | HIGH | Neural search | -| 35 | Brave Search | Web/Search | Yes | Official | HIGH | Privacy-first search | -| 36 | SerpAPI | Web/Search | Yes | Community | LOW | Google SERP scraping | -| 37 | Crawl4AI | Web/Scraping | Yes | Community | MED | Async web crawling | -| 38 | Jina | Web/Search | Yes | Community | MED | Multimodal search | -| 39 | Amplitude | Analytics | Yes | Community | LOW | 
Behavioral analytics | -| 40 | Mixpanel | Analytics | Yes | Community (unified) | LOW | Product analytics | -| 41 | PostHog | Analytics | Yes | Official | MED | OSS product analytics | -| 42 | Plausible | Analytics | Yes | Community | LOW | Privacy analytics | -| 43 | Zapier | Automation | Yes | Official | HIGH | 8000+ app connectors | -| 44 | n8n | Automation | Yes | Community | HIGH | Self-hosted workflows | -| 45 | Make | Automation | Partial | Via API | MED | Visual workflows | -| 46 | Pipedream | Automation | Yes | Community | MED | Code-first automation | -| 47 | Temporal | Automation | Yes | Community | MED | Durable execution | -| 48 | Inngest | Automation | Yes | Official | HIGH | Durable serverless | -| 49 | Google MCP Toolbox | Multi-DB | Yes | Official (Google) | HIGH | 40+ data sources | -| 50 | DBHub | Multi-DB | Yes | Community | MED | Multi-engine gateway | +| # | Integration | Category | MCP Server Exists | Server Source | Priority | AGH Value | +| --- | ------------------ | -------------- | ----------------- | --------------------- | -------- | ------------------------ | +| 1 | PostgreSQL | Database | Yes | Official (Anthropic) | HIGH | Core data access | +| 2 | MySQL | Database | Yes | Community | MED | Legacy system access | +| 3 | MongoDB | Database | Yes | Official (MongoDB) | MED | Document store ops | +| 4 | Redis | Database | Yes | Official (Redis) | HIGH | Cache/state mgmt | +| 5 | Supabase | Database | Yes | Official (hosted+OSS) | HIGH | Full BaaS agent | +| 6 | Neon | Database | Yes | Official | HIGH | Branch-safe migrations | +| 7 | PlanetScale | Database | Yes | Official | MED | MySQL branching | +| 8 | Turso | Database | Yes | Official (`--mcp`) | MED | Edge SQLite | +| 9 | CockroachDB | Database | Yes | Community | LOW | Distributed SQL | +| 10 | DynamoDB | Database | Yes | Official (AWS Labs) | MED | NoSQL modeling | +| 11 | Pinecone | Vector Store | Via unified | weave-mcp / MindsDB | MED | Managed vector search | +| 12 | Qdrant | 
Vector Store | Yes | Official | HIGH | OSS vector search | +| 13 | Weaviate | Vector Store | Via unified | weave-mcp / MindsDB | MED | Hybrid search | +| 14 | Chroma | Vector Store | Via unified | weave-mcp | LOW | Prototyping | +| 15 | Milvus | Vector Store | Yes | Official | MED | Industrial-scale vectors | +| 16 | LanceDB | Vector Store | Yes | Community (multiple) | HIGH | Embedded, zero-config | +| 17 | Elasticsearch | Search | Yes | Community | MED | Full-text + analytics | +| 18 | Algolia | Search | Yes | Official (hosted+OSS) | HIGH | Enterprise search | +| 19 | Typesense | Search | Yes | Community | LOW | Lightweight search | +| 20 | Meilisearch | Search | Yes | Official | MED | Dev-friendly search | +| 21 | BigQuery | Data Warehouse | Yes | Via Dot/Alkemi | MED | Google analytics DW | +| 22 | Snowflake | Data Warehouse | Yes | Via Dot/Alkemi | MED | Enterprise DW | +| 23 | ClickHouse | Data Warehouse | Yes | Official | MED | Real-time analytics | +| 24 | Databricks | Data Warehouse | Yes | Community | MED | Lakehouse + ML | +| 25 | Hugging Face | AI/ML | Yes | Community | HIGH | Model discovery | +| 26 | Replicate | AI/ML | Yes | Community | MED | Hosted model inference | +| 27 | Modal | AI/ML | Indirect | Host MCP on Modal | MED | Serverless GPU | +| 28 | Together AI | AI/ML | Via OpenRouter | OpenRouter proxy | LOW | OSS model inference | +| 29 | Groq | AI/ML | Via OpenRouter | OpenRouter proxy | MED | Ultra-fast inference | +| 30 | OpenRouter | AI/ML | Yes | Community | HIGH | Multi-model gateway | +| 31 | Ollama | AI/ML | Yes | Community | HIGH | Local model inference | +| 32 | Firecrawl | Web/Scraping | Yes | Official | HIGH | Web-to-markdown | +| 33 | Tavily | Web/Search | Yes | Community | MED | RAG-optimized search | +| 34 | Exa | Web/Search | Yes | Community | HIGH | Neural search | +| 35 | Brave Search | Web/Search | Yes | Official | HIGH | Privacy-first search | +| 36 | SerpAPI | Web/Search | Yes | Community | LOW | Google SERP scraping | +| 37 
| Crawl4AI | Web/Scraping | Yes | Community | MED | Async web crawling | +| 38 | Jina | Web/Search | Yes | Community | MED | Multimodal search | +| 39 | Amplitude | Analytics | Yes | Community | LOW | Behavioral analytics | +| 40 | Mixpanel | Analytics | Yes | Community (unified) | LOW | Product analytics | +| 41 | PostHog | Analytics | Yes | Official | MED | OSS product analytics | +| 42 | Plausible | Analytics | Yes | Community | LOW | Privacy analytics | +| 43 | Zapier | Automation | Yes | Official | HIGH | 8,000+ app connectors | +| 44 | n8n | Automation | Yes | Community | HIGH | Self-hosted workflows | +| 45 | Make | Automation | Partial | Via API | MED | Visual workflows | +| 46 | Pipedream | Automation | Yes | Community | MED | Code-first automation | +| 47 | Temporal | Automation | Yes | Community | MED | Durable execution | +| 48 | Inngest | Automation | Yes | Official | HIGH | Durable serverless | +| 49 | Google MCP Toolbox | Multi-DB | Yes | Official (Google) | HIGH | 40+ data sources | +| 50 | DBHub | Multi-DB | Yes | Community | MED | Multi-engine gateway | --- @@ -75,60 +75,70 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 1. Databases #### PostgreSQL + - **What it does**: Read-only SQL access, schema inspection, query execution against Postgres databases. - **MCP server**: Official Anthropic server (`@modelcontextprotocol/server-postgres`, deprecated July 2025). Replaced by Supabase MCP and Neon MCP for managed Postgres. Generic Postgres MCP Pro (CrystalDBA) for self-hosted. - **AGH use case**: Agent investigating a production bug queries the Postgres database to find anomalous rows, correlates with session logs from AGH's observe system, then writes a migration to fix the data issue and a code patch to prevent recurrence. The agent stores findings in AGH memory for future reference. - **Status**: Multiple servers exist. Google's MCP Toolbox also covers Postgres.
#### MySQL + - **What it does**: MySQL query execution, schema browsing, data analysis through controlled interface. - **MCP server**: `designcomputer/mysql_mcp_server` (community). Also covered by DBHub and Google MCP Toolbox. - **AGH use case**: Agent connects to a legacy MySQL database to understand schema for a migration project, generates an entity-relationship map, and creates Go structs matching the schema. - **Status**: Community server available. PlanetScale MCP covers MySQL-compatible databases. #### MongoDB + - **What it does**: Document CRUD, aggregation pipelines, Atlas cloud management, collection operations. - **MCP server**: Official (`mongodb-js/mongodb-mcp-server`). Supports both local MongoDB and Atlas. - **AGH use case**: Agent analyzes MongoDB collections to identify slow aggregation pipelines, proposes index optimizations, and validates them by running `explain()` through the MCP server. Results are recorded in AGH observe events. - **Status**: Official server available. #### Redis + - **What it does**: Data management, search operations, cache inspection, pub/sub monitoring. - **MCP server**: Official Redis MCP Server (released Dec 2025). Also: Redis Cloud API MCP, Upstash MCP. - **AGH use case**: Agent debugging a caching issue inspects Redis keys and TTLs, identifies a cache stampede pattern, then implements a fix with jittered expiration. Uses AGH skills to apply the fix and AGH memory to document the pattern. - **Status**: Official server available. #### Supabase + - **What it does**: Full BaaS management -- database design, queries, edge functions, storage, auth, branching. 20+ tools exposed. - **MCP server**: Official hosted MCP (`https://mcp.supabase.com/mcp`). OAuth 2.1 auth, zero installation. Also: `supabase-community/supabase-mcp` (OSS). 
- **AGH use case**: Agent bootstraps an entire backend: creates tables, sets up RLS policies, deploys edge functions, configures storage buckets -- all through natural language within an AGH session. Each step is recorded as an AGH observe event for auditability. - **Status**: Official, production-ready, most mature BaaS MCP. #### Neon + - **What it does**: Serverless Postgres management with branch-based migration safety, query tuning, project management. 29 tools with scope-based permissions. - **MCP server**: Official (remote-first, OAuth + API key). Unique branch-based migration safety. - **AGH use case**: Agent creates a Neon branch, tests a schema migration on the branch, validates query performance with `explain`, then merges. If the migration causes regressions, the agent rolls back the branch without affecting production. AGH memory stores the migration history. - **Status**: Official. Rated 4/5 as "best cloud database MCP experience". Neon was acquired by Databricks but operates independently. #### PlanetScale + - **What it does**: MySQL-compatible serverless database with branching. List orgs, databases, branches, run SQL. - **MCP server**: Official with OAuth authentication. - **AGH use case**: Agent creates a database branch for a feature, applies migrations, runs integration tests, then opens a deploy request -- all within an AGH session. - **Status**: Official. No free tier ($39/mo minimum). #### Turso + - **What it does**: Edge-deployed SQLite (libSQL). Schema design, data operations, 9 tools. - **MCP server**: Official via `--mcp` flag. Claude Code integration built in. - **AGH use case**: Agent sets up an edge database for a mobile app, designs the schema through conversation, generates the libSQL client code, and deploys replicas to multiple regions. - **Status**: Official. Free tier (5GB, 100 databases). #### CockroachDB + - **What it does**: Distributed SQL cluster management, monitoring, schema operations, query execution. 
- **MCP server**: Community (`bpamiri/cockroachdb-mcp`, `dhartunian/cockroachdb-mcp-server`). Also exposes CockroachDB docs. - **AGH use case**: Agent monitors cluster health during a deployment, detects node imbalance, and recommends rebalancing operations. - **Status**: Community servers available. #### DynamoDB + - **What it does**: Data modeling guidance, design pattern recommendations, cost analysis, code generation. 8 tools. - **MCP server**: Official (AWS Labs, `awslabs.dynamodb-mcp-server`). Also: community server by Iman Kamyabi for operational management. - **AGH use case**: Agent analyzes an existing MySQL schema, designs an equivalent DynamoDB single-table model with access patterns, generates cost projections, and produces Go SDK code for the new model. Uses AGH memory to track the migration plan. @@ -139,36 +149,42 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 2. Vector Stores #### Pinecone + - **What it does**: Managed vector similarity search at billion-vector scale. - **MCP server**: Via unified servers (weave-mcp supports 11 DBs, MindsDB unified MCP). - **AGH use case**: Agent indexes codebase documentation into Pinecone, then uses semantic search during debugging sessions to find relevant code patterns. AGH memory stores the index metadata. - **Status**: No first-party MCP server. Available through weave-mcp and MindsDB. #### Qdrant + - **What it does**: Vector similarity search, code search, semantic memory. ACORN algorithm for filtered HNSW. - **MCP server**: Official (`qdrant/mcp-server-qdrant`). Supports SSE transport. Can be specialized as code search tool. - **AGH use case**: Agent stores code snippets and documentation in Qdrant, then during code review, semantically searches for similar patterns, known bugs, or relevant implementations. AGH skills catalog is indexed for semantic discovery. - **Status**: Official. 1GB free tier forever. 
#### Weaviate + - **What it does**: Hybrid search (BlockMax WAND + RSF), vector + keyword search combined. - **MCP server**: Via weave-mcp and MindsDB unified MCP. - **AGH use case**: Agent builds a hybrid search index over project documentation and code comments, enabling natural language queries like "find all error handling patterns for database timeouts." - **Status**: No first-party MCP server. Available through unified servers. #### Chroma + - **What it does**: Lightweight, developer-friendly vector database for prototyping and small/medium apps. - **MCP server**: Via weave-mcp. - **AGH use case**: Agent uses Chroma as a local-first semantic memory store for session context, indexing conversation history for retrieval during long-running tasks. - **Status**: Community only. Best for prototyping, not production scale. #### Milvus + - **What it does**: Industrial-scale vector search (40K+ GitHub stars). Full-text, vector, hybrid, and multi-vector search. - **MCP server**: Official with 5 search tools: `milvus-text-search`, `milvus-vector-search`, `milvus-hybrid-search`, `milvus-multi-vector-search`, `milvus-query`. - **AGH use case**: Agent indexes a large codebase (millions of functions) into Milvus for semantic code search at scale. During incident response, the agent searches for similar past incidents using vector similarity on error signatures stored in AGH memory. - **Status**: Official. Best for billion-vector scale. #### LanceDB + - **What it does**: Embedded vector database with zero configuration. Stores text with vector embeddings for semantic memory. Scales to millions of vectors on disk. - **MCP server**: Multiple community implementations (Python: `RyanLisse/lancedb_mcp`, Node.js: `vurtnec`, PyPI: `mcp-lance-db`). - **AGH use case**: AGH uses LanceDB as its local semantic memory backend -- zero external dependencies, embedded in the daemon process. Agent memories, skill descriptions, and session summaries are all indexed for semantic retrieval. 
This is the most natural fit for AGH's "single-binary, local-first" architecture. @@ -179,24 +195,28 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 3. Search Engines #### Elasticsearch + - **What it does**: Full-text search, analytics, index management, document operations. Also "Elasticsearch Memory" variant with hierarchical categorization and semantic search. - **MCP server**: Community implementation. Also covered by Google MCP Toolbox and weave-mcp. - **AGH use case**: Agent searches application logs indexed in Elasticsearch to diagnose a production error, correlates with metrics, and generates a root cause analysis document. Findings are stored in AGH memory for the team. - **Status**: Community server. OpenSearch (fork) also has an official MCP server. #### Algolia + - **What it does**: Enterprise search and recommendations. Go and Node.js MCP servers plus a hosted MCP server. - **MCP server**: Official. Multiple implementations: Go (`algolia/mcp`), Node.js (`algolia/mcp-node`), Hosted (fully managed, OAuth). - **AGH use case**: Agent configures Algolia search indexes for a product catalog, sets up synonyms and ranking rules, tests search quality with sample queries, and deploys. All actions are tracked in AGH observe events for rollback. - **Status**: Official. Most mature search MCP offering. Usage counts toward existing Algolia plan. #### Typesense + - **What it does**: In-memory search with Raft clustering. Collection management, document operations, search. - **MCP server**: Community (`suhail-ak-s/typesense`). - **AGH use case**: Agent indexes project documentation into Typesense for instant typo-tolerant search, then builds a search UI component. - **Status**: Community server on PulseMCP. #### Meilisearch + - **What it does**: Developer-friendly, fast, typo-tolerant search. Indexing and querying via natural language. - **MCP server**: Official Meilisearch MCP server. 
- **AGH use case**: Agent sets up Meilisearch for a knowledge base, configures filterable attributes and ranking, imports documents, and validates search quality. @@ -207,24 +227,28 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 4. Data Warehouses #### BigQuery + - **What it does**: Google's serverless data warehouse. Query execution, dataset management. - **MCP server**: Via Dot (GetDot.ai) and Alkemi.ai. Google MCP Toolbox also supports it. - **AGH use case**: Agent queries BigQuery to analyze user behavior data, identifies a conversion funnel drop-off, generates a report, and creates a Jira ticket with recommendations. Uses AGH memory to track analytics findings over time. - **Status**: Accessible via unified MCP servers (Dot, Alkemi, Google MCP Toolbox). #### Snowflake + - **What it does**: Enterprise data warehouse with semi-structured data support. - **MCP server**: Via Dot and Alkemi. Also MindsDB unified MCP. - **AGH use case**: Agent runs cost analysis queries on Snowflake, identifies expensive queries consuming credits, proposes optimizations (clustering keys, materialized views), and validates improvements. - **Status**: Via unified servers. Requires clustering key alignment for optimal MCP use. #### ClickHouse + - **What it does**: Real-time analytics database. 100+ GB/s scan per node. Column-oriented. - **MCP server**: Dedicated ClickHouse MCP server listed on official MCP servers repo. ClickHouse acquired Langfuse for LLM observability. - **AGH use case**: Agent queries ClickHouse for real-time application metrics, builds an anomaly detection query, and sets up an alerting pipeline. Integrates with AGH observe for unified monitoring. - **Status**: Dedicated MCP server available. #### Databricks + - **What it does**: Lakehouse platform with Unity Catalog. Workspace interaction, notebook execution, SQL queries. - **MCP server**: Community (`characat0/databricks`). Covered in official MCP servers repo. 
- **AGH use case**: Agent accesses Databricks workspace to run feature engineering notebooks, evaluates ML model performance, and generates a deployment plan. @@ -235,42 +259,49 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 5. AI/ML Platforms #### Hugging Face + - **What it does**: Model and dataset discovery, Hub access, repository browsing. Paired with Ollama for local inference. - **MCP server**: Community MCP for Hub access. - **AGH use case**: Agent searches Hugging Face for a suitable text embedding model, downloads it via Ollama, benchmarks it against the project's domain data, and configures it as AGH's embedding backend for memory consolidation. - **Status**: Community server. Can be combined with Ollama for fully local operation. #### Replicate + - **What it does**: Hosted model inference. Run any open-source model via API. - **MCP server**: Community MCP bridging Replicate's platform. - **AGH use case**: Agent uses Replicate to run image generation models for UI mockups, or to run specialized NLP models for code analysis that aren't available locally. - **Status**: Community server available. #### Modal + - **What it does**: Serverless GPU infrastructure. Python-first SDK, sub-second cold starts, instant autoscaling. - **MCP server**: No dedicated MCP server, but Modal is used to host and scale MCP servers. Python SDK defines everything in code. - **AGH use case**: Agent deploys a custom fine-tuned model on Modal for specialized code review, scales it up during active development hours, and scales to zero overnight. AGH extension wraps Modal's API for on-demand GPU access. - **Status**: No MCP server -- would need to be built as an AGH extension wrapping Modal's Python SDK. #### Together AI + - **What it does**: Open-source model inference at scale. - **MCP server**: Accessible via OpenRouter MCP server (proxy). 
- **AGH use case**: Agent switches between different open-source models (Llama, Mistral) via Together AI for cost-optimized inference on different task types. - **Status**: Via OpenRouter. No dedicated MCP server. #### Groq + - **What it does**: Ultra-fast LLM inference on custom LPU hardware. Lowest latency provider. - **MCP server**: Accessible via OpenRouter. Also supported by LibreChat MCP. - **AGH use case**: Agent uses Groq for rapid code analysis where latency matters -- e.g., real-time code review suggestions during pair programming sessions within AGH. - **Status**: Via OpenRouter. No dedicated MCP server. #### OpenRouter + - **What it does**: Unified API gateway to 100+ models from OpenAI, Anthropic, Together AI, Groq, and more. - **MCP server**: Multiple community implementations (`stabgan/openrouter-multimodal`, `heltonteixeira/openrouterai`). - **AGH use case**: AGH extension that lets agents dynamically select the best model for each sub-task -- fast model for quick questions, reasoning model for complex analysis, cheap model for bulk operations. Model selection becomes an AGH skill. - **Status**: Community servers available. High-value integration for AGH's multi-agent architecture. #### Ollama + - **What it does**: Run LLMs locally. Download and run Llama, Mistral, and hundreds of other models. - **MCP server**: Community MCP server. Dolphin MCP provides multi-provider support including Ollama. - **AGH use case**: AGH uses Ollama as a local inference backend for privacy-sensitive operations, embedding generation, and offline operation. Agent can run code analysis models locally without sending code to external APIs. @@ -281,42 +312,49 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 6. Web Search & Scraping #### Firecrawl + - **What it does**: Converts websites to LLM-ready markdown. Crawling, scraping, media parsing, actions (click/scroll/write). 98.3K GitHub stars. - **MCP server**: Official. 
Fastest MCP in benchmarks (7s avg, 83% accuracy). - **AGH use case**: Agent crawls a competitor's documentation site, converts to markdown, indexes in LanceDB for semantic search, and uses the knowledge to implement a similar feature. AGH memory stores the crawled content for team reference. - **Status**: Official. Best-in-class web scraping MCP. #### Tavily + - **What it does**: Search engine designed for LLMs and RAG. Concise, fact-checked results to reduce hallucinations. - **MCP server**: Community. npm and Python SDKs available. - **AGH use case**: Agent uses Tavily for research tasks -- finding API documentation, checking for known issues, discovering best practices -- and stores findings in AGH memory for the workspace. - **Status**: Community server. Acquired by Nebius. 1M+ downloads. #### Exa + - **What it does**: Neural search engine built for AI. Semantic understanding, natural language queries. - **MCP server**: Community. Part of MCP Omnisearch unified server. - **AGH use case**: Agent searches for code examples and implementations using semantic queries like "Go implementation of event sourcing with SQLite" and gets highly relevant results. Integrates with AGH skills for automated research workflows. - **Status**: Community server. Top-tier search quality. #### Brave Search + - **What it does**: Privacy-focused search with independent index. No tracking. $5/1K queries. - **MCP server**: Official. Listed in Anthropic's official MCP servers repo. - **AGH use case**: Agent performs web research for debugging (searching error messages, Stack Overflow, GitHub issues) without leaking user data to third parties. Privacy alignment with AGH's local-first philosophy. - **Status**: Official. Recommended for privacy-sensitive deployments. #### SerpAPI + - **What it does**: Google SERP scraping. Returns search metadata (titles, URLs, snippets). - **MCP server**: Community. 
- **AGH use case**: Agent scrapes Google search results for competitive analysis or SEO research tasks. - **Status**: Community. Reliability risk due to Google anti-scraping measures. #### Crawl4AI + - **What it does**: Async web crawling with content extraction, metadata retrieval, Google search. - **MCP server**: Community (`ritvij14/crawl4ai`). - **AGH use case**: Agent crawls a set of documentation pages to build a local knowledge base for a new library the team is adopting. - **Status**: Community server. #### Jina + - **What it does**: Multimodal search (text + images). `r.jina.ai/` URL prefix for instant markdown conversion. Custom neural search pipelines. - **MCP server**: Community. - **AGH use case**: Agent uses Jina Reader to quickly convert any URL to markdown for context, and Jina's search for finding relevant documentation across multiple sources. @@ -327,24 +365,28 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 7. Analytics #### Amplitude + - **What it does**: Deep behavioral analytics, cohort analysis, retention studies, predictive models. - **MCP server**: Community. Integrates with Amplitude analytics platform. - **AGH use case**: Agent queries Amplitude to analyze feature adoption metrics before and after a deployment, generates a report, and flags regressions for the team. - **Status**: Community server. Free tier up to 10M events/month. #### Mixpanel + - **What it does**: Product analytics with session replay, heatmaps, experiments, feature flags. - **MCP server**: Community unified analytics MCP (bridges GA4, Mixpanel, PostHog with natural language queries). Released March 2026. - **AGH use case**: Agent queries Mixpanel funnel data to identify where users drop off, correlates with recent code changes in AGH session history, and proposes UX fixes. - **Status**: Via unified analytics MCP server. 
#### PostHog + - **What it does**: All-in-one product analytics: event tracking, session replay, feature flags, A/B testing, error tracking, LLM analytics, surveys. Open source. - **MCP server**: Official. Supports latest MCP spec including Streamable HTTP. Free tier available. - **AGH use case**: Agent queries PostHog to check feature flag states, analyzes experiment results, and toggles flags based on performance data. LLM analytics track AGH's own AI usage costs and model performance. AGH observe events can be forwarded to PostHog for unified observability. - **Status**: Official. Best OSS analytics MCP. Free up to 1M events/month. #### Plausible + - **What it does**: Privacy-friendly, cookieless web analytics. Traffic stats, referrers, UTMs. No consent banners. - **MCP server**: Community. Queries traffic metrics, conversions, time-period comparisons. - **AGH use case**: Agent monitors website traffic after a deployment to detect anomalies (traffic drops, error spikes) and alerts the team. @@ -355,36 +397,42 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 8. Automation & Workflow #### Zapier + - **What it does**: Connects 8,000+ apps. Handles auth, rate limiting, parameter mapping. AI Agents and MCP support. - **MCP server**: Official. Agent can trigger any Zapier action (Slack message, Jira ticket, Google Sheet update). - **AGH use case**: Agent completing a code review automatically creates a Jira ticket for found issues, sends a Slack summary to the team channel, and updates a Google Sheet tracking code quality metrics -- all through a single AGH session. - **Status**: Official. Free tier (100 tasks/month). #### n8n + - **What it does**: Fair-code workflow automation. Self-hostable. 1,396 nodes (812 core + 584 community). Native MCP support. - **MCP server**: Community (`czlonkowski/n8n-mcp`). Provides AI assistants with node documentation and properties. 
- **AGH use case**: Agent designs and deploys an n8n workflow that monitors a GitHub repo for new issues, classifies them with AI, assigns to the right team member, and updates project tracking -- all orchestrated from an AGH session. Self-hosted n8n means data never leaves infrastructure. - **Status**: Community MCP. Self-hostable aligns with AGH's local-first philosophy. #### Make (formerly Integromat) + - **What it does**: Visual workflow builder with branching and parallel processing. 1,000+ integrations. - **MCP server**: Partial (via API). No dedicated MCP server found. - **AGH use case**: Agent creates a Make scenario to automate deployment notifications, connecting GitHub Actions to Slack and email. - **Status**: Would need to be built as an AGH extension wrapping Make's API. #### Pipedream + - **What it does**: Event-driven serverless automation. Run Python/Node.js/Go/Bash code. 2,700+ apps. - **MCP server**: Community. - **AGH use case**: Agent creates a Pipedream workflow that listens for webhooks from the production monitoring system, processes alerts with custom logic, and triggers remediation actions. - **Status**: Community server. #### Temporal + - **What it does**: Durable execution for mission-critical workflows. Workflow history, retry logic, long-running processes. - **MCP server**: Community (`mocksi/temporal-workflows`). - **AGH use case**: Agent designs and deploys Temporal workflows for data pipeline orchestration, implementing retry logic, compensation handlers, and monitoring. AGH tracks the workflow design decisions in memory. - **Status**: Community server on PulseMCP. #### Inngest + - **What it does**: Event-driven durable execution. Serverless + serverful. Retries, concurrency, throttling, rate limiting. Checkpointing for near-zero inter-step latency. AgentKit for multi-agent networks. - **MCP server**: Official. Dev Server MCP integration for AI-assisted development. 
Pre-built skills: `inngest-durable-functions`, `inngest-steps`, `inngest-flow-control`, `inngest-middleware`. - **AGH use case**: Agent uses Inngest to orchestrate a complex deployment pipeline: run tests, build artifacts, deploy to staging, run smoke tests, promote to production, send notifications. Each step is durable and retriable. AGH skills map to Inngest skills for a unified development experience. Inngest's AgentKit could power AGH's own multi-agent orchestration. @@ -395,24 +443,28 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### 9. Multi-Database & Unified Servers #### Google MCP Toolbox for Databases + - **What it does**: Single MCP server supporting 40+ data sources: PostgreSQL, MySQL, SQL Server, Oracle, MongoDB, Redis, Elasticsearch, CockroachDB, ClickHouse, Couchbase, Neo4j, Snowflake, Trino, and more. - **MCP server**: Official (Google, `googleapis/mcp-toolbox`). Configuration via `tools.yaml`. - **AGH use case**: Single AGH extension that gives agents access to any database in the infrastructure. Agent can join data across PostgreSQL and MongoDB, or migrate between database engines, without needing separate extensions per database. - **Status**: Official. Highest coverage of any single MCP server. #### DBHub + - **What it does**: Unified gateway for PostgreSQL, MySQL, SQLite, DuckDB. Consistent table browsing, schema inspection, safe read-only SQL. - **MCP server**: Community. - **AGH use case**: Agent uses DBHub as a universal database explorer for development environments with multiple database types. - **Status**: Community. Good for development/staging environments. #### MCP Omnisearch + - **What it does**: Unified access to Tavily, Brave, Kagi, Exa, GitHub, Linkup, and Firecrawl through a single MCP interface. - **MCP server**: Community (`spences10/mcp-omnisearch`). 
- **AGH use case**: Agent performs multi-source research by querying multiple search providers in parallel, deduplicating and ranking results. Single extension replaces 7 individual search integrations. - **Status**: Community. High-value aggregation. #### weave-mcp + - **What it does**: Universal CLI for 11 vector databases, including Weaviate, Supabase, MongoDB, Milvus, Chroma, Qdrant, Neo4j, Pinecone, OpenSearch, and Elasticsearch. Dual transport (HTTP + stdio). - **MCP server**: Community (`maximilien/weave-mcp`, v0.4.0). - **AGH use case**: Agent manages vector stores across providers -- migrating embeddings from Chroma (prototyping) to Qdrant (production) or benchmarking retrieval quality across multiple vector databases. @@ -424,29 +476,29 @@ This document catalogs 50+ integrations across 9 categories with concrete AGH ag ### Tier 1: Build First (High-value, strong alignment with AGH architecture) -| Integration | Rationale | -|---|---| -| **LanceDB** | Embedded, zero-config, local-first -- perfect fit for AGH's memory/consolidation system. Could replace or augment SQLite for semantic search. | -| **Ollama** | Local model inference. Enables AGH to run without external API dependencies. Critical for air-gapped or privacy-sensitive deployments. | -| **Supabase** | Most mature BaaS MCP. Covers database, auth, storage, functions. High developer demand. | -| **Neon** | Branch-safe migrations. Best cloud Postgres MCP. Already has MCP tools for AGH's use case. | -| **Firecrawl** | Best web scraping MCP. Enables agents to ingest external knowledge. High benchmark scores. | -| **Inngest** | Durable workflow orchestration. Go SDK. Event-driven architecture aligns with AGH. AgentKit for multi-agent. | -| **n8n** | Self-hosted workflow automation. 1,396 nodes. Privacy alignment with AGH. | -| **Google MCP Toolbox** | 40+ data sources from one extension. Maximum coverage, minimum effort. 
| +| Integration | Rationale | +| ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| **LanceDB** | Embedded, zero-config, local-first -- perfect fit for AGH's memory/consolidation system. Could replace or augment SQLite for semantic search. | +| **Ollama** | Local model inference. Enables AGH to run without external API dependencies. Critical for air-gapped or privacy-sensitive deployments. | +| **Supabase** | Most mature BaaS MCP. Covers database, auth, storage, functions. High developer demand. | +| **Neon** | Branch-safe migrations. Best cloud Postgres MCP. Already has MCP tools for AGH's use case. | +| **Firecrawl** | Best web scraping MCP. Enables agents to ingest external knowledge. High benchmark scores. | +| **Inngest** | Durable workflow orchestration. Go SDK. Event-driven architecture aligns with AGH. AgentKit for multi-agent. | +| **n8n** | Self-hosted workflow automation. 1,396 nodes. Privacy alignment with AGH. | +| **Google MCP Toolbox** | 40+ data sources from one extension. Maximum coverage, minimum effort. | ### Tier 2: Build Next (Strong value, good ecosystem) -| Integration | Rationale | -|---|---| -| **Qdrant** | Official MCP. Best OSS vector search with free tier. Good for AGH semantic memory. | -| **OpenRouter** | Multi-model gateway. Lets agents pick optimal model per task. | -| **Brave Search** | Official MCP. Privacy-first search. Aligns with AGH values. | -| **Exa** | Neural search. Best semantic search quality for research tasks. | -| **PostHog** | Official MCP. OSS analytics. Could power AGH's own usage analytics. | -| **Redis** | Official MCP. Cache/state management for distributed AGH deployments. | -| **Zapier** | 8,000+ app connectors. Broadest automation reach. | -| **ClickHouse** | Real-time analytics. Could power AGH's observe/metrics backend. 
| +| Integration | Rationale | +| ---------------- | ---------------------------------------------------------------------------------- | +| **Qdrant** | Official MCP. Best OSS vector search with free tier. Good for AGH semantic memory. | +| **OpenRouter** | Multi-model gateway. Lets agents pick optimal model per task. | +| **Brave Search** | Official MCP. Privacy-first search. Aligns with AGH values. | +| **Exa** | Neural search. Best semantic search quality for research tasks. | +| **PostHog** | Official MCP. OSS analytics. Could power AGH's own usage analytics. | +| **Redis** | Official MCP. Cache/state management for distributed AGH deployments. | +| **Zapier** | 8,000+ app connectors. Broadest automation reach. | +| **ClickHouse** | Real-time analytics. Could power AGH's observe/metrics backend. | ### Tier 3: Consider Later (Niche or lower priority) diff --git a/.compozy/tasks/ext-ideas/research/integrations_devops.md b/docs/ideas/ext-ideas/research/integrations_devops.md similarity index 82% rename from .compozy/tasks/ext-ideas/research/integrations_devops.md rename to docs/ideas/ext-ideas/research/integrations_devops.md index b772a2218..037caf175 100644 --- a/.compozy/tasks/ext-ideas/research/integrations_devops.md +++ b/docs/ideas/ext-ideas/research/integrations_devops.md @@ -15,45 +15,45 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b ## Integration Summary Table -| Category | Tool | MCP Server Exists? 
| Maturity | AGH Extension Priority | -|---|---|---|---|---| -| **Version Control** | GitHub | Yes (official + community) | Production | HIGH | -| | GitLab | Yes (official) | Production | HIGH | -| | Bitbucket | Yes (Atlassian remote MCP) | Beta | MEDIUM | -| **Project Management** | Linear | Yes (community) | Production | HIGH | -| | Jira | Yes (Atlassian remote MCP + mcp-atlassian) | Production | HIGH | -| | Shortcut | No official server found | N/A | LOW | -| | Notion | Yes (official hosted + self-hosted) | Production | HIGH | -| **CI/CD** | GitHub Actions | Yes (community) | Production | HIGH | -| | CircleCI | Yes (official) | Production | MEDIUM | -| | Jenkins | Yes (official plugin) | Production | MEDIUM | -| | ArgoCD | Yes (K8s MCP Toolkit) | Production | HIGH | -| **Deployment Platforms** | Vercel | Yes (official handler) | Production | MEDIUM | -| | Netlify | Yes (community) | Community | LOW | -| | Railway | Yes (official) | Production | MEDIUM | -| | Fly.io | Yes (unified deployment MCP) | Community | LOW | -| **Code Quality** | SonarQube | Yes (official by SonarSource) | Production | HIGH | -| | Semgrep | Yes (built into binary) | Production | HIGH | -| | Snyk | Yes (official, 11 tools) | Production | HIGH | -| | Trivy | Yes (plugin) | Production | MEDIUM | -| | Dependabot/Renovate | No MCP server | N/A | MEDIUM (build) | -| **Infrastructure** | Terraform | Yes (HashiCorp official) | Production | HIGH | -| | Pulumi | Yes (official) | Production | MEDIUM | -| | Docker | Yes (community) | Community | MEDIUM | -| | Kubernetes | Yes (multiple: kubectl, k8m, Lens) | Production | HIGH | -| | AWS | Yes (official, 60+ servers) | Production | HIGH | -| | GCP | Yes (official, preview) | Preview | MEDIUM | -| | Azure | Yes (official) | Production | MEDIUM | -| **Monitoring/Observability** | Sentry | Yes (official + monitoring) | Production | HIGH | -| | Datadog | Yes (official, GA March 2026) | Production | HIGH | -| | Grafana | Yes (official) | Production | HIGH 
| -| | PagerDuty | Yes (community) | Community | HIGH | -| **Documentation** | Notion | Yes (official) | Production | HIGH | -| | Confluence | Yes (Atlassian remote MCP) | Beta | MEDIUM | -| | Mintlify | Yes (auto-generated) | Production | LOW | -| | ReadMe | Yes (auto-generated) | Production | LOW | -| **Communication** | Slack | Yes (official) | Production | HIGH | -| | Discord | No official (5 community servers) | Community | LOW | +| Category | Tool | MCP Server Exists? | Maturity | AGH Extension Priority | +| ---------------------------- | ------------------- | ------------------------------------------ | ---------- | ---------------------- | +| **Version Control** | GitHub | Yes (official + community) | Production | HIGH | +| | GitLab | Yes (official) | Production | HIGH | +| | Bitbucket | Yes (Atlassian remote MCP) | Beta | MEDIUM | +| **Project Management** | Linear | Yes (community) | Production | HIGH | +| | Jira | Yes (Atlassian remote MCP + mcp-atlassian) | Production | HIGH | +| | Shortcut | No official server found | N/A | LOW | +| | Notion | Yes (official hosted + self-hosted) | Production | HIGH | +| **CI/CD** | GitHub Actions | Yes (community) | Production | HIGH | +| | CircleCI | Yes (official) | Production | MEDIUM | +| | Jenkins | Yes (official plugin) | Production | MEDIUM | +| | ArgoCD | Yes (K8s MCP Toolkit) | Production | HIGH | +| **Deployment Platforms** | Vercel | Yes (official handler) | Production | MEDIUM | +| | Netlify | Yes (community) | Community | LOW | +| | Railway | Yes (official) | Production | MEDIUM | +| | Fly.io | Yes (unified deployment MCP) | Community | LOW | +| **Code Quality** | SonarQube | Yes (official by SonarSource) | Production | HIGH | +| | Semgrep | Yes (built into binary) | Production | HIGH | +| | Snyk | Yes (official, 11 tools) | Production | HIGH | +| | Trivy | Yes (plugin) | Production | MEDIUM | +| | Dependabot/Renovate | No MCP server | N/A | MEDIUM (build) | +| **Infrastructure** | Terraform | Yes 
(HashiCorp official) | Production | HIGH | +| | Pulumi | Yes (official) | Production | MEDIUM | +| | Docker | Yes (community) | Community | MEDIUM | +| | Kubernetes | Yes (multiple: kubectl, k8m, Lens) | Production | HIGH | +| | AWS | Yes (official, 60+ servers) | Production | HIGH | +| | GCP | Yes (official, preview) | Preview | MEDIUM | +| | Azure | Yes (official) | Production | MEDIUM | +| **Monitoring/Observability** | Sentry | Yes (official + monitoring) | Production | HIGH | +| | Datadog | Yes (official, GA March 2026) | Production | HIGH | +| | Grafana | Yes (official) | Production | HIGH | +| | PagerDuty | Yes (community) | Community | HIGH | +| **Documentation** | Notion | Yes (official) | Production | HIGH | +| | Confluence | Yes (Atlassian remote MCP) | Beta | MEDIUM | +| | Mintlify | Yes (auto-generated) | Production | LOW | +| | ReadMe | Yes (auto-generated) | Production | LOW | +| **Communication** | Slack | Yes (official) | Production | HIGH | +| | Discord | No official (5 community servers) | Community | LOW | --- @@ -68,6 +68,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP servers:** Multiple options exist. The official GitHub MCP server plus community alternatives like `github-mcp-server` and `ko1ynnky/github-actions-mcp-server` (Actions-specific). The GitHub MCP is the most widely adopted MCP server in the ecosystem. **AGH use case -- Autonomous PR Lifecycle:** + 1. Agent receives a task via session (e.g., from Slack or Linear ticket). 2. Agent clones repo, creates branch, implements changes. 3. Agent opens PR via GitHub MCP, filling description with context from AGH session memory. @@ -84,6 +85,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official GitLab MCP server (`gitlab-org/editor-extensions/gitlab-mcp-server`), plus community server by zereight. 
GitLab also acts as an MCP client via Duo, connecting to external MCP servers like Jira and Slack. **AGH use case -- Pipeline Failure Triage:** + 1. GitLab webhook triggers AGH session when a pipeline fails. 2. Agent queries GitLab MCP for pipeline logs and failed job details. 3. Agent reads the failing test/build output, cross-references with recent MR diffs. @@ -103,6 +105,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Community server (`tacticlaunch/mcp-linear`) with GraphQL API integration. Supports SSE transport. **AGH use case -- Automated Sprint Ops:** + 1. Agent monitors Linear for new issues assigned to it (or a team). 2. When a bug ticket arrives, agent reads the description, searches codebase, proposes a fix. 3. Agent creates a PR (via GitHub MCP), links it to the Linear issue, and moves the issue to "In Progress." @@ -118,6 +121,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP servers:** Atlassian's official Remote MCP Server (beta, hosted on Cloudflare, first-party partnership with Anthropic). Also `sooperset/mcp-atlassian` open-source server covering both Jira and Confluence. Hainan Zhao's `mcp-gitlab-jira` for unified GitLab+Jira workflows. **AGH use case -- Cross-Platform Knowledge Worker:** + 1. Agent receives a Jira ticket about a production issue. 2. Agent searches Confluence for relevant runbooks and architecture docs. 3. Agent investigates the codebase, identifies the root cause. @@ -134,6 +138,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official hosted server at `mcp.notion.com/sse` with OAuth, plus self-hosted option via `@notionhq/notion-mcp-server` (npm). Version 2.0.0 uses the 2025-09-03 API with data sources as primary abstraction. **AGH use case -- Living Documentation Agent:** + 1. 
Agent monitors code changes (via GitHub MCP) and automatically updates Notion docs. 2. When a new API endpoint is added, agent generates documentation and creates a Notion page. 3. Agent searches Notion for existing docs to avoid duplication, updates cross-references. @@ -153,6 +158,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Community server (`ko1ynnky/github-actions-mcp-server`) with complete workflow management. Compatible with Claude Desktop, Codeium, Windsurf. **AGH use case -- CI Guardian Agent:** + 1. Agent continuously monitors GitHub Actions for the team's repositories. 2. On build failure, agent reads logs, identifies the failing step, and diagnoses the issue. 3. Agent either pushes a fix (for known patterns stored in AGH memory) or creates an issue with diagnosis. @@ -168,6 +174,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official CircleCI MCP server (`CircleCI-Public/mcp-server-circleci`). Features include failure diagnosis, flaky test detection, and interactive rollback guidance. **AGH use case -- Intelligent Build Doctor:** + 1. CircleCI webhook notifies AGH of a failed build. 2. Agent queries CircleCI MCP for structured error logs and test results. 3. Agent correlates failure with recent commits (via GitHub MCP). @@ -194,6 +201,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** K8s MCP Server Toolkit provides unified kubectl, helm, istioctl, and argocd tools. Supports AWS EKS, Google GKE, and Azure AKS. **AGH use case -- GitOps Deployment Agent:** + 1. Agent monitors Git repository for merged PRs to the main branch. 2. Agent verifies ArgoCD sync status after deployment. 3. If sync fails, agent queries ArgoCD for error details and Kubernetes events. 
@@ -212,11 +220,13 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **What it does:** Deploy applications, manage environments, view deployment logs, rollback deployments, manage environment variables, check health status. **Existing MCP servers:** + - **Vercel:** Official MCP Handler adapter for Next.js/Nuxt/Svelte. - **Railway:** Official MCP server (released Jan 2026) for deployment, service management, environment config. - **Unified Deployment MCP:** Covers Vercel, Render, Railway, and Fly.io from a single endpoint with 9 tools. **AGH use case -- Deploy & Monitor Agent:** + 1. Agent receives "deploy to staging" command in session. 2. Agent triggers deployment via platform MCP (Railway/Vercel). 3. Agent monitors deployment progress and health checks. @@ -237,6 +247,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official by SonarSource (`SonarSource/sonarqube-mcp-server`). 423 stars, 321 commits. Requires JDK 21+, also available as Docker image. Integrates with Claude Code, Cursor, Windsurf. AI CodeFix generates LLM-powered fix suggestions. **AGH use case -- Quality Gate Agent:** + 1. After PR creation, agent triggers SonarQube analysis. 2. Agent queries SonarQube MCP for new issues introduced by the PR. 3. Agent auto-fixes simple issues (code smells, formatting) and pushes a commit. @@ -253,6 +264,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official Snyk MCP at v1.6.1 (March 2026). Also `snyk/agent-scan` (1.9k stars) for scanning MCP servers themselves for security issues. **AGH use case -- Security Sentinel Agent:** + 1. Agent runs nightly security scans via Snyk MCP across all project repositories. 2. Agent triages findings by severity -- critical/high vulns get immediate PRs with fixes. 3. Agent scans container images before deployment, blocking vulnerable images. 
@@ -269,6 +281,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** The standalone MCP server was archived Oct 2025; MCP functionality is now built into the Semgrep binary itself. 639 stars (highest in category). **AGH use case -- Code Review Copilot:** + 1. Agent runs Semgrep on every PR before human review. 2. Agent applies custom rule sets based on project type (stored in AGH skills). 3. Agent provides inline comments on the PR with fix suggestions. @@ -293,6 +306,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** No MCP server exists for either tool. Notable gap identified in the ecosystem. **AGH use case -- Dependency Guardian Agent:** + 1. Agent monitors dependency versions across all repos. 2. Agent creates grouped PRs for dependency updates (similar to Renovate's grouping). 3. Agent runs security scans (via Snyk MCP) on proposed updates before creating PRs. @@ -313,6 +327,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official HashiCorp server (`hashicorp/terraform-mcp-server`). Available as Docker image. AWS Labs also offers a Terraform MCP for AWS best practices and Checkov compliance. **AGH use case -- Infrastructure Agent:** + 1. Agent receives infrastructure request (e.g., "spin up a staging database"). 2. Agent searches Terraform registry for appropriate modules. 3. Agent generates Terraform HCL, runs `terraform plan`, presents the plan for approval. @@ -337,12 +352,14 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **What it does:** kubectl operations, pod management, log access, resource monitoring, debugging, helm chart management. Multiple servers available for different needs. **Existing MCP servers:** + - **kubectl-mcp-server:** Direct kubectl access -- diagnose pod crashes, read logs, check events. 
- **k8m (multi-cluster):** 50+ tools for managing dev/staging/prod across 10+ clusters. - **Lens MCP Server:** Native EKS/AKS integration, visual cluster management. - **K8s MCP Toolkit:** Unified kubectl + helm + istioctl + argocd. **AGH use case -- Kubernetes SRE Agent:** + 1. Agent receives alert from PagerDuty/Grafana about pod crash loop. 2. Agent queries K8s MCP for pod events, logs, resource limits, and node health. 3. Agent identifies root cause (e.g., OOM kill, failed health check, bad config). @@ -363,6 +380,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Multi-cloud:** Cloud Pilot MCP supports AWS/Azure/GCP/Alibaba Cloud with 51,900+ operations and OpenTofu integration. **AGH use case -- Cloud Operations Agent:** + 1. Agent monitors cloud costs and resource utilization. 2. Agent identifies unused resources and proposes cleanup (with cost savings estimates). 3. Agent provisions new resources based on Terraform/Pulumi plans. @@ -382,6 +400,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official Sentry MCP server. Sentry also offers MCP server monitoring (beta, Aug 2025) built on OpenTelemetry for observing MCP servers themselves. Their own MCP server handles 50M requests/month. **AGH use case -- Error Response Agent:** + 1. Sentry webhook triggers AGH session on new critical error. 2. Agent queries Sentry MCP for full stack trace, affected users, error frequency. 3. Agent searches codebase for the relevant code, identifies the bug. @@ -399,6 +418,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official Datadog MCP Server (GA March 2026). Covers LLM Observability, product analytics, Cloud Network Monitoring, security, software delivery, synthetics, and workflow automation. Also community server (`shelfio/datadog-mcp`). **AGH use case -- Observability Investigation Agent:** + 1. 
Datadog alert triggers AGH session (e.g., p99 latency spike). 2. Agent queries Datadog MCP for relevant traces, logs, and metrics. 3. Agent correlates the spike with recent deployments (via GitHub/ArgoCD MCP). @@ -415,6 +435,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official Grafana MCP Server. AWS DevOps Agent includes a built-in Grafana MCP server supporting self-managed, Grafana Cloud, and Amazon Managed Grafana. **AGH use case -- Dashboard-Driven Diagnosis:** + 1. Agent queries Grafana dashboards for anomalies across services. 2. Agent correlates metrics with deployment timelines. 3. Agent generates incident summaries with relevant graphs/data for team review. @@ -429,6 +450,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Community MCP server. AWS DevOps Agent has native PagerDuty integration. **AGH use case -- On-Call Copilot:** + 1. PagerDuty alert triggers AGH session for on-call engineer. 2. Agent acknowledges the alert, gathers context from Datadog/Grafana/Sentry. 3. Agent runs diagnostic commands via K8s MCP, reads application logs. @@ -449,6 +471,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP server:** Official Slack MCP server hosted at `mcp.slack.com` with OAuth. GA early 2026. Note: rate limits changed May 2025 for non-Marketplace apps. **AGH use case -- Team Communication Hub:** + 1. Developers interact with AGH agents via Slack messages. 2. Agent posts deployment notifications, build status, and PR summaries to relevant channels. 3. Agent monitors channels for questions about code/architecture, answers using codebase knowledge. @@ -468,6 +491,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b **Existing MCP servers:** Both auto-generate MCP endpoints. Mintlify hosts at `/mcp` path of any docs site. 
~48% of documentation traffic is already from AI agents. **AGH use case -- Documentation-Aware Agent:** + 1. Agent queries Mintlify/ReadMe MCP to understand third-party API docs before writing integration code. 2. Agent uses documentation context to generate accurate API client code. 3. Agent validates generated code against API docs automatically. @@ -481,6 +505,7 @@ AGH extensions can leverage this ecosystem by wrapping existing MCP servers or b Based on this research, the following extension bundles represent the highest-value integrations: ### Bundle 1: Development Lifecycle (Ship Code) + - **GitHub/GitLab MCP** -- version control, PRs, code review - **Linear/Jira MCP** -- issue tracking, sprint management - **GitHub Actions/CircleCI MCP** -- CI pipeline management @@ -489,6 +514,7 @@ Based on this research, the following extension bundles represent the highest-va **Value prop:** Agent takes a ticket from "To Do" to "Merged PR" autonomously, with quality and security checks at every step. ### Bundle 2: Incident Response (Fix Production) + - **Sentry/Datadog MCP** -- error/performance alerting - **PagerDuty MCP** -- incident management - **Grafana MCP** -- metrics/dashboard access @@ -498,6 +524,7 @@ Based on this research, the following extension bundles represent the highest-va **Value prop:** Agent receives production alert, investigates across observability stack, diagnoses root cause, applies fix or rollback, resolves incident, posts summary -- reducing MTTR from hours to minutes. ### Bundle 3: Infrastructure Operations (Manage Infra) + - **Terraform/Pulumi MCP** -- infrastructure as code - **AWS/GCP/Azure MCP** -- cloud resource management - **Kubernetes + ArgoCD MCP** -- deployment and orchestration @@ -506,6 +533,7 @@ Based on this research, the following extension bundles represent the highest-va **Value prop:** Agent manages infrastructure lifecycle from provisioning to deployment to scaling, with cost optimization and compliance checks. 
### Bundle 4: Knowledge & Documentation (Stay Informed) + - **Notion/Confluence MCP** -- knowledge base management - **Slack MCP** -- team communication context - **GitHub MCP** -- code-level documentation @@ -519,13 +547,13 @@ Based on this research, the following extension bundles represent the highest-va These integrations have no existing MCP server and represent differentiation opportunities for AGH: -| Integration | What to Build | Why | -|---|---|---| -| **Dependabot/Renovate** | Dependency update management with security scanning | No MCP server exists; combining updates + security is unique | -| **GitHub Security Alerts** | Query Dependabot alerts, secret scanning, code scanning results | Notable gap -- no MCP for GitHub's security features | -| **Shortcut** | Issue tracking for teams using Shortcut | No MCP server found; growing user base | -| **Incident.io** | Modern incident management (replacing Opsgenie) | Growing platform, no MCP server found | -| **LaunchDarkly** | Feature flag management tied to deployments | Agent-controlled progressive rollouts | +| Integration | What to Build | Why | +| -------------------------- | --------------------------------------------------------------- | ------------------------------------------------------------ | +| **Dependabot/Renovate** | Dependency update management with security scanning | No MCP server exists; combining updates + security is unique | +| **GitHub Security Alerts** | Query Dependabot alerts, secret scanning, code scanning results | Notable gap -- no MCP for GitHub's security features | +| **Shortcut** | Issue tracking for teams using Shortcut | No MCP server found; growing user base | +| **Incident.io** | Modern incident management (replacing Opsgenie) | Growing platform, no MCP server found | +| **LaunchDarkly** | Feature flag management tied to deployments | Agent-controlled progressive rollouts | --- diff --git a/.compozy/tasks/ext-ideas/research/integrations_specialized.md 
b/docs/ideas/ext-ideas/research/integrations_specialized.md similarity index 75% rename from .compozy/tasks/ext-ideas/research/integrations_specialized.md rename to docs/ideas/ext-ideas/research/integrations_specialized.md index 4f9f8b6b7..04df60771 100644 --- a/.compozy/tasks/ext-ideas/research/integrations_specialized.md +++ b/docs/ideas/ext-ideas/research/integrations_specialized.md @@ -7,58 +7,58 @@ ## Summary Table -| # | Integration | Category | MCP Server Exists? | Effort to Build Extension | Priority | -|---|---|---|---|---|---| -| 1 | Playwright | Browser Automation | Yes (official, Microsoft) | Low -- wrap existing MCP | High | -| 2 | Browserbase + Stagehand | Browser Automation | Yes (official) | Low -- wrap existing MCP | High | -| 3 | Puppeteer | Browser Automation | Yes (community) | Low -- wrap existing MCP | Medium | -| 4 | BrowserUse | Browser Automation | Yes (open source) | Low -- wrap existing MCP | Medium | -| 5 | YouTube (transcripts + analytics) | Media / Content | Yes (multiple) | Low -- wrap existing MCP | High | -| 6 | Spotify | Media / Content | Yes (multiple) | Low -- wrap existing MCP | Medium | -| 7 | ElevenLabs (TTS) | Media / Content | Yes | Low -- wrap existing MCP | Medium | -| 8 | DALL-E 3 / GPT Image | Media / Content | Yes | Low -- wrap existing MCP | High | -| 9 | Flux (image gen) | Media / Content | Yes (Replicate-based) | Low -- wrap existing MCP | Medium | -| 10 | Luma AI (video gen) | Media / Content | Yes | Low -- wrap existing MCP | Medium | -| 11 | Remotion (programmatic video) | Media / Content | No | Medium -- build from API | Low | -| 12 | Pod Engine (podcast intel) | Media / Content | Yes (official) | Low -- wrap existing MCP | Low | -| 13 | Stripe | Finance | Yes (official, 25 tools) | Low -- wrap existing MCP | High | -| 14 | Plaid | Finance | No dedicated MCP | Medium -- build from REST API | Medium | -| 15 | Coinbase | Finance | Yes (official) | Low -- wrap existing MCP | Medium | -| 16 | Stock Market (Yahoo 
Finance, etc.) | Finance | Yes (multiple) | Low -- wrap existing MCP | Medium | -| 17 | CoinGecko / CoinMarketCap | Finance | Yes | Low -- wrap existing MCP | Low | -| 18 | Home Assistant | IoT / Smart Home | Yes (official + community) | Low -- wrap existing MCP | High | -| 19 | MQTT (via Home Assistant) | IoT / Smart Home | Indirect (via HA) | Medium -- build or compose | Medium | -| 20 | AWS S3 | Cloud Storage | Yes (official + community) | Low -- wrap existing MCP | High | -| 21 | Cloudflare R2 | Cloud Storage | Yes (S3-compat MCP) | Low -- wrap existing MCP | Medium | -| 22 | Backblaze B2 | Cloud Storage | Partial (S3-compat) | Low -- use S3-compat MCP | Low | -| 23 | Google Cloud Storage | Cloud Storage | Partial (S3-compat interop) | Medium -- build or adapt | Low | -| 24 | Snyk | Security | Yes (official, 11 tools) | Low -- wrap existing MCP | High | -| 25 | SonarQube | Security | Yes (official, 423 stars) | Low -- wrap existing MCP | High | -| 26 | OWASP ZAP (DAST) | Security | Partial (via DevSecOps-MCP) | Medium -- bundle aggregator | Medium | -| 27 | Semgrep | Security | Yes (official) | Low -- wrap existing MCP | Medium | -| 28 | Twitter/X | Social Media | Yes (multiple, fragmented) | Low -- wrap existing MCP | High | -| 29 | Bluesky (AT Protocol) | Social Media | Yes (57 tools) | Low -- wrap existing MCP | Medium | -| 30 | LinkedIn | Social Media | Partial (via aggregators) | Medium -- aggregator or build | Medium | -| 31 | Reddit | Social Media | Partial (via aggregators) | Medium -- aggregator or build | Low | -| 32 | Mastodon | Social Media | Partial (via multi-platform) | Low -- wrap existing | Low | -| 33 | SendGrid | Email Marketing | Yes (official, 14+ tools) | Low -- wrap existing MCP | Medium | -| 34 | Resend | Email Marketing | Yes (community) | Low -- wrap existing MCP | Medium | -| 35 | Mailchimp | Email Marketing | Yes (community) | Low -- wrap existing MCP | Low | -| 36 | ConvertKit (Kit) | Email Marketing | No | Medium -- build from API 
| Low | -| 37 | Google Maps | Maps / Location | Yes (official + community) | Low -- wrap existing MCP | High | -| 38 | Mapbox | Maps / Location | Yes (official) | Low -- wrap existing MCP | Medium | -| 39 | OpenStreetMap | Maps / Location | Yes (multiple) | Low -- wrap existing MCP | Medium | -| 40 | Blender 3D | Niche / Creative | Yes | Low -- wrap existing MCP | Low | -| 41 | Unity / Unreal Engine | Niche / Game Dev | Yes (both) | Low -- wrap existing MCP | Low | -| 42 | Minecraft | Niche / Game Dev | Yes | Low -- wrap existing MCP | Low | -| 43 | ROS (Robot OS) | Niche / Robotics | Yes | Low -- wrap existing MCP | Low | -| 44 | OctoEverywhere (3D Printer) | Niche / Hardware | Yes | Low -- wrap existing MCP | Low | -| 45 | KiCAD (PCB design) | Niche / Engineering | Yes | Low -- wrap existing MCP | Low | -| 46 | Ableton Live | Niche / Music | Yes | Low -- wrap existing MCP | Low | -| 47 | Salesforce CRM | Niche / Business | Yes | Low -- wrap existing MCP | Medium | -| 48 | HubSpot CRM | Niche / Business | Yes | Low -- wrap existing MCP | Medium | -| 49 | Odoo ERP | Niche / Business | Yes | Low -- wrap existing MCP | Low | -| 50 | Meta-MCP (Magg) | Niche / Agent Infra | Yes | Low -- wrap existing MCP | Medium | +| # | Integration | Category | MCP Server Exists? 
| Effort to Build Extension | Priority | +| --- | ---------------------------------- | ------------------- | ---------------------------- | ----------------------------- | -------- | +| 1 | Playwright | Browser Automation | Yes (official, Microsoft) | Low -- wrap existing MCP | High | +| 2 | Browserbase + Stagehand | Browser Automation | Yes (official) | Low -- wrap existing MCP | High | +| 3 | Puppeteer | Browser Automation | Yes (community) | Low -- wrap existing MCP | Medium | +| 4 | BrowserUse | Browser Automation | Yes (open source) | Low -- wrap existing MCP | Medium | +| 5 | YouTube (transcripts + analytics) | Media / Content | Yes (multiple) | Low -- wrap existing MCP | High | +| 6 | Spotify | Media / Content | Yes (multiple) | Low -- wrap existing MCP | Medium | +| 7 | ElevenLabs (TTS) | Media / Content | Yes | Low -- wrap existing MCP | Medium | +| 8 | DALL-E 3 / GPT Image | Media / Content | Yes | Low -- wrap existing MCP | High | +| 9 | Flux (image gen) | Media / Content | Yes (Replicate-based) | Low -- wrap existing MCP | Medium | +| 10 | Luma AI (video gen) | Media / Content | Yes | Low -- wrap existing MCP | Medium | +| 11 | Remotion (programmatic video) | Media / Content | No | Medium -- build from API | Low | +| 12 | Pod Engine (podcast intel) | Media / Content | Yes (official) | Low -- wrap existing MCP | Low | +| 13 | Stripe | Finance | Yes (official, 25 tools) | Low -- wrap existing MCP | High | +| 14 | Plaid | Finance | No dedicated MCP | Medium -- build from REST API | Medium | +| 15 | Coinbase | Finance | Yes (official) | Low -- wrap existing MCP | Medium | +| 16 | Stock Market (Yahoo Finance, etc.) 
| Finance | Yes (multiple) | Low -- wrap existing MCP | Medium | +| 17 | CoinGecko / CoinMarketCap | Finance | Yes | Low -- wrap existing MCP | Low | +| 18 | Home Assistant | IoT / Smart Home | Yes (official + community) | Low -- wrap existing MCP | High | +| 19 | MQTT (via Home Assistant) | IoT / Smart Home | Indirect (via HA) | Medium -- build or compose | Medium | +| 20 | AWS S3 | Cloud Storage | Yes (official + community) | Low -- wrap existing MCP | High | +| 21 | Cloudflare R2 | Cloud Storage | Yes (S3-compat MCP) | Low -- wrap existing MCP | Medium | +| 22 | Backblaze B2 | Cloud Storage | Partial (S3-compat) | Low -- use S3-compat MCP | Low | +| 23 | Google Cloud Storage | Cloud Storage | Partial (S3-compat interop) | Medium -- build or adapt | Low | +| 24 | Snyk | Security | Yes (official, 11 tools) | Low -- wrap existing MCP | High | +| 25 | SonarQube | Security | Yes (official, 423 stars) | Low -- wrap existing MCP | High | +| 26 | OWASP ZAP (DAST) | Security | Partial (via DevSecOps-MCP) | Medium -- bundle aggregator | Medium | +| 27 | Semgrep | Security | Yes (official) | Low -- wrap existing MCP | Medium | +| 28 | Twitter/X | Social Media | Yes (multiple, fragmented) | Low -- wrap existing MCP | High | +| 29 | Bluesky (AT Protocol) | Social Media | Yes (57 tools) | Low -- wrap existing MCP | Medium | +| 30 | LinkedIn | Social Media | Partial (via aggregators) | Medium -- aggregator or build | Medium | +| 31 | Reddit | Social Media | Partial (via aggregators) | Medium -- aggregator or build | Low | +| 32 | Mastodon | Social Media | Partial (via multi-platform) | Low -- wrap existing | Low | +| 33 | SendGrid | Email Marketing | Yes (official, 14+ tools) | Low -- wrap existing MCP | Medium | +| 34 | Resend | Email Marketing | Yes (community) | Low -- wrap existing MCP | Medium | +| 35 | Mailchimp | Email Marketing | Yes (community) | Low -- wrap existing MCP | Low | +| 36 | ConvertKit (Kit) | Email Marketing | No | Medium -- build from API | Low | +| 37 | 
Google Maps | Maps / Location | Yes (official + community) | Low -- wrap existing MCP | High | +| 38 | Mapbox | Maps / Location | Yes (official) | Low -- wrap existing MCP | Medium | +| 39 | OpenStreetMap | Maps / Location | Yes (multiple) | Low -- wrap existing MCP | Medium | +| 40 | Blender 3D | Niche / Creative | Yes | Low -- wrap existing MCP | Low | +| 41 | Unity / Unreal Engine | Niche / Game Dev | Yes (both) | Low -- wrap existing MCP | Low | +| 42 | Minecraft | Niche / Game Dev | Yes | Low -- wrap existing MCP | Low | +| 43 | ROS (Robot OS) | Niche / Robotics | Yes | Low -- wrap existing MCP | Low | +| 44 | OctoEverywhere (3D Printer) | Niche / Hardware | Yes | Low -- wrap existing MCP | Low | +| 45 | KiCAD (PCB design) | Niche / Engineering | Yes | Low -- wrap existing MCP | Low | +| 46 | Ableton Live | Niche / Music | Yes | Low -- wrap existing MCP | Low | +| 47 | Salesforce CRM | Niche / Business | Yes | Low -- wrap existing MCP | Medium | +| 48 | HubSpot CRM | Niche / Business | Yes | Low -- wrap existing MCP | Medium | +| 49 | Odoo ERP | Niche / Business | Yes | Low -- wrap existing MCP | Low | +| 50 | Meta-MCP (Magg) | Niche / Agent Infra | Yes | Low -- wrap existing MCP | Medium | --- @@ -113,6 +113,7 @@ **What it does:** Extract video transcripts, search video content, access channel analytics, manage playlists. Two architectural approaches: yt-dlp-based (no API key needed, transcript-only) and YouTube Data API v3-based (full metadata, 10K daily quota). **MCP servers:** + - `anaisbetts/mcp-youtube` -- most popular (490+ stars), yt-dlp-based, transcript extraction - `kimtaeyoon83/mcp-server-youtube-transcript` -- Python-based, pagination for long transcripts, proxy support - YouTube Data API servers for full metadata access @@ -126,6 +127,7 @@ **What it does:** Control playback, manage playlists, search catalog, analyze listening patterns. Premium required for queue operations. 
**MCP servers:** + - `gupta-kush/spotify-mcp` -- 93 tools including smart shuffle, vibe analysis, artist network mapping - `allensy/spotify-mcp` -- Dockerized, basic playback and search - Composio Spotify MCP -- via CLI integration @@ -236,6 +238,7 @@ **What it does:** Real-time prices, fundamentals, earnings data, stock screening, and historical charts. **MCP servers:** + - Yahoo Finance MCP -- real-time and historical market data - Financial Datasets MCP -- comprehensive financial analyst toolkit - 11+ servers ranked by Lambda Finance for different capabilities @@ -249,6 +252,7 @@ **What it does:** Crypto market data, token prices, market caps, trading volumes, exchange data. **MCP servers:** + - CoinMarketCap MCP (`szcharlesji/coinmarketcap`) - CoinGecko official MCP server @@ -289,6 +293,7 @@ **What it does:** Browse buckets, read/write objects, generate presigned URLs, run SQL queries against S3 Tables, CSV-to-table conversion. **MCP servers:** + - Official AWS: S3 Tables MCP Server (`awslabs/mcp/servers/s3-tables-mcp-server`) - `txn2/mcp-s3` -- S3 and S3-compatible storage, multi-account support - `gangadharrr/aws-s3-mcp` -- full bucket and object management @@ -383,6 +388,7 @@ **What it does:** Post tweets, search content, manage drafts, publish threads. **MCP servers:** + - `EnesCinr/twitter-mcp` (375 stars, TypeScript, MIT) -- most popular, posting and searching - `vidhupv/x-mcp` (61 stars, Python, MIT) -- draft management and thread publishing - 8+ competing servers (most fragmented category) @@ -450,6 +456,7 @@ **What it does:** Email delivery, marketing campaigns, contact list management, template management, deliverability monitoring. 
**MCP servers:** + - `garethcurl/sendgrid-mcp` -- open source, Flask/Python, stats and template management - MCPBundles SendGrid -- 20 tools, remote hosted - Multiple community alternatives @@ -497,6 +504,7 @@ **What it does:** 18+ tools: geocoding, reverse geocoding, nearby search, place details, directions, distance matrix, elevation, timezone, weather, air quality, static maps, batch geocoding (50 addresses), route optimization (25 stops), local rank tracking. **MCP servers:** + - Official Anthropic-listed Google Maps MCP (`modelcontextprotocol/google-maps`) - `cablate/mcp-google-map` -- 18 tools, advanced features like route planning and competitor rank tracking @@ -519,6 +527,7 @@ **What it does:** Open geospatial data, geocoding, POI search, SQL queries against OSM data with PostGIS. **MCP servers:** + - `wiseman/osm-mcp` -- PostgreSQL/PostGIS integration, web-based map viewing - `jagan-shanmugam/open-streetmap-mcp` -- geocoding and location services @@ -658,41 +667,41 @@ These integrations have mature, official MCP servers and address the most common agent use cases: -| Extension | Why | -|---|---| -| **Playwright** | Web automation is foundational for any agent system | -| **Stripe** | Payment operations are critical for business agents | -| **S3** | Cloud storage access is a basic infrastructure need | -| **Snyk + SonarQube** | Security scanning is essential for code-focused agents | -| **Google Maps** | Location intelligence enables logistics, real estate, and local business agents | -| **YouTube** | Content analysis and research is high-demand | -| **Home Assistant** | Smart home control is the killer IoT use case | -| **Twitter/X** | Social media management is a top agent use case | +| Extension | Why | +| -------------------- | ------------------------------------------------------------------------------- | +| **Playwright** | Web automation is foundational for any agent system | +| **Stripe** | Payment operations are critical for business agents | +| 
**S3** | Cloud storage access is a basic infrastructure need | +| **Snyk + SonarQube** | Security scanning is essential for code-focused agents | +| **Google Maps** | Location intelligence enables logistics, real estate, and local business agents | +| **YouTube** | Content analysis and research is high-demand | +| **Home Assistant** | Smart home control is the killer IoT use case | +| **Twitter/X** | Social media management is a top agent use case | ### Tier 2 -- Medium Impact, Low-Medium Effort -| Extension | Why | -|---|---| -| **Browserbase + Stagehand** | Cloud browsers with bot evasion for scraping | -| **DALL-E / Flux** | Image generation for content and design agents | -| **Coinbase** | Crypto operations and agent-to-agent payments | -| **Mapbox** | Advanced geospatial features beyond Google Maps | -| **Bluesky** | Growing decentralized social platform | -| **SendGrid / Resend** | Email operations for marketing and transactional workflows | -| **Spotify** | Music/audio control for personal assistant agents | -| **Stock Market** | Financial analysis and monitoring | -| **Meta-MCP (Magg)** | Self-extending agent capabilities -- deeply aligned with AGH's extensibility model | +| Extension | Why | +| --------------------------- | ---------------------------------------------------------------------------------- | +| **Browserbase + Stagehand** | Cloud browsers with bot evasion for scraping | +| **DALL-E / Flux** | Image generation for content and design agents | +| **Coinbase** | Crypto operations and agent-to-agent payments | +| **Mapbox** | Advanced geospatial features beyond Google Maps | +| **Bluesky** | Growing decentralized social platform | +| **SendGrid / Resend** | Email operations for marketing and transactional workflows | +| **Spotify** | Music/audio control for personal assistant agents | +| **Stock Market** | Financial analysis and monitoring | +| **Meta-MCP (Magg)** | Self-extending agent capabilities -- deeply aligned with AGH's extensibility 
model | ### Tier 3 -- Niche but Differentiated -| Extension | Why | -|---|---| -| **Ableton Live** | Creative AI for music production | -| **ROS / Isaac Sim** | Robotics control opens entirely new agent domains | -| **3D Printing** | Physical-world manufacturing automation | -| **KiCAD** | Hardware design automation | -| **CRM (Salesforce/HubSpot)** | Enterprise sales automation | -| **Agoragentic** | Agent marketplace enables AGH agents to hire other agents | +| Extension | Why | +| ---------------------------- | --------------------------------------------------------- | +| **Ableton Live** | Creative AI for music production | +| **ROS / Isaac Sim** | Robotics control opens entirely new agent domains | +| **3D Printing** | Physical-world manufacturing automation | +| **KiCAD** | Hardware design automation | +| **CRM (Salesforce/HubSpot)** | Enterprise sales automation | +| **Agoragentic** | Agent marketplace enables AGH agents to hire other agents | --- diff --git a/.compozy/tasks/extensability/_meta.md b/docs/ideas/extensability/_meta.md similarity index 99% rename from .compozy/tasks/extensability/_meta.md rename to docs/ideas/extensability/_meta.md index 424b25d6b..6303e06a8 100644 --- a/.compozy/tasks/extensability/_meta.md +++ b/docs/ideas/extensability/_meta.md @@ -4,6 +4,7 @@ updated_at: 2026-04-12T04:00:24.641275Z --- ## Summary + - Total: 0 - Completed: 0 - Pending: 0 diff --git a/.compozy/tasks/extensability/analysis.md b/docs/ideas/extensability/analysis.md similarity index 52% rename from .compozy/tasks/extensability/analysis.md rename to docs/ideas/extensability/analysis.md index 09e70bcc8..5fa54b40b 100644 --- a/.compozy/tasks/extensability/analysis.md +++ b/docs/ideas/extensability/analysis.md @@ -10,30 +10,30 @@ What AGH already has and what is explicitly delegated to ACP agents: -| Capability | Status | Details | -|---|---|---| -| Session lifecycle state machine | **Exists** | `StateStarting → StateActive → StateStopping → StateStopped` in 
`internal/session/session.go:23-28` | -| Approval/permissions | **Exists (ACP passthrough)** | `ApproveRequest`, `ResolvePermission()`, `PermissionMode` config. Flows through to ACP agents. Sufficient for now. | -| Health endpoint | **Exists** | `GET /api/observe/health` returns uptime, active sessions, DB sizes, version | -| Context compaction | **ACP agents handle it** | Claude Code, Codex, Gemini CLI do their own compaction. Not AGH's concern. | -| MCP server integration | **By design: delegation** | Skills declare MCP servers in frontmatter → `MCPResolver` collects them → passed to ACP agents at startup via `acp.StartOpts.MCPServers`. AGH is not an MCP client; the agents are. | -| Skills system | **Exists (5-tier precedence)** | `SourceBundled < SourceMarketplace < SourceUser < SourceAdditional < SourceWorkspace`. Registry with workspace cache, provenance verification, content safety checks. | -| Hook system | **Exists (info-only)** | `HookRunner` dispatches subprocess hooks for `on_session_created` and `on_session_stopped`. Cannot block or modify. Env allowlist isolation. | -| Event recording | **Exists** | `internal/observe` with per-session SQLite event stores | -| Memory system | **Exists** | Dual-scope (global + workspace) with dream consolidation in `internal/memory/consolidation` | -| Workspace resolver | **Exists** | Config merge, agent definition resolution, additional dirs, workspace-scoped skills | +| Capability | Status | Details | +| ------------------------------- | ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Session lifecycle state machine | **Exists** | `StateStarting → StateActive → StateStopping → StateStopped` in `internal/session/session.go:23-28` | +| Approval/permissions | **Exists (ACP passthrough)** | `ApproveRequest`, `ResolvePermission()`, `PermissionMode` config. 
Flows through to ACP agents. Sufficient for now. | +| Health endpoint | **Exists** | `GET /api/observe/health` returns uptime, active sessions, DB sizes, version | +| Context compaction | **ACP agents handle it** | Claude Code, Codex, Gemini CLI do their own compaction. Not AGH's concern. | +| MCP server integration | **By design: delegation** | Skills declare MCP servers in frontmatter → `MCPResolver` collects them → passed to ACP agents at startup via `acp.StartOpts.MCPServers`. AGH is not an MCP client; the agents are. | +| Skills system | **Exists (5-tier precedence)** | `SourceBundled < SourceMarketplace < SourceUser < SourceAdditional < SourceWorkspace`. Registry with workspace cache, provenance verification, content safety checks. | +| Hook system | **Exists (info-only)** | `HookRunner` dispatches subprocess hooks for `on_session_created` and `on_session_stopped`. Cannot block or modify. Env allowlist isolation. | +| Event recording | **Exists** | `internal/observe` with per-session SQLite event stores | +| Memory system | **Exists** | Dual-scope (global + workspace) with dream consolidation in `internal/memory/consolidation` | +| Workspace resolver | **Exists** | Config merge, agent definition resolution, additional dirs, workspace-scoped skills | ### What's Actually Missing -| Gap | Impact | Evidence | -|---|---|---| -| **No extension architecture** | Users can't create tools, hooks, or integrations without modifying Go source | No `internal/extension` package. No plugin manifest. No subprocess or Wasm extension loading. | -| **No tool registry** | Tools are just `[]string` in `AgentPayload` (`internal/api/contract/contract.go:71`). No schema, no availability gating, no namespacing. | No `Tool` interface or `ToolDriver` type anywhere in codebase. | -| **Hooks can't block/modify** | `HookRunner.RunHooks()` returns `[]HookResult` but nothing reads or acts on results. Only 2 events (`on_session_created`, `on_session_stopped`). 
| `internal/skills/hooks.go:81-125` — results are captured but never used for decision-making. | -| **No session stop reason** | 4 binary states. No classification of *why* a session stopped (user cancel, crash, timeout, budget, loop). | `SessionInfo` has no `StopReason` field. `finalizeStopped()` records an error event but doesn't classify. | -| **No session repair on resume** | `Resume()` re-resolves workspace/agent but doesn't validate stored state integrity. | `manager_lifecycle.go:186-209` — reads meta, re-resolves, but no consistency checks. | -| **No loop/recursion guard** | Zero depth limiting or cycle detection for tool/agent recursion. | No matches for `depth`, `recursion`, `cycle` in session/acp code. | -| **Skills progressive disclosure** | **Resolved on 2026-04-09**. Skills are now metadata-only by default, with body content loaded explicitly on demand in the registry/API/CLI/web flow. | Implemented via metadata-only `Skill` objects plus explicit content loading endpoint/registry path. | +| Gap | Impact | Evidence | +| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| **No extension architecture** | Users can't create tools, hooks, or integrations without modifying Go source | No `internal/extension` package. No plugin manifest. No subprocess or Wasm extension loading. | +| **No tool registry** | Tools are just `[]string` in `AgentPayload` (`internal/api/contract/contract.go:71`). No schema, no availability gating, no namespacing. | No `Tool` interface or `ToolDriver` type anywhere in codebase. | +| **Hooks can't block/modify** | `HookRunner.RunHooks()` returns `[]HookResult` but nothing reads or acts on results. Only 2 events (`on_session_created`, `on_session_stopped`). 
| `internal/skills/hooks.go:81-125` — results are captured but never used for decision-making. | +| **No session stop reason** | Only 4 coarse lifecycle states. No classification of _why_ a session stopped (user cancel, crash, timeout, budget, loop). | `SessionInfo` has no `StopReason` field. `finalizeStopped()` records an error event but doesn't classify. | +| **No session repair on resume** | `Resume()` re-resolves workspace/agent but doesn't validate stored state integrity. | `manager_lifecycle.go:186-209` — reads meta, re-resolves, but no consistency checks. | +| **No loop/recursion guard** | Zero depth limiting or cycle detection for tool/agent recursion. | No matches for `depth`, `recursion`, `cycle` in session/acp code. | +| **Skills progressive disclosure** | **Resolved on 2026-04-09**. Skills are now metadata-only by default, with body content loaded explicitly on demand in the registry/API/CLI/web flow. | Implemented via metadata-only `Skill` objects plus explicit content loading endpoint/registry path. | --- @@ -46,12 +46,13 @@ What AGH already has and what is explicitly delegated to ACP agents: **Current state:** `HookRunner` runs subprocess hooks for 2 events. Results captured but ignored.
**Target state:** + - Extend `HookRunner` to support **structured responses**: `{continue: bool, updatedInput?, transformedResult?, reason?}` - Add hook points: `session.pre_create`, `session.pre_prompt`, `event.post_record`, `agent.spawned`, `agent.crashed`, `session.pre_stop` - Support 2 hook executor types initially (3rd added by extension architecture later): - **Subprocess hooks** (existing `HookRunner`, extend with structured output parsing) - **Go-native hooks** (typed function-pointer fields on Manager structs, nil-checked — GoClaw pattern that aligns with AGH's "no event bus" principle) - - *(Future, via extension architecture)* **Wasm hooks** for sandboxed in-process execution + - _(Future, via extension architecture)_ **Wasm hooks** for sandboxed in-process execution - Pre-hooks can **block** (return `continue: false`) or **modify** (return `updatedInput`) - Post-hooks can **transform** (return `transformedResult`) or trigger side effects - Hook ordering: Go-native first (fastest, in-process), then subprocess, short-circuit on any deny @@ -68,15 +69,16 @@ What AGH already has and what is explicitly delegated to ACP agents: **Recommended: Three-tier hybrid** (validated by Terraform, VS Code, Grafana, Neovim, Claude Code patterns): -| Tier | Mechanism | Use Case | Language Support | Performance | -|---|---|---|---|---| -| **L1: Go-native** | Go interfaces compiled in | First-party core functionality | Go only | Fastest (in-process) | -| **L2: WebAssembly** | Extism + wazero (pure Go, zero CGO) | Hooks, validators, transformers | Rust, Go, TS (AssemblyScript), C, 16+ langs | ~1-10us/call | -| **L3: Subprocess** | JSON-RPC over stdio | Agent drivers, memory backends, API extensions, tools | Any language | ~100-500us/call | +| Tier | Mechanism | Use Case | Language Support | Performance | +| ------------------- | ----------------------------------- | ----------------------------------------------------- | ------------------------------------------- | 
-------------------- | +| **L1: Go-native** | Go interfaces compiled in | First-party core functionality | Go only | Fastest (in-process) | +| **L2: WebAssembly** | Extism + wazero (pure Go, zero CGO) | Hooks, validators, transformers | Rust, Go, TS (AssemblyScript), C, 16+ langs | ~1-10us/call | +| **L3: Subprocess** | JSON-RPC over stdio | Agent drivers, memory backends, API extensions, tools | Any language | ~100-500us/call | **Key insight:** L3 is a generalization of AGH's existing ACP subprocess pattern (`internal/acp`). The extension protocol reuses the same launch-binary, JSON-RPC-over-stdio, graceful-shutdown lifecycle. **Deliverables:** + - `internal/extension` package: `Manager`, `Registry`, manifest loading - Extension manifest format (`extension.toml`) - Subprocess extension lifecycle (reuses `internal/acp` patterns) @@ -97,6 +99,7 @@ What AGH already has and what is explicitly delegated to ACP agents: **Current state:** `AgentPayload.Tools` is `[]string`. No schema, no central registry, no availability checking. **Target state:** + ```go type ToolDriver interface { Name() string @@ -129,6 +132,7 @@ type ToolDriver interface { **Current state:** 4 states: `Starting → Active → Stopping → Stopped`. No reason tracking. **Target state:** + ```go type StopReason string const ( @@ -162,6 +166,7 @@ const ( **Current state:** `Resume()` reads `SessionMeta`, re-resolves workspace/agent, starts agent. No validation. **Target state:** + - **Workspace validation:** Check `resolvedWorkspace.RootDir` still exists and is accessible - **Agent validation:** Verify agent definition still present in config. If renamed/removed, return descriptive error - **State consistency:** If meta says `active` but process is dead, transition to `stopped` with `StopReason = "agent_crashed"` @@ -181,6 +186,7 @@ const ( **Current state:** Zero protection. No depth limiting, no cycle detection. 
**Target state:** + - **Iteration budget:** Configurable per-agent `max_iterations` (default: 200). Tracked in session. When exceeded → stop with `StopReason = "max_iterations"`. - **Cycle detection:** SHA256 fingerprint of last N tool-call sequences. If pattern repeats K times → stop with `StopReason = "loop_detected"`. Configurable sensitivity. - **Delegation depth:** If AGH ever supports agent-to-agent delegation (Phase 3), enforce `MAX_DEPTH` (default: 5) via context propagation. @@ -201,6 +207,7 @@ const ( **Previous state:** `ParseSkillFile()` read complete content into `Skill.Content`, and API responses included full content. **Implemented state:** + - `skills.Skill` is metadata-only; skill bodies are no longer retained on the loaded registry object. - Registry content loading is explicit via `Registry.LoadContent(...)`, covering filesystem and bundled skills. - Skill list/detail API responses are metadata-only. @@ -233,13 +240,13 @@ const ( **Parallelism:** P0 (hooks) and P3-P5 (session resilience) can start simultaneously — zero cross-dependencies. P6 (skill disclosure) can also be done at any time. 
-| Techspec | Covers | Dependencies | Size Estimate | -|---|---|---|---| -| `techspec-lifecycle-hooks` | Hook taxonomy, structured protocol, Go-native executors, blocking/modification, subprocess output parsing | None | Medium | -| `techspec-extension-architecture` | Three-tier model, manifest, Manager, Registry, subprocess protocol, Wasm runtime, TypeScript SDK design | Lifecycle hooks | Large | -| `techspec-tool-registry` | ToolDriver interface, registry, namespacing, availability gating, toolset composition, hybrid search | Lifecycle hooks, Extension architecture | Medium | -| `techspec-session-resilience` | Stop reasons, session repair, loop/recursion guard, iteration budgets | None | Medium | -| `techspec-skill-progressive-disclosure` | Lazy content loading, API changes, context injection changes | None | Implemented on 2026-04-09 | +| Techspec | Covers | Dependencies | Size Estimate | +| --------------------------------------- | --------------------------------------------------------------------------------------------------------- | --------------------------------------- | ------------------------- | +| `techspec-lifecycle-hooks` | Hook taxonomy, structured protocol, Go-native executors, blocking/modification, subprocess output parsing | None | Medium | +| `techspec-extension-architecture` | Three-tier model, manifest, Manager, Registry, subprocess protocol, Wasm runtime, TypeScript SDK design | Lifecycle hooks | Large | +| `techspec-tool-registry` | ToolDriver interface, registry, namespacing, availability gating, toolset composition, hybrid search | Lifecycle hooks, Extension architecture | Medium | +| `techspec-session-resilience` | Stop reasons, session repair, loop/recursion guard, iteration budgets | None | Medium | +| `techspec-skill-progressive-disclosure` | Lazy content loading, API changes, context injection changes | None | Implemented on 2026-04-09 | --- @@ -247,30 +254,32 @@ const ( These are validated as valuable by the analysis but 
explicitly deferred per project priorities: -| Feature | Rationale for Deferral | -|---|---| +| Feature | Rationale for Deferral | +| ------------------------------------------- | -------------------------------------------------------------------------------------------------- | | Permission cascade (beyond ACP passthrough) | Current approval mode works. More sophisticated cascade when multi-user or enterprise needs arise. | -| Budget enforcement (token/cost limits) | Important but not blocking. Can layer in after observe tracks usage. | -| FTS5 cross-session search | Powerful for recall but not needed for core extensibility. Phase 2 memory work. | -| Static/dynamic prompt split | Optimization. ACP agents manage their own prompts. | -| Cron scheduler + event triggers | Extension candidate once extension architecture exists. Not core. | -| Channel adapters | Extension once extension architecture exists. Define interface later. | -| Agent-to-agent networking (A2A) | Phase 3. Define `AgentPeer` interface later. | -| Knowledge graph memory backend | Extension on top of memory system. | -| Workflow engine | Extension composing session primitives. | -| Extension marketplace/registry | After extension ecosystem grows enough to need discovery. | +| Budget enforcement (token/cost limits) | Important but not blocking. Can layer in after observe tracks usage. | +| FTS5 cross-session search | Powerful for recall but not needed for core extensibility. Phase 2 memory work. | +| Static/dynamic prompt split | Optimization. ACP agents manage their own prompts. | +| Cron scheduler + event triggers | Extension candidate once extension architecture exists. Not core. | +| Channel adapters | Extension once extension architecture exists. Define interface later. | +| Agent-to-agent networking (A2A) | Phase 3. Define `AgentPeer` interface later. | +| Knowledge graph memory backend | Extension on top of memory system. | +| Workflow engine | Extension composing session primitives. 
| +| Extension marketplace/registry | After extension ecosystem grows enough to need discovery. | --- ## 5. Key Architectural Decisions (From Research) ### Why JSON-RPC stdio (not gRPC) for subprocess extensions + - AGH already uses JSON-RPC stdio for ACP — same pattern, same code paths - No protobuf toolchain requirement — lower barrier for non-Go extension authors - Aligned with MCP/LSP ecosystem convergence - HashiCorp go-plugin (gRPC) is a good reference but heavier than needed ### Why Wasm via Extism + wazero (not Go native plugins) + - Go native plugins: no Windows, CGO required, no unloading, no security isolation, exact build-env matching. Every major Go project has rejected them. - wazero: **pure Go, zero CGO, zero dependencies**. Fits single-binary constraint perfectly. - Extism: high-level SDK with 16+ host languages and 7+ guest PDKs @@ -279,11 +288,13 @@ These are validated as valuable by the analysis but explicitly deferred per proj - Single portable `.wasm` file distribution — no platform-specific builds ### Why both Wasm AND subprocess (not one or the other) + - **Wasm** for fast-path synchronous operations: hooks, validators, transformers (<1ms latency, sandboxed) - **Subprocess** for rich stateful extensions: agent drivers, memory backends, API routes (full system access, any language) - Different use cases, different trade-offs. Forcing everything into one model over-constrains either power or safety. 
### Why TypeScript as the first non-Go language + - Largest developer community for AI/agent tooling - `@agh/extension-sdk` (npm) for subprocess extensions — natural for Node.js developers - AssemblyScript for Wasm hooks — TypeScript-like syntax that compiles to Wasm @@ -295,19 +306,19 @@ These are validated as valuable by the analysis but explicitly deferred per proj These patterns appeared in 4+ of the 6 analyzed frameworks, confirming they are **not framework-specific opinions but industry convergence**: -| Pattern | Frameworks | AGH Status | -|---|---|---| -| Uniform tool interface with JSON Schema | All 6 | Missing | -| Skills as markdown with YAML frontmatter | Claude Code, GoClaw, Hermes, Pi-Mono, OpenClaw | Exists | -| 5-tier skill precedence (workspace > user > bundled) | Claude Code, GoClaw, OpenClaw, Pi-Mono | Exists | -| Lifecycle hooks at named points | All 6 | Exists (limited) | -| Hooks can block/modify (not just observe) | Claude Code, Pi-Mono, OpenClaw, Hermes | Missing | -| Approval flow for dangerous operations | Claude Code, Hermes, OpenClaw, OpenFang | Exists (ACP) | -| MCP tool integration | Claude Code, GoClaw, Hermes, OpenFang, OpenClaw | Exists (delegation) | -| Manifest-first plugin discovery | OpenClaw, Pi-Mono, OpenFang | Missing | -| Session compaction/context management | All 6 | Delegated to ACP | -| Non-blocking fan-out for events | Claude Code, GoClaw, OpenClaw | Exists (notifier) | -| Subprocess environment isolation for hooks | Claude Code, GoClaw, OpenFang | Exists (`hookEnvAllowlist`) | -| Tool namespacing to prevent collisions | Claude Code, GoClaw, OpenFang | Missing | -| Progressive disclosure (lazy skill loading) | Claude Code, Pi-Mono, GoClaw | Implemented on 2026-04-09 | -| Health reporting per subsystem | GoClaw, OpenClaw, OpenFang | Partial | +| Pattern | Frameworks | AGH Status | +| ---------------------------------------------------- | ----------------------------------------------- | --------------------------- | +| 
Uniform tool interface with JSON Schema | All 6 | Missing | +| Skills as markdown with YAML frontmatter | Claude Code, GoClaw, Hermes, Pi-Mono, OpenClaw | Exists | +| 5-tier skill precedence (workspace > user > bundled) | Claude Code, GoClaw, OpenClaw, Pi-Mono | Exists | +| Lifecycle hooks at named points | All 6 | Exists (limited) | +| Hooks can block/modify (not just observe) | Claude Code, Pi-Mono, OpenClaw, Hermes | Missing | +| Approval flow for dangerous operations | Claude Code, Hermes, OpenClaw, OpenFang | Exists (ACP) | +| MCP tool integration | Claude Code, GoClaw, Hermes, OpenFang, OpenClaw | Exists (delegation) | +| Manifest-first plugin discovery | OpenClaw, Pi-Mono, OpenFang | Missing | +| Session compaction/context management | All 6 | Delegated to ACP | +| Non-blocking fan-out for events | Claude Code, GoClaw, OpenClaw | Exists (notifier) | +| Subprocess environment isolation for hooks | Claude Code, GoClaw, OpenFang | Exists (`hookEnvAllowlist`) | +| Tool namespacing to prevent collisions | Claude Code, GoClaw, OpenFang | Missing | +| Progressive disclosure (lazy skill loading) | Claude Code, Pi-Mono, GoClaw | Implemented on 2026-04-09 | +| Health reporting per subsystem | GoClaw, OpenClaw, OpenFang | Partial | diff --git a/docs/ideas/extensability/analysis/analysis_claude_code.md b/docs/ideas/extensability/analysis/analysis_claude_code.md new file mode 100644 index 000000000..5f4673118 --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_claude_code.md @@ -0,0 +1,239 @@ +# Claude Code Harness Analysis for AGH Extensibility + +## Overview + +Claude Code is a TypeScript agentic CLI that bridges natural-language intent to code and shell operations. Its architecture is a seven-layer stack: Entry Points, Bootstrap/Configuration, Setup, UI Layer, QueryEngine (async-generator core loop), Tool System (50+ tools), and Services/State. 
The harness manages a disciplined cycle of "send message -> stream response -> execute tools -> loop" with everything else -- the TUI, the service container, the permission engine -- existing to feed that loop. + +This analysis identifies the key features, architectural patterns, and capabilities from Claude Code and classifies each as either **CORE** (essential for any agent OS minimal core) or **EXTENSION** (should be a plugin/extension on top of the core) for AGH's architecture. + +AGH's philosophy is a robust minimal core with a highly extensible plugin system. The classification below applies that lens: features that every ACP-compatible agent session needs regardless of agent type belong in core; features that are domain-specific, agent-specific, or can be composed from core primitives belong as extensions. + +## Key Features Analysis + +### Foundational Architecture + +| Feature | Classification | Rationale | +| --------------------------------------------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Core Query/Agent Loop** (async turn cycle: normalize -> call model -> execute tools -> loop) | **CORE** | This is the beating heart of any agent OS. AGH's session package already owns session lifecycle; the turn-based execution loop with tool dispatch is the irreducible minimum for running any ACP agent. Every agent type needs this cycle. | +| **Tool Interface Contract** (uniform schema: identity, input schema, execution, permissions, concurrency metadata) | **CORE** | A uniform tool abstraction is what makes the system extensible without changing the core loop. 
AGH needs a `ToolDriver` interface (like `AgentDriver`) that all tools implement. The contract must include: name, input validation, execution, read-only/concurrency-safe flags, and permission check. | +| **Tool Registry with Dynamic Loading** | **CORE** | The registry that maps tool names to implementations and supports runtime registration (for MCP tools, plugin tools) is core infrastructure. Without it, extensibility requires recompilation. | +| **Tool Execution Pipeline** (validate -> permission check -> pre-hooks -> execute -> post-hooks -> result truncation) | **CORE** | The ordered pipeline through which every tool call passes is the single enforcement point for safety, validation, and extensibility. This is not optional -- it is how the core guarantees invariants for any extension. | +| **Tool Partitioning** (concurrent reads, serial writes) | **CORE** | Smart concurrency based on `isReadOnly()` and `isConcurrencySafe()` flags is a significant performance optimization that belongs in the core orchestrator. It halves wall-clock latency for read-heavy tool batches and prevents write races. | +| **Message Normalization** (role alternation, tool result hoisting, thinking block rules) | **CORE** | Every ACP provider will have message format requirements. The normalization layer that transforms internal state to provider-compatible format is essential infrastructure that sits between the session store and the model call. | +| **Streaming Response Handling** (async generator yielding events to observers) | **CORE** | AGH already has SSE for the web UI and the notifier pattern for fan-out. The streaming pipeline from model to observers is core -- it is how the daemon surfaces real-time events to all consumers (web UI, CLI, hooks). | +| **Result Truncation / Large Output Handling** (persist to disk, send preview + path) | **CORE** | Preventing a single tool result from consuming the entire context window is a safety invariant. 
The core should enforce per-tool `maxResultSize` and handle overflow to disk automatically. | + +### Permission and Security Model + +| Feature | Classification | Rationale | +| ------------------------------------------------------------------------------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Multi-Level Permission Rule Cascade** (policy > user > project > local > session) | **CORE** | The layered permission system where higher-level sources cannot be overridden by lower ones is essential for any production agent OS, especially one targeting enterprise deployment. AGH's config package already does TOML merge -- extending it to permission rules with cascade semantics is a natural fit. | +| **Permission Decision Waterfall** (deny rules -> tool-specific check -> mode -> allow rules -> classifier -> user prompt) | **CORE** | The ordered evaluation that short-circuits on definitive answers is the enforcement mechanism. Without it, every tool either runs unchecked or requires manual approval. The waterfall structure belongs in core; specific classifiers can be extensions. | +| **Permission Modes** (default, plan, auto, acceptEdits, bypass) | **CORE** | Modes define baseline strictness and are critical for both interactive and automated use. `bypass` mode enables CI/automation; `plan` mode enables safe exploration; `auto` mode reduces prompt fatigue. These are fundamental operational modes, not domain-specific extensions. | +| **Plan Mode as Hard Constraint** (restricts tools to read-only, requires approval to escalate) | **CORE** | Plan mode enforced at the tool layer (not as a suggestion to the model) is a key safety pattern. 
The core must support tool-scope restriction based on session mode. | +| **Multi-Resolver Race Pattern** (parallel permission resolvers with first-safe-answer-wins) | **EXTENSION** | The sophisticated `createResolveOnce` pattern with parallel resolvers (user click, hook classifier, bridge UI) is an optimization. The core needs a permission resolution interface; the parallel race with multiple resolver types is an advanced capability. | +| **LLM-Based Safety Classifier** (transcript classifier for auto-approve) | **EXTENSION** | Using a separate LLM to classify tool safety is powerful but expensive and model-dependent. The core should define a `PermissionClassifier` interface; the LLM-based implementation is an extension. | +| **Iron Gate** (hardcoded categorical restrictions that no classifier can bypass) | **CORE** | Certain actions must be categorically refused regardless of any classifier, user setting, or mode. A small set of hardcoded deny rules that cannot be overridden is a safety floor that belongs in core. | +| **Permission Explanation** (LLM-generated risk assessment for user prompts) | **EXTENSION** | Natural-language command risk explanation is a UX enhancement that uses side-queries. Not essential for the minimal core. | + +### Hook System (Lifecycle Extensibility) + +| Feature | Classification | Rationale | +| --------------------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Lifecycle Event Bus** (25+ events: PreToolUse, PostToolUse, SessionStart, SessionEnd, UserPromptSubmit, etc.) | **CORE** | The hook event taxonomy is the primary extensibility surface. 
AGH's notifier pattern is already a typed interface for fan-out -- extending it with a formal lifecycle event bus (pre/post tool, session lifecycle, prompt submission) is core infrastructure that enables all extensions. | +| **Hook Output Protocol** (structured JSON: continue/block, updatedInput, additionalContext, transformedResult) | **CORE** | The protocol by which hooks communicate decisions back to the core loop is the contract. Without a structured protocol, hooks are fire-and-forget side effects. With it, hooks can block, modify, and transform -- making them load-bearing extensibility points. | +| **Hook Types** (command, prompt, agent, http, function) | **EXTENSION** | The five execution engines for hooks are implementations of the hook contract. The core needs to define the `HookExecutor` interface and ship a basic `command` executor. `prompt`, `agent`, `http`, and `function` types are extensions that plug into the same interface. | +| **Hook Matcher Syntax** (regex/glob filters for event-specific keys like tool names) | **CORE** | Matchers determine which hooks fire for which events. A simple but expressive matching syntax (exact name, pipe-separated, glob) is core because it determines hook specificity. | +| **PreToolUse Blocking and Modification** (hooks can block execution or rewrite tool inputs) | **CORE** | The ability for pre-execution hooks to block or modify is essential for verification gates, policy enforcement, and input sanitization. This is not a nice-to-have -- it is how organizations enforce coding standards, security policies, and workflow rules. | +| **PostToolUse Result Transformation** (hooks can redact or augment tool results) | **CORE** | Result transformation enables secret redaction, output enrichment, and audit logging. This is a security-critical capability that belongs in core. | +| **Enterprise Hook Enforcement** (MDM-managed hooks that users cannot remove) | **EXTENSION** | MDM enforcement is an enterprise deployment concern. 
The core should support hook source precedence; the MDM-specific enforcement is an enterprise extension. | + +### Memory and Session Persistence + +| Feature | Classification | Rationale | +| -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Event-Sourced Session Store** (append-only event log per session) | **CORE** | AGH already has this via `sessiondb` (per-session SQLite event store). This is the foundation of session persistence and replay. | +| **Session Resume / Replay** (restore conversation state from persisted events) | **CORE** | The ability to resume a session from persisted state is fundamental for the daemon model. AGH's transcript package already handles replay message assembly -- this belongs in core. | +| **Tiered Memory Architecture** (conversation history, session memory, instruction files, cross-session auto-memory, team memory) | **CORE (framework) / EXTENSION (implementations)** | The framework for tiered memory (AGH's `memory` package with dual-scope global + workspace) is core. The specific implementations -- auto-extraction subagent, Sonnet-based semantic recall, team memory sync -- are extensions that plug into the memory framework. | +| **Persistent Instruction File** (CLAUDE.md / project-level config loaded every session) | **CORE** | AGH's config package handles TOML loading. A mechanism for per-workspace instruction files that agents receive in their system prompt is core infrastructure. 
| +| **Background Memory Extraction** (forked subagent extracting facts after each turn) | **EXTENSION** | The extraction subagent is a specific implementation of the memory write path. The core needs a `MemoryWriter` interface; the LLM-based extraction is an extension (and AGH already has `dream consolidation` as its analog). | +| **AutoDream / Memory Consolidation** (periodic background merge, dedup, prune) | **EXTENSION** | AGH already has this in `internal/memory/consolidation`. The consolidation runtime is an extension that uses the core memory and session interfaces. The core provides the scheduling, locking, and memory access primitives. | +| **Semantic Recall** (LLM side-query to select relevant memories per turn) | **EXTENSION** | Using a separate model as a relevance filter is a specific recall strategy. The core defines a `MemoryRecaller` interface; LLM-based semantic recall is one implementation. | +| **Session Memory Summary** (structured summary maintained during conversation for compaction) | **EXTENSION** | Session memory as a pre-built summary for fast compaction is a specific optimization strategy. The core provides the compaction trigger; the SM-Compact strategy is an extension. | + +### Context Management + +| Feature | Classification | Rationale | +| ----------------------------------------------------------------------------------------------------------------------- | --------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Token Counting** (hybrid API + heuristic estimation) | **CORE** | Context budget awareness is essential for any long-running agent session. 
The core needs a token estimation facility -- even a rough heuristic -- to know when compaction or truncation is needed. | +| **Context Compaction Cascade** (5-layer: tool result budget -> snip -> microcompact -> context collapse -> autocompact) | **CORE (framework) / EXTENSION (strategies)** | The framework that runs compaction strategies in order of increasing cost/loss is core. The specific strategies (snip, microcompact, SM-compact, full conversation compaction) are extensions that register with the framework. The core provides: threshold detection, strategy ordering, circuit breaker, post-compact cleanup. | +| **Static/Dynamic System Prompt Split** (cached prefix + per-request dynamic tail) | **CORE** | Splitting the system prompt into a cacheable static portion and a per-request dynamic portion is a cost and latency optimization that benefits every session. This belongs in the core prompt-building pipeline. | +| **Tool Result Budget** (per-tool maxResultSize with overflow to disk) | **CORE** | Already classified above under Tool Execution Pipeline. | +| **Circuit Breaker** (halt compaction after N consecutive failures) | **CORE** | Preventing infinite retry on compaction failure is a safety mechanism. The core compaction framework should include a circuit breaker. 
| + +### Plugin and Skills System + +| Feature | Classification | Rationale | +| ---------------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Three-Layer Plugin Reconciliation** (intent in settings, materialization on disk, activation at runtime) | **CORE** | The separation of what-the-user-wants from what-is-installed from what-is-active makes the plugin system robust to partial failures. AGH's skills package should adopt this pattern -- it is the foundation of reliable extension management. | +| **Plugin Lifecycle Operations** (install, uninstall, enable, disable, update) | **CORE** | The CRUD operations for plugins, including the enable/disable vs install/uninstall distinction, are core plugin management. | +| **Marketplace Discovery** | **EXTENSION** | The marketplace UI, browsing, and discovery pipeline are value-added features on top of the core plugin lifecycle. The core needs a plugin registry and loader; marketplace is an extension. | +| **Skills as Markdown Procedures** (SKILL.md with frontmatter for activation, not code) | **CORE** | The concept of skills as prompt-and-procedure pairs (not compiled code) is a key design insight. Skills occupy zero token budget at rest (only metadata visible until activated). AGH already has a skills catalog -- the SKILL.md contract with `description`, `when_to_use`, and `allowed-tools` frontmatter is the right abstraction for the core. 
| +| **Progressive Disclosure** (skill content materializes into context only when activated) | **CORE** | This is not just a nice optimization -- it is what makes a large skill library practical. The core skill loader must support lazy materialization based on activation, not eager loading. | +| **Skill Improvement** (background process watches for user corrections and proposes skill updates) | **EXTENSION** | Automatic skill refinement based on session corrections is an advanced feature. | +| **Agent Definitions** (markdown files declaring subagent identity, tools, prompts) | **CORE** | Agent definitions are how AGH will support heterogeneous agent types. The markdown-with-frontmatter format for declaring agent capabilities, tool scopes, and system prompts belongs in core. | +| **Plugin-Provided Hooks** | **CORE** | Plugins must be able to register hooks. This is a natural intersection of the plugin and hook systems. | +| **Plugin Policy Enforcement** (allowlist/blocklist per organization) | **EXTENSION** | Enterprise-grade plugin policy enforcement is an enterprise extension. | + +### MCP Integration + +| Feature | Classification | Rationale | +| ----------------------------------------------------------------------------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **MCP Host Implementation** (connect to multiple servers, expose tools/resources/prompts) | **CORE** | AGH is designed as an ACP-based system. MCP is the standard protocol for agent-to-tool communication. Acting as an MCP host that can connect to external MCP servers and expose their tools identically to built-in tools is core infrastructure. 
| +| **Tool Namespacing** (`mcp__<server>__<tool>` convention) | **CORE** | When multiple servers can expose same-named tools, namespacing is essential for unambiguous dispatch. This is a core registry concern. | +| **Transport Abstraction** (stdio, SSE, HTTP, WebSocket, in-process) | **CORE (interface) / EXTENSION (transports)** | The transport interface is core. AGH should ship with `stdio` (most common for local tools) and `SSE/HTTP` (for remote). WebSocket and in-process transports are extensions. | +| **Session Recovery** (auto-reconnect on session expiry, 401 handling) | **CORE** | MCP sessions are stateful and servers restart. Transparent reconnection is essential for reliability in long-running daemon sessions. | +| **Output Size Management** (truncate large MCP results, persist to disk) | **CORE** | Already covered under result truncation -- applies uniformly to MCP and built-in tools. | +| **OAuth Flow for Remote MCP Servers** | **EXTENSION** | Browser-based OAuth for remote MCP servers is a specific authentication pattern. The core needs an MCP auth interface; OAuth is one implementation. | +| **MCP Server Approval Dialog** (user must approve new servers before connection) | **CORE** | Security boundary: preventing a malicious workspace config from silently launching subprocesses. The core must gate MCP server activation on explicit approval. | + +### Agent Swarm and Subagents + +| Feature | Classification | Rationale | +| ------------------------------------------------------------------------------------------------------- | ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Subagent Spawning** (fork a new agent loop with its own context, tools, and system prompt) | **CORE** | AGH's ACP layer already spawns agents as subprocesses.
The ability to spawn subagent sessions -- whether as forked loops, separate processes, or separate ACP instances -- is core to an agent OS. | +| **Three Execution Models** (fork/cache-shared, teammate/process-isolated, worktree/filesystem-isolated) | **CORE (interface) / EXTENSION (models)** | The core needs a `SubagentExecutionModel` interface. The fork model (cache-shared, same process) is core for efficiency. Teammate (separate process with mailbox) and worktree (git isolation) are extensions. | +| **Tool Scope Restriction per Agent** (subagents get a filtered tool set) | **CORE** | Different agents need different capabilities. The ability to filter the tool registry per-agent based on definitions or mode is core. | +| **File-Based Mailbox** (inter-agent communication via filesystem) | **EXTENSION** | The specific IPC mechanism (filesystem mailbox vs UDS vs channels) is an implementation choice. AGH already has UDS for CLI IPC -- agent-to-agent communication can use the same mechanism. The mailbox pattern is an extension. | +| **Plan Approval Flow** (teammate requests leader approval before escalating to act mode) | **CORE** | The two-phase commit where a subagent must get approval before gaining destructive capabilities is a safety pattern. The core needs a mechanism for capability escalation requests between agents. | +| **Shared Task List** (TodoV2: create, update, list tasks across agents) | **EXTENSION** | Task coordination across agents is a specific orchestration pattern. The core provides session state and messaging; shared task management is an extension. | +| **Swarm UI** (terminal spinners, progress tracking for multiple concurrent agents) | **EXTENSION** | The visualization of multi-agent activity is a UI concern. The core emits events; the UI renders them. | +| **Agent Memory Snapshots** (persist/restore agent knowledge across sessions and worktrees) | **EXTENSION** | Memory snapshotting for subagent continuity is an advanced persistence feature. 
| + +### Settings System + +| Feature | Classification | Rationale | +| --------------------------------------------------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Hierarchical Settings Cascade** (policy > flag > local > project > user) | **CORE** | AGH's config package already handles TOML loading and merge. Extending it with a formal precedence hierarchy that supports enterprise policy overrides is core infrastructure. | +| **Schema Validation** (Zod-style validation with per-rule error isolation) | **CORE** | Validating configuration against a schema and isolating individual rule errors (so one bad rule does not invalidate the file) is essential for reliability. AGH should use Go struct tags + validation, with the same per-rule isolation principle. | +| **Hot-Reload** (file watcher with stability windows, internal-write suppression) | **CORE** | Settings changes should take effect without daemon restart. The daemon model makes this especially important -- the daemon is long-lived and needs to react to config changes. | +| **MDM / Enterprise Policy Enforcement** (OS-level managed settings that cannot be overridden) | **EXTENSION** | Enterprise MDM integration is a deployment concern. The core supports the precedence hierarchy; MDM-specific readers (plist, registry) are extensions. | +| **Environment Variable Injection** (settings-driven env vars for tool subprocesses) | **CORE** | Tools that spawn subprocesses need controllable environment. The settings system injecting env vars from config is a core feature for corporate proxy, custom paths, and similar concerns. 
| + +### Observability and Diagnostics + +| Feature | Classification | Rationale | +| --------------------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Event Recording** (structured event logging for every tool call, turn, and lifecycle event) | **CORE** | AGH's `observe` package already handles event recording and health metrics. This is core. | +| **Diagnostic Command** (single entry point showing installation health, conflicts, warnings) | **CORE** | A `doctor` equivalent that surfaces configuration errors, agent definition issues, permission conflicts, and health status is essential for operations. | +| **Telemetry Pipeline** (fan-out to multiple sinks) | **EXTENSION** | The specific telemetry sinks (Datadog, analytics collectors) are deployment-specific. The core provides structured events; telemetry export is an extension. | +| **Feature Flags** | **EXTENSION** | Remote feature flag evaluation is an operational concern, not a core requirement. The core can use build tags and config toggles. | +| **Auto-Update** | **EXTENSION** | Self-update mechanisms are distribution-specific and not part of the agent OS core. | +| **PII Redaction** (regex-based credential scrubbing before any data leaves the machine) | **CORE** | Any system that persists or transmits agent transcripts must scrub credentials. The redaction pipeline belongs in core. 
| + +### Remote and Bridge System + +| Feature | Classification | Rationale | +| ----------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| **Remote Session Control** (local CLI controllable from web UI via authenticated channel) | **EXTENSION** | Remote control is a specific deployment mode. The core exposes HTTP/SSE and UDS APIs; remote bridge is an extension that uses those APIs. | +| **Session Teleport** (move active session between environments) | **EXTENSION** | Session migration across environments is an advanced operational feature. | +| **SSH Tunnel Integration** | **EXTENSION** | SSH-based remote access is a specific transport. | + +### UI and Rendering + +| Feature | Classification | Rationale | +| -------------------------------------------------------------------------------- | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Terminal UI** (React/Ink TUI with permission dialogs, progress, streaming) | **EXTENSION** | AGH uses a web SPA (React 19, Vite, TanStack). The specific UI technology is an extension concern. The core provides HTTP/SSE APIs that any UI consumes. | +| **Permission Dialog UX** (tool-specific approval UI with "always allow" options) | **EXTENSION** | The specific UI for permission requests is a frontend concern. The core provides the permission decision API. | + +## Architectural Patterns Worth Adopting + +### 1. Uniform Tool Interface as Core Abstraction + +**Pattern**: Every capability the agent can invoke -- filesystem, shell, web, MCP, custom -- implements the same interface with uniform schema, permissions, and execution semantics. 
+ +**Why AGH should adopt this**: AGH's ACP layer handles agent spawning, but tool execution within agent sessions needs the same uniformity. Define a `ToolDriver` interface in Go: + +```go +type ToolDriver interface { + Name() string + InputSchema() Schema + IsReadOnly() bool + IsConcurrencySafe() bool + CheckPermissions(ctx context.Context, input any) PermissionDecision + Call(ctx context.Context, input any) (ToolResult, error) +} +``` + +This becomes the extension point for all tool implementations, including MCP-proxied tools. + +### 2. Lifecycle Hook Bus with Structured Protocol + +**Pattern**: A typed event bus with 25+ lifecycle events, where hooks can block, modify, or transform operations via a structured JSON output protocol. + +**Why AGH should adopt this**: AGH's notifier pattern is already a typed fan-out interface. AGH should extend it into a formal hook bus with: + +- Pre/post execution events for tool calls +- Session lifecycle events (start, end, resume) +- Permission decision events +- Context management events (pre/post compact) + +The structured output protocol (`continue`, `stopReason`, `updatedInput`, `transformedResult`) is what transforms hooks from passive observers into active participants. This is the pattern that turns AGH from a product into a platform. + +### 3. Three-Layer Extension Reconciliation + +**Pattern**: Separate intent (what the user configured), materialization (what is installed on disk), and activation (what is live in the runtime). + +**Why AGH should adopt this**: AGH's skills catalog should adopt this exact pattern. A skill can be configured but not installed, installed but disabled, or active and running. Each layer reconciles independently, making the system robust to partial failures (for example, a corrupted skill file does not crash the daemon). + +### 4. Progressive Disclosure for Skills/Capabilities + +**Pattern**: Skills and agent definitions declare short metadata (name, description, when_to_use) that stays in context permanently.
The full content materializes only when activated. + +**Why AGH should adopt this**: With dozens or hundreds of skills, eager loading would blow the context budget. AGH's skill loader should present only metadata to the agent until activation, keeping the per-turn token cost constant regardless of skill library size. + +### 5. Permission Cascade with Short-Circuit Evaluation + +**Pattern**: A waterfall of permission checks ordered from most-restrictive to most-permissive, where each stage can short-circuit with a definitive answer. + +**Why AGH should adopt this**: AGH needs a permission system for tool execution. The waterfall pattern (deny rules first, then tool-specific logic, then mode check, then allow rules, then user prompt) is the right structure because it guarantees that deny rules are always enforced and safe operations auto-approve without user interaction. + +### 6. Smart Concurrency via Tool Metadata + +**Pattern**: Partition tool calls by `isReadOnly()` and `isConcurrencySafe()` -- run reads in parallel, writes serially. + +**Why AGH should adopt this**: AGH manages agent sessions that invoke tools. When an agent requests multiple tool calls in one turn, the daemon should partition them using the same metadata flags. This is a low-effort, high-impact optimization. + +### 7. Static/Dynamic Prompt Split for Cache Efficiency + +**Pattern**: Split system prompts into a rarely-changing static prefix (cacheable) and a per-request dynamic tail. + +**Why AGH should adopt this**: The system prompt for ACP agents includes tool schemas, role instructions, and coding conventions (static) plus environment info, git status, and memory (dynamic). Splitting these lets the API cache the expensive static portion. + +## Extension System Insights + +### What Makes Claude Code's Extension Model Work + +1. **Small, stable core interfaces**: The `Tool` interface, the `HookJSONOutput` protocol, and the `SKILL.md` contract are small and stable. 
Extensions implement them without needing to understand the rest of the codebase. + +2. **Extensions cannot violate core invariants**: The permission waterfall, the tool execution pipeline, and the hook lifecycle all run in the core. Extensions plug into these pipelines -- they do not bypass them. A malicious plugin cannot skip the permission check because the check happens in the core pipeline, not in the plugin. + +3. **Extensions are declared, not coded (where possible)**: Skills are Markdown files. Agent definitions are Markdown files. Hook configurations are JSON in settings. Permission rules are strings in settings. This low-code approach to extensions makes the system accessible to non-developers and auditable by security teams. + +4. **Progressive complexity**: Simple extensions (a permission rule, a command hook) require zero code. Medium extensions (a skill with a procedure) require Markdown. Complex extensions (a plugin with MCP servers, tools, and hooks) require a manifest and code. The system supports all three levels without forcing everyone to the most complex level. + +5. **Fail-safe degradation**: Missing plugins do not crash the daemon. Failed hooks return errors but do not block the pipeline (unless they explicitly return `continue: false`). Unreadable memory files silently degrade to no-memory. The core is designed to keep running even when extensions fail. + +### Recommendations for AGH's Extension System + +1. **Define the extension contract in Go interfaces, not in plugin APIs**: AGH's extensions should implement Go interfaces (`ToolDriver`, `HookExecutor`, `MemoryRecaller`, `PermissionClassifier`). The daemon loads extensions that fulfill these interfaces. This is Go-native and avoids the complexity of a plugin framework. + +2. **Support declarative extensions via TOML/YAML/Markdown**: Not every extension needs compiled code. 
Skills (Markdown), permission rules (TOML config), hook commands (shell commands in TOML), and agent definitions (Markdown frontmatter) should all work without compilation. + +3. **Use the notifier pattern for the hook bus**: AGH's existing notifier pattern is the right foundation. Extend it with typed lifecycle events and the structured output protocol so hooks can participate in decisions, not just observe them. + +4. **Make MCP a first-class citizen**: Since AGH speaks ACP, it should also speak MCP for tool access. MCP tools should be indistinguishable from built-in tools in the tool registry, permission system, and hook pipeline. This is what makes the tool ecosystem open-ended. + +5. **Ship a minimal set of bundled tools and let everything else be extensions**: The core should ship with: file read, file write, file edit, shell execution, glob, grep, and MCP bridge. Everything else -- web fetch, web search, notebook editing, remote triggers -- should be extensions that demonstrate the tool interface. + +6. **Invest in the permission system early**: Claude Code's permission model is its most mature subsystem and arguably its most important. AGH should build the permission cascade and the hook bus before building advanced features, because every advanced feature depends on them. 
diff --git a/.compozy/tasks/extensability/analysis/analysis_cross_cutting.md b/docs/ideas/extensability/analysis/analysis_cross_cutting.md similarity index 76% rename from .compozy/tasks/extensability/analysis/analysis_cross_cutting.md rename to docs/ideas/extensability/analysis/analysis_cross_cutting.md index 7a1515ce6..3c54c2d50 100644 --- a/.compozy/tasks/extensability/analysis/analysis_cross_cutting.md +++ b/docs/ideas/extensability/analysis/analysis_cross_cutting.md @@ -12,9 +12,10 @@ These patterns appear in every agent framework surveyed and represent the irredu Every agent system, from the simplest ReAct implementation to full multi-agent orchestrations, relies on a tight cycle: receive input, construct prompt, call model, parse response, dispatch tool calls, append results, repeat. This loop is the execution primitive on which everything else is built. -AGH already implements this via its ACP subprocess model (spawn agent, JSON-RPC over stdio, event persistence). The loop itself must remain core. However, the loop's *policy* -- how many iterations are allowed, what happens on tool errors, when to compact context -- should be configurable per-session via the TOML config layer. +AGH already implements this via its ACP subprocess model (spawn agent, JSON-RPC over stdio, event persistence). The loop itself must remain core. However, the loop's _policy_ -- how many iterations are allowed, what happens on tool errors, when to compact context -- should be configurable per-session via the TOML config layer. **What specifically must be core:** + - The turn cycle (prompt construction -> model call -> response parsing -> tool dispatch -> observation append) - Step budgets and hard termination limits - Error classification (transient vs. permanent) and basic retry @@ -25,9 +26,10 @@ AGH already implements this via its ACP subprocess model (spawn agent, JSON-RPC Every framework implements a session/task state machine. 
A2A defines: submitted -> working -> input-required -> completed/failed/canceled. LangGraph has checkpointed graph state. CrewAI tracks task status per crew member. The state machine is universal because agents are inherently stateful processes. -AGH's `internal/session` package already owns this. The state machine must be core, but the *set of states* should be extensible. The base states (created, running, paused, completed, failed) are universal. Extensions should be able to register custom states (e.g., "awaiting-human-approval", "delegated-to-peer") without modifying the session package. +AGH's `internal/session` package already owns this. The state machine must be core, but the _set of states_ should be extensible. The base states (created, running, paused, completed, failed) are universal. Extensions should be able to register custom states (e.g., "awaiting-human-approval", "delegated-to-peer") without modifying the session package. **What specifically must be core:** + - State transitions with validation (no illegal jumps) - Session creation, suspension, resumption, termination - Event persistence per session (the `sessiondb` pattern AGH already has) @@ -38,6 +40,7 @@ AGH's `internal/session` package already owns this. The state machine must be co Tool dispatch is the action primitive. Every agent loop terminates in either a tool call or a final answer. The dispatch mechanism -- validate the call against a schema, execute it, capture the result -- is universal. JSON Schema for tool definitions has converged as the industry standard (MCP, OpenAI function calling, Anthropic tool use all use it). **What specifically must be core:** + - A tool registry that holds (name, description, JSON Schema, handler) tuples - Schema validation of tool call arguments before dispatch - Execution with timeout, result capture, and error wrapping @@ -45,6 +48,7 @@ Tool dispatch is the action primitive. 
Every agent loop terminates in either a t - Tool call audit logging (every invocation recorded) **What should NOT be core:** + - Specific tool implementations (file read, web search, etc.) -- these are extensions - Tool discovery from external sources -- this is a protocol concern (MCP) @@ -53,6 +57,7 @@ Tool dispatch is the action primitive. Every agent loop terminates in either a t Every production harness implements layered permissions: tool-level (which tools exist), path-level (allowed directories), command-level (banned operations), and approval gates (human confirmation for writes). The OWASP LLM Top 10 ranks "Excessive Agency" (LLM06) as a critical risk. Permission enforcement belongs in the core because it is the last defense against prompt injection escalation. **What specifically must be core:** + - Tool-level allow/deny lists - Approval gates that pause execution for external confirmation - Per-session capability scoping (an agent gets only the tools it needs) @@ -63,6 +68,7 @@ Every production harness implements layered permissions: tool-level (which tools Every framework requires tracing. OpenTelemetry GenAI semantic conventions are converging as the standard. AGH's `internal/observe` package already handles event recording. This must be core because debugging non-deterministic agent behavior is impossible without traces. **What specifically must be core:** + - Per-step event recording (tool calls, model calls, state transitions) - Token usage tracking per call and per session - Latency measurement per operation @@ -74,6 +80,7 @@ Every framework requires tracing. OpenTelemetry GenAI semantic conventions are c Context engineering is the highest-leverage skill in building production agents. The pipeline that assembles the model's context window -- system prompt + project docs + session state + tool results + user message -- is universal. The ACE framework (Agentic Context Engineering) formalizes this as selection -> formatting -> timing -> lifecycle. 
**What specifically must be core:** + - Layered context assembly (system prompt layer, memory layer, tool results layer, user message layer) - Token budget tracking and enforcement per layer - Compaction triggers (when occupancy exceeds threshold, compress older turns) @@ -97,6 +104,7 @@ Different agent runtimes (Claude Code, Codex, Gemini CLI, custom agents) each ha The knowledge base analysis reveals a universal pattern: pluggable memory backends behind a common interface. Mem0 (vector + graph), Zep (temporal KG), Letta (self-editing blocks), Redis (warm tier), pgvector (cold tier), SQLite+FTS5 (local), and file-based markdown wikis all serve as memory backends. AGH's `internal/memory` package should define the interface; backends are extensions. **Extension interface:** + ```go type MemoryBackend interface { Store(ctx context.Context, entry MemoryEntry) (string, error) @@ -109,10 +117,12 @@ type MemoryBackend interface { **Why extension, not core:** The diversity of backends (vector stores, knowledge graphs, file-based wikis, cloud services) is enormous and growing. Each makes different trade-offs (latency vs durability, semantic search vs keyword search, graph traversal vs flat retrieval). The consolidation algorithm (dream triggers, importance-weighted pruning, hierarchical compression) also varies by deployment. **Backends AGH should ship:** + - SQLite+FTS5 (local default, already aligned with AGH's SQLite architecture) - File-based markdown (for the Karpathy pattern / CLAUDE.md approach) **Backends that should be external plugins:** + - Vector store integration (Chroma, Qdrant, pgvector) - Knowledge graph (Zep/Graphiti, Neo4j) - Cloud memory services (Mem0, OpenMemory) @@ -122,6 +132,7 @@ type MemoryBackend interface { Tools are the most natural extension point. MCP has proven that tools can be exposed as independent servers with JSON-RPC + JSON Schema. AGH should treat every tool as a provider that registers with the core tool registry. 
**Extension types:** + - **Built-in tools:** File operations, shell execution, basic search -- compiled into the binary - **MCP servers:** External processes exposing tools via MCP protocol - **Plugin tools:** Dynamically loaded tool implementations (Go plugins or subprocess) @@ -132,7 +143,7 @@ Tools are the most natural extension point. MCP has proven that tools can be exp Agent Architecture Patterns reveals at least seven orchestration patterns: ReAct, Plan-and-Execute, Orchestrator-Workers, Evaluator-Optimizer, Routing, Parallelization, and Reflection. Each is a different policy for how the agentic loop operates at the multi-step level. -**Why extension, not core:** The basic loop is core. The strategy that governs *how* the loop runs (single agent vs. orchestrated multi-agent, sequential vs. parallel, with or without replanning) varies by task type. AGH should provide a simple default (single ReAct loop) and allow orchestration strategies to be plugged in. +**Why extension, not core:** The basic loop is core. The strategy that governs _how_ the loop runs (single agent vs. orchestrated multi-agent, sequential vs. parallel, with or without replanning) varies by task type. AGH should provide a simple default (single ReAct loop) and allow orchestration strategies to be plugged in. **Extension interface pattern:** An orchestration strategy receives a task description and produces a plan (sequence of steps, potentially with parallelism). The core loop executes each step. The strategy can observe results and replan. @@ -153,13 +164,14 @@ How events and results reach the outside world (HTTP/SSE for web UI, UDS for CLI ## Protocol Layer Recommendations -Protocols are standardized interfaces that sit between core and extensions. They define *how* communication happens without specifying *what* is communicated. AGH should implement protocol support in core and let extensions implement specific protocol endpoints. 
+Protocols are standardized interfaces that sit between core and extensions. They define _how_ communication happens without specifying _what_ is communicated. AGH should implement protocol support in core and let extensions implement specific protocol endpoints. ### Protocol 1: MCP (Model Context Protocol) -- IMPLEMENT IN CORE MCP is the universal agent-to-tool protocol. Cross-vendor adoption (Anthropic, OpenAI, Google, Microsoft, GitHub) means it is the de facto standard. AGH should be an MCP host that can connect to any MCP server. **Core responsibilities:** + - MCP client implementation (JSON-RPC 2.0 over stdio and SSE/Streamable HTTP) - Connection lifecycle management (initialize, capability exchange, operation, shutdown) - Tool schema discovery from MCP servers and registration in the core tool registry @@ -173,6 +185,7 @@ MCP is the universal agent-to-tool protocol. Cross-vendor adoption (Anthropic, O A2A handles inter-agent communication: discovery via Agent Cards, task delegation with lifecycle management, streaming results, and push notifications. AGH should define the interface for agent-to-agent communication in core but implement the actual A2A protocol handler as an extension. **Core interface:** + ```go type AgentPeer interface { Discover(ctx context.Context, query CapabilityQuery) ([]AgentCard, error) @@ -181,13 +194,14 @@ type AgentPeer interface { } ``` -**Why interface-in-core, implementation-as-extension:** A2A is still maturing (v0.3 as of 2026). AGH should not couple its core to a protocol that may evolve significantly. But the *concept* of peer agent communication is universal -- the interface should be stable. +**Why interface-in-core, implementation-as-extension:** A2A is still maturing (v0.3 as of 2026). AGH should not couple its core to a protocol that may evolve significantly. But the _concept_ of peer agent communication is universal -- the interface should be stable. 
### Protocol 3: Agent Card / Capability Manifest -- DEFINE IN CORE Every discovery protocol (A2A Agent Cards, AGNTCY, ANP) requires that agents publish a capability manifest. AGH should define its own agent card format that describes what an AGH-managed agent can do, compatible with A2A Agent Card structure. **Core responsibilities:** + - Generate Agent Cards from agent configuration (capabilities, skills, supported input/output modes) - Publish Agent Cards via well-known URI (`.well-known/agent-card.json`) - Parse incoming Agent Cards for peer discovery @@ -197,6 +211,7 @@ Every discovery protocol (A2A Agent Cards, AGNTCY, ANP) requires that agents pub When one agent hands off to another, context must transfer. The analysis of handoff patterns reveals four strategies: full history pass-through, summary, structured snapshot, and schema-typed payload. AGH should define a handoff protocol that supports all four. **Core responsibilities:** + - Handoff primitive (transfer control + context from session A to session B) - Context packing (serialize current session state into a transferable format) - Context unpacking (deserialize received context into a new session's starting state) @@ -206,6 +221,7 @@ When one agent hands off to another, context must transfer. The analysis of hand OpenTelemetry GenAI semantic conventions are the emerging standard for agent observability. AGH's event recording should emit data in OTel-compatible format so traces can flow to Langfuse, Grafana, Datadog, or any OTel-compatible backend. **Core responsibilities:** + - OTel-compatible span emission for model calls, tool calls, and state transitions - Standard attribute naming (`gen_ai.system`, `gen_ai.request.model`, `gen_ai.usage.input_tokens`, etc.) 
- Trace context propagation across session boundaries and agent delegations @@ -218,24 +234,24 @@ The memory analysis reveals a three-tier architecture, a four-type cognitive tax ### Three-Tier Memory Hierarchy -| Tier | Latency | Contents | AGH Implementation | -|------|---------|----------|-------------------| -| **Hot (in-context)** | 0ms | Current turn, recent tool results, active scratchpad | Managed by the context assembly pipeline in core | -| **Warm (session-scoped)** | <10ms | Conversation history, session state, recent memories | SQLite per-session DB (AGH's existing `sessiondb`) | -| **Cold (persistent)** | 10-100ms | User preferences, project knowledge, cross-session facts | Global memory store via `MemoryBackend` interface | +| Tier | Latency | Contents | AGH Implementation | +| ------------------------- | -------- | -------------------------------------------------------- | -------------------------------------------------- | +| **Hot (in-context)** | 0ms | Current turn, recent tool results, active scratchpad | Managed by the context assembly pipeline in core | +| **Warm (session-scoped)** | <10ms | Conversation history, session state, recent memories | SQLite per-session DB (AGH's existing `sessiondb`) | +| **Cold (persistent)** | 10-100ms | User preferences, project knowledge, cross-session facts | Global memory store via `MemoryBackend` interface | ### Four Memory Types (CoALA Taxonomy) -| Type | What It Stores | AGH Mapping | -|------|---------------|-------------| -| **Working memory** | Current context window contents | Core: context assembly pipeline | -| **Episodic memory** | Specific past events with timestamps | Extension: event store + recall queries | -| **Semantic memory** | General facts and knowledge | Extension: knowledge graph or vector store | -| **Procedural memory** | Reusable skills and workflows | Extension: skills package + bundled skills | +| Type | What It Stores | AGH Mapping | +| --------------------- | 
------------------------------------ | ------------------------------------------ | +| **Working memory** | Current context window contents | Core: context assembly pipeline | +| **Episodic memory** | Specific past events with timestamps | Extension: event store + recall queries | +| **Semantic memory** | General facts and knowledge | Extension: knowledge graph or vector store | +| **Procedural memory** | Reusable skills and workflows | Extension: skills package + bundled skills | ### Memory Consolidation as a Core Concern -The "dream" consolidation pattern (AGH's `internal/memory/consolidation`) is correctly placed. Consolidation -- the process of extracting high-value facts from raw session transcripts, merging duplicates, resolving contradictions, and pruning stale entries -- is a cross-cutting concern that every memory backend benefits from. The consolidation *trigger* (when to run) and *pipeline* (extract -> merge -> prune -> store) should be core. The specific *algorithm* (LLM-based summarization, importance-weighted pruning, hierarchical compression) should be configurable. +The "dream" consolidation pattern (AGH's `internal/memory/consolidation`) is correctly placed. Consolidation -- the process of extracting high-value facts from raw session transcripts, merging duplicates, resolving contradictions, and pruning stale entries -- is a cross-cutting concern that every memory backend benefits from. The consolidation _trigger_ (when to run) and _pipeline_ (extract -> merge -> prune -> store) should be core. The specific _algorithm_ (LLM-based summarization, importance-weighted pruning, hierarchical compression) should be configurable. 
### Dual-Scope Memory (Global + Workspace) @@ -295,29 +311,29 @@ AGH's architecture already follows this principle (small interfaces, dependency ## Summary Decision Matrix -| Concept | Classification | Rationale | -|---------|---------------|-----------| -| Agentic loop (turn cycle) | **CORE** | Universal execution primitive | -| Session state machine | **CORE** | Every agent needs lifecycle management | -| Tool dispatch + schema registry | **CORE** | Universal action primitive | -| Permission model | **CORE** | Security is non-negotiable | -| Event recording + observability | **CORE** | Debugging non-deterministic behavior requires traces | -| Context assembly pipeline | **CORE** | Highest-leverage quality factor | -| Token budget management | **CORE** | Cost control and context rot prevention | -| Basic context compaction | **CORE** | Required for any session > 10 turns | -| Memory consolidation triggers | **CORE** | Cross-cutting concern for all memory backends | -| Agent drivers (Claude, Codex, etc.) 
| **EXTENSION** | Unbounded set, each independent | -| Memory backends (SQLite, vector, KG) | **EXTENSION** | Diverse trade-offs per deployment | -| Tool implementations | **EXTENSION** | Unbounded catalog | -| Orchestration strategies | **EXTENSION** | Policy varies by task type | -| Skill packages | **EXTENSION** | Domain-specific content | -| Notification channels | **EXTENSION** | Output format varies by consumer | -| MCP client (host) | **PROTOCOL (core)** | Industry-standard tool integration | -| A2A agent-to-agent | **PROTOCOL (interface in core, impl as extension)** | Still maturing; interface is stable | -| Agent Card / capability manifest | **PROTOCOL (core)** | Self-description is always needed | -| Context transfer / handoff | **PROTOCOL (core)** | Required for any multi-agent scenario | -| OTel observability format | **PROTOCOL (core)** | Cross-vendor tracing standard | -| Agent discovery / registry | **PROTOCOL (deferred)** | Phase 3 concern | -| DID / Verifiable Credentials | **PROTOCOL (deferred)** | Phase 3 concern | -| Payment protocols | **PROTOCOL (deferred)** | Phase 3+ concern | -| Multi-agent memory consistency | **PROTOCOL (deferred)** | Phase 3 concern; design interface now | +| Concept | Classification | Rationale | +| ------------------------------------ | --------------------------------------------------- | ---------------------------------------------------- | +| Agentic loop (turn cycle) | **CORE** | Universal execution primitive | +| Session state machine | **CORE** | Every agent needs lifecycle management | +| Tool dispatch + schema registry | **CORE** | Universal action primitive | +| Permission model | **CORE** | Security is non-negotiable | +| Event recording + observability | **CORE** | Debugging non-deterministic behavior requires traces | +| Context assembly pipeline | **CORE** | Highest-leverage quality factor | +| Token budget management | **CORE** | Cost control and context rot prevention | +| Basic context compaction | 
**CORE** | Required for any session > 10 turns | +| Memory consolidation triggers | **CORE** | Cross-cutting concern for all memory backends | +| Agent drivers (Claude, Codex, etc.) | **EXTENSION** | Unbounded set, each independent | +| Memory backends (SQLite, vector, KG) | **EXTENSION** | Diverse trade-offs per deployment | +| Tool implementations | **EXTENSION** | Unbounded catalog | +| Orchestration strategies | **EXTENSION** | Policy varies by task type | +| Skill packages | **EXTENSION** | Domain-specific content | +| Notification channels | **EXTENSION** | Output format varies by consumer | +| MCP client (host) | **PROTOCOL (core)** | Industry-standard tool integration | +| A2A agent-to-agent | **PROTOCOL (interface in core, impl as extension)** | Still maturing; interface is stable | +| Agent Card / capability manifest | **PROTOCOL (core)** | Self-description is always needed | +| Context transfer / handoff | **PROTOCOL (core)** | Required for any multi-agent scenario | +| OTel observability format | **PROTOCOL (core)** | Cross-vendor tracing standard | +| Agent discovery / registry | **PROTOCOL (deferred)** | Phase 3 concern | +| DID / Verifiable Credentials | **PROTOCOL (deferred)** | Phase 3 concern | +| Payment protocols | **PROTOCOL (deferred)** | Phase 3+ concern | +| Multi-agent memory consistency | **PROTOCOL (deferred)** | Phase 3 concern; design interface now | diff --git a/docs/ideas/extensability/analysis/analysis_goclaw.md b/docs/ideas/extensability/analysis/analysis_goclaw.md new file mode 100644 index 000000000..bd6f5e621 --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_goclaw.md @@ -0,0 +1,348 @@ +# GoClaw Analysis for AGH Extensibility + +## Overview + +GoClaw is a multi-tenant AI agent gateway written in Go 1.26 that routes end-user messages through a think-act-observe agent loop, executes tools against pluggable LLM providers, and streams responses back into multiple messaging channels (Telegram, Feishu, Zalo, Discord, 
WhatsApp). It is structured as a single control-plane binary with a six-layer stack: Client, Gateway, Agent Execution, Provider Bridge, Storage, and Shared Infrastructure. + +**Key differentiators from AGH:** + +- GoClaw is a multi-tenant SaaS gateway; AGH is a single-user local daemon +- GoClaw owns the LLM provider bridge (direct API calls); AGH spawns ACP-compatible agents as subprocesses +- GoClaw uses PostgreSQL + pgvector; AGH uses SQLite +- GoClaw has a central event bus (`bus.MessageBus`); AGH uses a typed Notifier pattern with direct function calls +- GoClaw runs its own agent loop in-process; AGH delegates execution to external agent processes (Claude Code, Codex, etc.) + +**What makes GoClaw especially relevant:** Both are Go single-binary systems. GoClaw has solved many extensibility problems (dynamic tools, channel adapters, hook systems, MCP bridging, skills discovery) that AGH will need as it grows through Phase 2 (Memory/Skills/State) and Phase 3 (Agent network protocol). 
+ +--- + +## Key Features Analysis + +| Feature | GoClaw Implementation | Classification for AGH | Rationale | +| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Think-Act-Observe Loop** | `internal/agent/loop.go` -- in-process LLM call + tool execution cycle with parallel tool dispatch, iteration limits, budget guards | **N/A (different model)** | AGH delegates execution to external agents via ACP/JSON-RPC over stdio. AGH does not own the agent loop -- the spawned agent (Claude Code, etc.) does. However, the iteration/budget guard patterns are worth borrowing for session-level cost control. | +| **Tool Registry** | `tools.Registry` -- unified `Tool` interface (`Name/Schema/Invoke`) for built-in, dynamic, and MCP-sourced tools | **CORE** | AGH already has a tools concept through ACP. A unified tool registry interface that normalizes tools regardless of source (built-in, MCP, dynamic) should be core infrastructure. | +| **MessageBus (Event Bus)** | `internal/bus/bus.go` -- buffered channels (1000-slot), inbound/outbound message routing, event broadcasting with per-subscriber filtering | **CORE (limited)** | AGH explicitly rejects a generic event bus in its architecture principles ("no event bus, no NATS"). AGH uses a typed Notifier pattern instead. 
However, the _specific patterns_ from GoClaw's bus -- deduplication helpers, debounce helpers, non-blocking publish with drop-on-full -- are worth adopting as utilities within AGH's existing Notifier. | +| **Channel Adapter System** | `Channel` interface (`Start/Stop/Send/Health`) with 5 implementations (Telegram, Discord, etc.), `ChannelManager` orchestrator, `RunContext` for streaming state | **EXTENSION** | AGH's primary interfaces are HTTP/SSE (web UI) and UDS (CLI). Messaging platform adapters are clearly extension territory -- they add reach without changing core behavior. The `Channel` interface pattern is excellent for plugin design. | +| **Hook System (Loop-Level)** | Typed function-pointer fields on `Loop` struct (`EnsureUserFilesFunc`, `SeedUserFilesFunc`, `ContextFileLoaderFunc`, `BootstrapCleanupFunc`) with nil-check invocation | **CORE** | AGH should adopt this pattern for session lifecycle hooks. Function-pointer fields are compile-time safe, zero-reflection, and fit AGH's "direct function calls through interfaces" principle. Perfect for hooks like `OnSessionStart`, `OnSessionEnd`, `OnEventRecorded`, `OnMemoryConsolidation`. | +| **Hook System (Handler-Level)** | Pre/post hooks on RPC method handlers (`preValidate`, `postTurn`) | **CORE** | Handler-level hooks for the API layer (HTTP/UDS) are core infrastructure. AGH's `api/httpapi` and `api/udsapi` should support pre/post hooks for audit, analytics, and custom validation. | +| **Dynamic/Custom Tools** | `DynamicTool` wrapping `CustomToolDef` -- shell command templates with `{{.key}}` substitution, per-tool timeouts, encrypted env vars | **EXTENSION** | Shell-command-based tools are an extension mechanism, not core. AGH should provide a `DynamicTool` plugin point but not bake shell execution into the core. The template rendering and shell escaping patterns are reusable. 
| +| **MCP Bridge** | `mcp.Manager` with connection pooling, three transports (stdio/SSE/streamable-HTTP), tool namespacing (`mcp__{server}__{tool}`), hybrid search mode (40-tool threshold + BM25 lazy loading), per-agent/user access grants | **CORE** | AGH already spawns ACP agents via stdio -- MCP bridge is a natural extension of the same pattern. Tool namespacing, connection management, health monitoring, and hybrid search mode should be core infrastructure since MCP is becoming the standard interop protocol. | +| **Hybrid Tool Search** | BM25 search over deferred tools when tool count > 40, with `mcp_tool_search` and `mcp_tool_activate` meta-tools | **CORE** | Critical for scaling. As AGH accumulates tools from multiple MCP servers, the context budget pressure becomes real. The search-then-activate pattern should be core. | +| **Memory (Vector Embeddings)** | pgvector-backed semantic search, configurable chunking with overlap, cosine similarity retrieval, top-K injection into system prompt | **CORE** | AGH already has `internal/memory` with dual-scope persistent memory. GoClaw's chunking strategy (configurable chunk size + overlap), dedup-by-hash, and the integration pattern (search at run-start, inject into context) validate AGH's approach. | +| **Knowledge Graph** | LLM-based entity/relation extraction, PostgreSQL storage, BFS path finding, fuzzy entity dedup | **EXTENSION** | Knowledge graphs are expensive to build and maintain (require LLM calls for extraction). This is a Phase 2+ extension that sits on top of the memory system. AGH should provide the interface but not bundle the implementation. | +| **Skills System** | Document-based skills (`SKILL.md` with YAML frontmatter), five-tier loader hierarchy, BM25 + pgvector hybrid discovery, hot-reload, agent self-evolution | **CORE (loader + discovery) / EXTENSION (self-evolution)** | AGH already has `internal/skills` with catalog and loader. 
GoClaw validates the search-then-load pattern and the separation of skills (procedural knowledge) from tools (executable capabilities). The loader hierarchy and discovery engine are core. Self-evolution (nudges at 70%/90% budget) is an extension. | +| **LLM Provider Bridge** | `Provider` interface (`Chat/ChatStream/DefaultModel/Name`), 5 implementations, `providers.Registry`, provider-specific workarounds (thinking passback, token clamping, synthetic streaming) | **N/A (different model)** | AGH does not own the LLM call -- it delegates to ACP agents. However, the `Registry` pattern (lazy map + RWMutex, O(1) lookup) and the encrypted credential storage pattern are directly applicable to AGH's agent/driver management. | +| **Agent Teams and Delegation** | Subagents (self-cloned goroutines), delegation (permission-gated inter-agent handoffs via AgentLinks), team coordination (Kanban task boards with Lead/Member roles) | **EXTENSION** | Multi-agent coordination is Phase 3 territory. AGH should define the interfaces (delegate, handoff, team) but implement them as extensions. The subagent pattern (spawn a background goroutine running the same loop) maps to AGH spawning additional ACP sessions. | +| **Cron and Scheduling** | In-process scheduler with three modes (`cron`/`at`/`every`), JSON file persistence, exponential backoff retry, 200-entry ring buffer log | **EXTENSION** | Scheduling is an extension. AGH could expose a `Scheduler` interface in core but the cron implementation should be a plugin. GoClaw's pattern of dispatching scheduled jobs through the same agent loop (as synthetic `RunRequest`) is elegant and worth copying. | +| **Heartbeat System** | Per-agent periodic self-check with `HEARTBEAT.md` checklist, `HEARTBEAT_OK` suppression, stagger offset, active-hours window | **EXTENSION** | Specialized scheduling for agent self-monitoring. Extension built on top of the scheduler interface. The `HEARTBEAT_OK` suppression pattern is clever for silent monitoring. 
| +| **Context Files and Agent Identity** | `SOUL.md`, `IDENTITY.md`, `USER.md`, `BOOTSTRAP.md` -- virtual filesystem interceptor routes agent file reads/writes to DB, per-user vs shared scoping | **EXTENSION** | AGH already has workspace management. The context file interception pattern (virtual FS layer that redirects specific filenames to a different backend) is interesting but heavy. AGH's simpler approach of injecting context through the ACP protocol is more appropriate for its architecture. | +| **Shell Execution Security** | Four-gate pipeline: deny patterns, credentialed binary detection, approval flow, sandbox routing. Output scrubbing with `ScrubCredentials`. Docker sandbox with `--read-only --cap-drop ALL --network none`. | **EXTENSION** | AGH delegates execution to external agents, so shell security is the agent's responsibility. However, if AGH adds dynamic tool execution, the deny-pattern and credential-scrubbing patterns should be borrowed. | +| **Text-to-Speech** | `tts.Manager` with 4 provider backends (OpenAI, ElevenLabs, Edge, MiniMax), AutoMode triggers, `TtsTool` for agent-initiated synthesis | **EXTENSION** | Clearly an extension. No impact on AGH's core. | +| **RBAC and Security** | 5-layer permission cascade (role hierarchy, API key scopes, global tool policy, per-agent tool policy, owner-only tools), AES-256-GCM encryption at rest, input guard (detection-only) | **CORE (partial)** | AGH needs authentication and authorization for its HTTP/UDS APIs. The role hierarchy pattern, API key hashing (SHA-256), and encrypted credential storage are core. The full 5-layer cascade is overkill for a single-user daemon but the patterns are sound for when AGH supports multiple users. | +| **Audit Logging** | Append-only `audit_logs` table, structured `slog` output, tenant-scoped, queryable via API | **CORE** | AGH already has `internal/observe` for event recording. 
Audit logging of security-relevant actions (config changes, session management) should be core. | +| **Rate Limiting** | Per-IP/per-token token-bucket rate limiter at the gateway | **EXTENSION** | Single-user daemon does not need rate limiting. Extension for when AGH supports remote access. | +| **Multi-Tenant PostgreSQL** | `context.Context` propagation of `tenant_id`, RLS on all tables, encrypted columns for secrets | **N/A** | AGH is local-first, single-tenant. The context propagation pattern is good Go practice but multi-tenancy is out of scope. | +| **OpenAI-Compatible API** | `POST /v1/chat/completions` drop-in replacement for OpenAI clients | **EXTENSION** | Useful for interoperability but not core to AGH's mission. Could be a thin extension layer over AGH's HTTP API. | +| **WebSocket v3 Protocol** | Frame-based protocol with `RequestFrame`/`ResponseFrame`/`EventFrame`, method router, per-client write channels | **N/A** | AGH uses HTTP/SSE + UDS, not WebSocket RPC. The event frame pattern and per-client write channel pattern are already addressed by AGH's SSE implementation. | +| **Inbound Debounce** | Per-chat-ID debounce timer (500ms) to consolidate rapid user messages | **CORE** | Debouncing is essential for AGH's HTTP/SSE interface. When a user types rapidly in the web UI, debouncing prevents N session runs. Should be a small utility in core. | +| **Message Dedup** | Content-hash dedup with 5-second window to prevent duplicate processing on reconnects | **CORE** | Important for AGH's SSE reconnection scenarios. A small utility. | +| **Connection Health Monitoring** | Per-channel/per-MCP-server health checks with status tracking, reconnection with exponential backoff | **CORE** | AGH spawns subprocesses -- monitoring their health, detecting crashes, and reconnecting is core infrastructure. The health check pattern with `ChannelHealth` struct and the exponential backoff retry are directly applicable. 
| + +--- + +## Architectural Patterns Worth Adopting + +### 1. Registry Pattern with Lazy Loading and TTL Cache + +GoClaw's `agent.Router` is a lazy-loading cache keyed by agent ID with a 10-minute TTL: + +```go +type Router struct { + agents map[string]*agentEntry + mu sync.RWMutex + resolver ResolverFunc // lazy-create from DB + ttl time.Duration +} +``` + +**Applicability to AGH:** AGH's `session.Manager` could adopt this pattern for agent driver caching. When AGH spawns an ACP agent, the driver instance could be cached and reused across sessions for the same agent type, with TTL-based eviction for config changes. + +**Classification: CORE pattern** -- fits AGH's existing `session/` package. + +### 2. Typed Function-Pointer Hooks (Not Event Bus) + +GoClaw uses function-pointer fields on structs for lifecycle hooks: + +```go +type Loop struct { + ensureUserProfile EnsureUserProfileFunc + seedUserFiles SeedUserFilesFunc + loadContextFiles ContextFileLoaderFunc +} +``` + +Nil-check before invocation makes hooks optional. No reflection, no event bus, compile-time type safety. + +**Applicability to AGH:** This is exactly aligned with AGH's "direct function calls through interfaces" and "no event bus" principles. AGH's `session.Manager`, `observe.Recorder`, and `memory.Manager` should expose typed hook fields for extension points like: + +- `OnSessionCreated func(ctx, session) error` +- `OnEventRecorded func(ctx, event) error` +- `OnConsolidationComplete func(ctx, results) error` + +**Classification: CORE pattern** -- directly implements AGH's architectural principles. + +### 3. Tool Interface Unification + +GoClaw normalizes all tools (built-in, dynamic shell, MCP-sourced) behind a single interface: + +```go +type Tool interface { + Name() string + Schema() json.RawMessage + Invoke(ctx context.Context, args map[string]any) (string, error) +} +``` + +The agent loop does not know where a tool came from. 
This is achieved through wrapper types like `BridgeTool` for MCP and `DynamicTool` for shell commands. + +**Applicability to AGH:** AGH communicates tools to agents via ACP protocol, but it still needs to manage tool registries for MCP bridging, skill-provided tools, and dynamic tools. A unified `Tool` interface in AGH would normalize these sources before exposing them to ACP agents. + +**Classification: CORE pattern** -- essential for Phase 2 extensibility. + +### 4. Parallel Execution with Deterministic Ordering + +GoClaw dispatches tool calls in parallel but sorts results back to original order: + +```go +for i, tc := range toolCalls { + go func(tc, idx int) { + result := executor.Invoke(ctx, tc) + resultsChan <- indexedResult{idx: idx, result: result} + }(tc, i) +} +sort.Slice(results, func(i, j int) bool { + return results[i].idx < results[j].idx +}) +``` + +**Applicability to AGH:** Useful when AGH needs to execute multiple MCP tool calls or process multiple events concurrently. The `indexedResult` pattern preserves ordering cheaply. + +**Classification: CORE utility** -- small helper in `internal/procutil` or similar. + +### 5. Non-Blocking Publish with Drop-on-Full + +GoClaw's `TryPublishInbound()` is a non-blocking variant that drops messages when the buffer is full rather than blocking producers: + +```go +select { +case bus.inbound <- msg: + return true +default: + slog.Warn("inbound buffer full, message dropped") + return false +} +``` + +**Applicability to AGH:** AGH's Notifier pattern should support this for SSE event delivery. A slow web client should not back-pressure the session execution. AGH's SSE helpers in `api/core` could adopt this. + +**Classification: CORE pattern** -- protects core from slow consumers. + +### 6. 
Stagger Offset for Periodic Tasks + +GoClaw uses MD5 hash of agent ID to deterministically spread periodic tasks across a time window, preventing thundering herd: + +```go +func StaggerOffset(agentID string) time.Duration { + hash := md5.Sum([]byte(agentID)) + offset := binary.BigEndian.Uint32(hash[:4]) % 30 + return time.Duration(offset) * time.Second +} +``` + +**Applicability to AGH:** Useful for AGH's dream consolidation triggers when multiple workspaces need consolidation around the same time. + +**Classification: CORE utility** -- small helper for scheduling. + +### 7. Context Propagation Over Global State + +GoClaw propagates tenant ID through `context.Context` rather than a global singleton: + +```go +func WithTenantID(ctx context.Context, id uuid.UUID) context.Context { + return context.WithValue(ctx, ctxKeyTenantID, id) +} +``` + +**Applicability to AGH:** AGH already uses `context.Context` as first argument everywhere. This validates the approach. AGH should consider propagating session ID, workspace ID, and request ID through context for observability. + +**Classification: CORE pattern** -- already partially adopted. + +--- + +## Extension System Insights + +### Dynamic Tools: Shell-Command Extension Point + +GoClaw's `DynamicTool` is the most accessible extension mechanism -- operators define tools as shell command templates stored in the database: + +``` +Command: "curl -s {{.url}} | jq '.results[]'" +Parameters: {"url": {"type": "string"}} +TimeoutSeconds: 30 +``` + +**Insight for AGH:** AGH should provide a similar mechanism where users can define tools via TOML config that get exposed to ACP agents through the protocol. 
The key security patterns to borrow: + +- Shell escaping via single-quote wrapping +- Per-tool configurable timeouts with process-group kill +- Encrypted environment variables for credential injection +- Output scrubbing with both static patterns (API key regexes) and dynamic patterns (injected credential values) + +**Recommendation:** Define a `DynamicToolProvider` extension interface in AGH that can be implemented by a shell-command plugin, an HTTP-webhook plugin, or a WASM plugin. + +### Channel Adapters: The Minimal Interface + +GoClaw's `Channel` interface is remarkably small: + +```go +type Channel interface { + Name() string + Start(ctx context.Context) error + Stop(ctx context.Context) error + Send(ctx context.Context, msg OutboundMessage) error + Health() ChannelHealth +} +``` + +**Insight for AGH:** This is the gold standard for a plugin interface -- five methods: an identifier (`Name`), clear lifecycle (`Start`/`Stop`), a single operation (`Send`), and a health probe. AGH should define similarly minimal interfaces for its extension points: + +- `AgentDriver` (already exists in `session/` -- `Start/Stop/SendMessage`) +- `ToolProvider` -- `ListTools/InvokeTool/Health` +- `MemoryBackend` -- `Store/Search/Delete/Health` +- `NotificationSink` -- `Send/Health` + +The `Health()` method returning a struct with `Status`, `LastError`, `LastActivity` is a pattern worth standardizing across all AGH extensions. + +### Hook/Event System: Function Pointers > Event Bus + +GoClaw's hook system uses two complementary patterns: + +1. **Loop-level hooks** -- typed function fields on structs, nil-checked before invocation +2. **Bus-level broadcasting** -- buffered channels with subscriber filtering + +**Insight for AGH:** AGH's explicit rejection of event buses is correct for its scope. The function-pointer hook pattern is the right choice.
However, AGH should formalize the hook taxonomy: + +| Lifecycle Point | Hook Signature | Where | +| --------------- | --------------------------------------- | ---------------------- | +| Session created | `func(ctx, *Session) error` | `session.Manager` | +| Session ended | `func(ctx, *Session) error` | `session.Manager` | +| Event recorded | `func(ctx, *Event) error` | `observe.Recorder` | +| Memory stored | `func(ctx, *MemoryEntry) error` | `memory.Manager` | +| Dream triggered | `func(ctx, *DreamRequest) error` | `memory/consolidation` | +| Skill loaded | `func(ctx, *Skill) error` | `skills.Catalog` | +| Agent spawned | `func(ctx, *AgentProcess) error` | `acp.Driver` | +| Agent crashed | `func(ctx, *AgentProcess, error) error` | `acp.Driver` | + +Each hook is a `func` field on the owning struct, set via a `With*` functional option at construction time. Nil hooks are no-ops. + +### MCP Bridge: Connection Pooling and Hybrid Search + +GoClaw's MCP bridge solves three problems AGH will face: + +1. **Connection management** -- pooling server connections across sessions, health monitoring with exponential backoff, cleanup on server crash +2. **Tool namespacing** -- `mcp__{server}__{tool}` prevents collisions when multiple MCP servers expose tools with the same name +3. **Context budget management** -- when tool count > 40, switch to hybrid mode where only the top 40 are inline and the rest are searchable via BM25 + +**Insight for AGH:** AGH already spawns ACP agents via stdio -- the same transport used for MCP stdio servers. The `mcp.Manager` pattern (server state with atomic connected flag, reconnection with backoff, health checks every 30s) maps directly to AGH's `acp.Driver` lifecycle. 
Key recommendations: + +- Adopt `mcp__{server}__{tool}` namespacing for tool deduplication +- Implement the 40-tool hybrid search threshold -- AGH's ACP agents have finite context windows +- Pool MCP server connections across sessions in the `daemon/` composition root +- Use the `BridgeTool` wrapper pattern to present MCP tools through AGH's native tool interface + +### Skills: Search-Then-Load Pattern + +GoClaw's skills are document-based (`SKILL.md`) rather than code-based. Discovery uses BM25 + pgvector hybrid search. Loading injects the skill content into the agent's context window. + +**Insight for AGH:** AGH already has `internal/skills` with a catalog and loader, plus `internal/skills/bundled` for built-in skills. GoClaw validates that skills should be: + +- Filesystem-based (markdown with YAML frontmatter) +- Discoverable via search (not eagerly loaded) +- Injected as context (not executed as code) +- Hierarchical (workspace > project > global > bundled) + +The self-evolution mechanism (agent creates new skills from execution history) is fascinating but should be an extension -- it requires monitoring agent execution patterns and triggering skill creation, which is complex orchestration that does not belong in AGH's minimal core. + +### Deduplication and Debounce Helpers + +GoClaw provides two small but critical utilities: + +1. **`DedupeHelper`** -- content-hash dedup with configurable time window (5s default) +2. **`InboundDebounceHelper`** -- per-key debounce timer (500ms default) that consolidates rapid inputs + +**Insight for AGH:** These should be standalone utilities in `internal/` (perhaps `internal/rateutil` or alongside `internal/procutil`). They protect AGH from: + +- SSE reconnection storms (dedup) +- Rapid user input in the web UI (debounce) +- Duplicate webhook deliveries from external systems + +Both are small, self-contained, and have zero dependencies -- perfect for AGH's core. 
+ +### Health Monitoring Pattern + +GoClaw standardizes health across all subsystems: + +```go +type ChannelHealth struct { + Status string // "connected" | "connecting" | "disconnected" | "error" + LastError string + LastActivity time.Time + MessageCount int64 +} +``` + +**Insight for AGH:** AGH should define a standard `Health` struct in a shared package and require every subsystem to implement it: + +- ACP agent processes: is the process alive, last event time, error count +- MCP server connections: connected/disconnected, last tool call, reconnect attempts +- SQLite databases: writable, size, last vacuum +- Memory system: consolidation status, entry count + +This feeds directly into AGH's `/health` endpoint and the `observe` package. + +--- + +## Summary: What AGH Should Take from GoClaw + +### Adopt as CORE (build into AGH's minimal robust core) + +1. **Typed function-pointer hooks** on `session.Manager`, `observe.Recorder`, `memory.Manager`, `acp.Driver` +2. **Unified Tool interface** for normalizing MCP tools, built-in tools, and dynamic tools +3. **MCP bridge with connection pooling**, health monitoring, namespacing, and hybrid search +4. **Dedup and debounce helpers** as standalone utilities +5. **Standardized Health struct** across all subsystems +6. **Non-blocking publish with drop-on-full** for SSE event delivery +7. **Parallel execution with deterministic ordering** as a utility +8. **Skills search-then-load pattern** (validates AGH's existing approach) + +### Adopt as EXTENSION (plugin/extension system) + +1. **Channel adapters** -- define the `Channel` interface, let extensions implement Telegram/Discord/etc. +2. **Dynamic shell tools** -- define `DynamicToolProvider`, let a shell plugin implement it +3. **Knowledge graph** -- define the interface, let an extension provide LLM-based extraction +4. **Cron/Scheduling** -- define `Scheduler` interface, let an extension implement it +5. 
**Agent teams/delegation** -- define coordination interfaces for Phase 3 +6. **TTS** -- pure extension, no core impact +7. **Heartbeat system** -- extension on top of scheduler +8. **Skills self-evolution** -- extension on top of skills core +9. **Rate limiting** -- extension for multi-user scenarios +10. **OpenAI-compatible API** -- thin extension layer + +### Key Design Principles Validated by GoClaw + +- **Small interfaces win.** GoClaw's `Channel` (5 methods), `Provider` (4 methods), and `Tool` (3 methods) interfaces are the right granularity. AGH should target 3-5 methods per extension interface. +- **Nil-check hooks beat event buses.** GoClaw's function-pointer hooks are zero-overhead when unused, compile-time safe, and require no subscription management. This aligns perfectly with AGH's "no event bus" principle. +- **Namespace everything.** GoClaw's `mcp__{server}__{tool}` pattern prevents collisions as the tool catalog grows. AGH should adopt this early. +- **Health is not optional.** Every subsystem in GoClaw reports health. AGH should make `Health()` a required method on every extension interface. +- **Search beats eager loading.** GoClaw's BM25 hybrid search for both skills and MCP tools keeps context budgets manageable. AGH should adopt this pattern before the tool/skill catalog grows large. diff --git a/docs/ideas/extensability/analysis/analysis_hermes.md b/docs/ideas/extensability/analysis/analysis_hermes.md new file mode 100644 index 000000000..6e6522263 --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_hermes.md @@ -0,0 +1,183 @@ +# Hermes Agent Analysis for AGH + +## Overview + +Hermes is a Python-based self-improving AI agent by Nous Research, designed as a long-lived background process reachable from any channel (CLI, Telegram, Discord, Slack, WhatsApp, Signal, Email, Matrix, Home Assistant).
Its hub-and-spoke architecture centers on a synchronous `AIAgent` core driven by a five-phase `run_conversation()` loop, surrounded by a self-registering tool system, SQLite+FTS5 session store, eight messaging platform adapters, six terminal execution backends, a cron scheduler, a learning loop (persistent memory + skills + session recall), and an ACP adapter for IDE integration. + +Hermes represents a "maximalist kitchen-sink" approach: everything is built in, everything shares the same process, and every interface uses the same registry, session database, memory, and skills directory. This contrasts with AGH's philosophy of a robust minimal core with a highly extensible plugin system. + +### Key Architectural Differences from AGH + +| Dimension | Hermes | AGH | +| --------------- | ----------------------------------------------------- | -------------------------------------------------------- | +| Language | Python (synchronous core, async gateway) | Go (single binary) | +| Agent coupling | One hub class (`AIAgent`) that does everything | Separate packages wired via daemon composition root | +| Extension model | Module-level self-registration singletons | Go interfaces + dependency injection | +| Session store | Single SQLite file shared across all interfaces | Per-session event store + global catalog | +| Memory | Flat markdown file (`memory.md`) + optional Honcho | Dual-scope (global + workspace) with dream consolidation | +| Skills | agentskills.io markdown format in `~/.hermes/skills/` | Bundled skill definitions + catalog/loader | +| Communication | Direct subprocess or gateway adapters | ACP over JSON-RPC/stdio | +| Observability | `/insights` command + session cost accounting | Event recording, health metrics, query engine | + +## Key Features Analysis + +| # | Feature | Hermes Implementation | Classification | Rationale | +| --- | -------------------------------------------- | 
----------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | **Self-registering tool registry** | Module-level singleton; tools register at import time with name, toolset, schema, handler, `check_fn()`, `requires_env` | **CORE** | AGH already has a tools concept via ACP. A typed Go tool registry with availability gating (`check_fn` equivalent) and toolset composition should be core -- it is the primary extensibility surface. | +| 2 | **Toolset composition** | Named groups (`web`, `research`, `full_stack`) with recursive `includes` for bulk enable/disable | **CORE** | Toolset grouping with enable/disable per-session is load-bearing for agent safety and flexibility. Core registry should support grouping. | +| 3 | **Availability gating via check_fn** | Tools withheld from model when API keys missing or deps unavailable -- model never sees tools it cannot use | **CORE** | Critical reliability property. AGH's tool definitions should include an availability predicate. Prevents hallucinated calls. | +| 4 | **Session store with FTS5** | Single SQLite `state.db` with `sessions`, `messages`, `messages_fts` tables; WAL mode; write retries with jitter | **CORE** | AGH already has SQLite stores. Adding FTS5 for cross-session search is a core capability for any agent OS -- it enables recall without external search infra. The schema pattern (FTS5 virtual table with content-external triggers) is directly portable to Go+SQLite. 
| +| 5 | **Cross-session recall (session_search)** | FTS5 query -> group by session -> LLM summarization per session group | **EXTENSION** | The two-stage pipeline (FTS5 retrieval + LLM summarization) is an opinionated recall strategy. AGH core should expose the FTS5 search primitive; the LLM summarization layer should be an extension. | +| 6 | **Persistent memory (memory_tool)** | Flat `memory.md` file with categorized facts; injected into system prompt every turn | **CORE** | AGH already has `internal/memory` with dual-scope memory. Hermes validates that simple persistent facts injected into system prompts is table-stakes. Keep in core. | +| 7 | **Memory provider plugins** | `BuiltinMemoryProvider` (markdown) vs `HonchoMemoryProvider` (external API) via `MemoryManager` abstraction | **CORE pattern, EXTENSION providers** | The provider interface pattern belongs in core. Specific providers (Honcho, vector DB, etc.) are extensions. | +| 8 | **Skills pipeline (agentskills.io)** | Markdown files with YAML frontmatter; slash-command activation; injected as user message; auto-proposal from trajectories | **CORE** | AGH already has `internal/skills`. Hermes reinforces that skills should be: (a) markdown-based, (b) frontmatter-indexed, (c) injected as context not system prompt, (d) discoverable via tools. Keep in core. | +| 9 | **Skill auto-proposal** | Agent calls `skill_manage(action="propose")` after complex tasks to create new skills from completed trajectories | **EXTENSION** | Self-improvement is powerful but opinionated. The skill CRUD API should be core; the auto-proposal heuristic ("detect complex task completion and propose a skill") should be an extension/hook. | +| 10 | **Context compression** | 5-step algorithm: prune old tool results, protect head/tail, LLM-summarize middle, rebuild, chain sessions via `parent_session_id` | **CORE** | Context management is fundamental for long-running sessions. 
AGH should have a compaction interface in core with the default implementation. Session chaining via parent references is a good schema pattern. | +| 11 | **Prompt caching (Anthropic)** | `apply_anthropic_cache_control()` marks system prompt + last 3 messages with `cache_control` breakpoints | **CORE** | Provider-specific optimization, but the concept of marking stable context for caching is universal. AGH's transcript assembly should support cache-hint annotations. | +| 12 | **Gateway / platform adapters** | 8 adapters (Telegram, Discord, Slack, WhatsApp, Signal, Email, Matrix, Home Assistant) via `BaseAdapter` interface | **EXTENSION** | Definitively an extension. The `BaseAdapter` contract (connect, start, send_text, send_message, edit_message, delete_message, on_message, on_command) is a good interface to define in core and let extensions implement. | +| 13 | **Unified command registry** | `COMMAND_REGISTRY` shared across CLI and all gateway platforms; `cli_only` / `gateway_only` flags | **CORE** | A shared command dispatch table is core infrastructure. Commands registered once should be available across all interfaces. | +| 14 | **Cron / scheduled automations** | Natural-language tasks on cron schedule; delivery to any platform; `[SILENT]` marker convention | **EXTENSION** | Scheduling is not part of a minimal agent OS core. It is a compelling extension that uses core primitives (session creation, agent execution, platform delivery). | +| 15 | **Terminal execution backends** | 6 pluggable backends (Local, Docker, SSH, Modal, Daytona, Singularity) via `BaseEnvironment` interface | **CORE interface, EXTENSION backends** | The interface (`execute(command, cwd, timeout) -> {output, returncode}`) belongs in core. Individual backends are extensions. AGH already handles this via ACP subprocess spawning, but a pluggable execution environment concept is valuable. 
| +| 16 | **Subagent delegation** | `delegate_task` spawns isolated child `AIAgent` in ThreadPoolExecutor; zero-context-cost for parent; blocked tools prevent recursion | **CORE** | Agent composition is fundamental. AGH should support spawning child sessions with isolated contexts, restricted tool access, and independent iteration budgets. The depth limit and blocked-tool pattern are good safety defaults. | +| 17 | **ACP adapter (IDE integration)** | JSON-RPC server exposing `initialize`, `tools/list`, `tools/call`, `completion/complete`, `resources/read`; stateful sessions with code context | **CORE** | AGH already has ACP as its primary agent communication protocol. Hermes validates the pattern: expose the same tool registry and session semantics over JSON-RPC for IDE integration. | +| 18 | **Security: dangerous command detection** | Regex patterns for destructive commands; command normalization (ANSI stripping, null byte removal, NFKC); Tirith binary scanner | **CORE** | Command safety is essential for any agent that executes shell commands. AGH should have a core command-approval interface with default regex patterns. | +| 19 | **Approval state machine** | Three scopes: once / session / permanent; CLI interactive prompts; gateway async approval via chat buttons | **CORE** | The approval interface (check -> prompt -> remember) belongs in core. The persistence scope hierarchy is a good pattern. | +| 20 | **SSRF / URL safety** | Block private IP ranges, cloud metadata endpoints, user-defined blocklist | **CORE** | Network safety for agent web requests is core security. | +| 21 | **Gateway authorization** | Priority chain: platform allow-all -> DM pairing -> platform allowlist -> global allowlist -> global allow-all -> deny | **EXTENSION** | Gateway auth is specific to multi-user messaging scenarios. Extension responsibility. 
| +| 22 | **DM pairing system** | Cryptographic pairing codes for granting messaging platform access | **EXTENSION** | Platform-specific access control. Extension. | +| 23 | **Process management** | `ProcessRegistry` for background processes; spawn/poll/wait/kill/read_log; PTY support; crash recovery via checkpoint file | **EXTENSION** | Background process tracking is valuable but not core agent OS. The registry pattern and lifecycle management are good extension material. | +| 24 | **Persistent shell state** | `PersistentShellMixin` maintains shell state across tool calls; SSH ControlMaster | **EXTENSION** | Implementation detail of terminal execution. Extension. | +| 25 | **Token accounting & cost estimation** | `CanonicalUsage` tracks input/output/cache/reasoning tokens; per-model pricing; session-level cost rollup | **CORE** | Usage tracking is core observability. AGH's `internal/observe` should track token economics per session. | +| 26 | **Diagnostic tools (doctor/status)** | `hermes doctor` validates config, deps, tools; `hermes status` shows component health; `InsightsEngine` for analytics | **EXTENSION** | Diagnostics are important but not core agent loop. Good extension that uses core health/metrics APIs. | +| 27 | **Batch processing / trajectory generation** | `BatchRunner` with multiprocessing; toolset distribution sampling; JSONL trajectory output; `TrajectoryCompressor` | **EXTENSION** | Training-data generation is Nous-specific. Not core agent OS. | +| 28 | **RL training environments** | Atropos integration, `HermesAgentBaseEnv` | **EXTENSION** | Research-specific. | +| 29 | **Voice / TTS system** | Multi-provider STT (faster-whisper, Groq, OpenAI) + TTS (Edge, ElevenLabs, OpenAI); Discord voice channels | **EXTENSION** | Modality-specific. Extension with provider plugin interface. 
| +| 30 | **Honcho user modeling** | Dialectic user modeling via external API; semantic search, peer cards, configurable write strategies | **EXTENSION** | External memory provider. Extension. | +| 31 | **Authentication / provider system** | 11 providers; 4 auth types (OAuth device code, OAuth external, API key, external process); credential resolution chain | **CORE interface, EXTENSION providers** | A provider resolution interface (credential lookup chain, model validation) belongs in core. Individual provider implementations are extensions. | +| 32 | **MCP server integration** | External MCP servers discovered at startup; tools namespaced under server name | **CORE** | MCP tool integration is part of the standard agent protocol ecosystem. AGH should support discovering and proxying MCP servers as a core capability. | +| 33 | **User plugins** | `~/.hermes/plugins/` directory; Python modules loaded at startup; register tools via the same registry | **CORE mechanism** | User-authored tool extensions via a plugin directory is a core extensibility mechanism. | +| 34 | **Streaming response delivery** | `StreamingResponse` class: buffer 500 chars or 2s timeout; progressive message editing on platforms that support it | **EXTENSION** | Platform-specific delivery optimization. Extension. | +| 35 | **System prompt builder** | Ordered concatenation of stable sections (identity, platform hints, skills index, memory, context files, guidance blocks) for cache stability | **CORE** | System prompt assembly order matters for caching. AGH should have a structured prompt builder with ordered sections. | + +## Architectural Patterns Worth Adopting + +### 1. 
Learning Loop (Memory -> Skills -> Session Recall) + +Hermes implements a three-layer learning loop that feeds back into every subsequent session: + +- **Persistent memory**: durable facts saved via tool call, injected into system prompt +- **Skills**: procedural knowledge crystallized from completed tasks, invocable on demand +- **Session recall**: FTS5 search + LLM summarization across all historical sessions + +**AGH relevance**: AGH already has dual-scope memory with dream consolidation and a skills catalog. The key pattern to adopt is the **closed feedback loop**: the agent should be able to save memories, create skills, and search past sessions -- all via tool calls within the same conversation. The dream consolidation AGH already has goes beyond Hermes (which has no automatic consolidation). The FTS5 cross-session search pattern is the missing piece AGH should add to its `internal/store` layer. + +**Recommendation**: Add FTS5 indexing to sessiondb event content. Expose a `session_search` capability as a core tool. Let the LLM summarization of results be an extension point. + +### 2. Cron / Scheduled Automations + +Hermes runs a 60-second tick loop in its gateway process, checking `~/.hermes/cron/jobs.json` for due jobs. Each job carries a natural-language command, a cron-syntax trigger, and a delivery target. + +**AGH relevance**: For an Agent OS, scheduled execution is a strong differentiator. An agent that can autonomously perform tasks on schedule, route output to platforms, and suppress noise with `[SILENT]` markers transforms from a reactive tool to a proactive assistant. + +**Recommendation**: Implement as an **extension** that registers with the daemon. Core should expose: (a) a way to create sessions programmatically (already exists), (b) a delivery/notification interface for routing output, (c) a timer/scheduler hook in the daemon lifecycle. The cron extension then uses these primitives. + +### 3. 
Gateway / Platform Adapters + +Hermes uses a `BaseAdapter` interface with 8 implementations. All adapters normalize incoming messages to `MessageEvent` and route through the same dispatch pipeline. + +**AGH relevance**: AGH already has HTTP/SSE and UDS interfaces. Adding messaging platform support should follow the adapter pattern. + +**Recommendation**: Define a `PlatformAdapter` interface in core (`internal/api/contract`). Each platform is a separate extension package. The shared command registry pattern (commands registered once, available everywhere) is excellent and should be adopted. + +### 4. Pluggable Execution Environments + +Hermes separates "what command to run" from "where to run it" via `BaseEnvironment`. The terminal tool delegates to the active backend without knowing whether it is local, Docker, SSH, or serverless. + +**AGH relevance**: AGH spawns ACP-compatible agents as subprocesses. The execution environment concept could extend this: agents could run in Docker, on remote machines, or in serverless environments. + +**Recommendation**: Define an `ExecutionEnvironment` interface in core. The current local subprocess spawning becomes the default implementation. Docker, SSH, and serverless backends become extensions. This is lower priority than other patterns but valuable for Phase 3 (agent network protocol). + +### 5. Approval / Security Pipeline + +Hermes implements defense-in-depth: command normalization -> regex detection -> Tirith scanning -> approval callback -> execution backend isolation -> file write safety. The three-scope approval state machine (once/session/permanent) is particularly well-designed. + +**AGH relevance**: AGH will need command safety as it supports more agent types. The layered approach is the right architecture. + +**Recommendation**: Core should define: (a) a `CommandApproval` interface, (b) default regex patterns for dangerous commands, (c) a scope-based approval memory (once/session/permanent). 
The Tirith scanner and SSRF protection can be extensions or built-in. + +## Extension System Insights + +### Skills Pipeline Design + +Hermes validates several skills design decisions that AGH should consider: + +1. **User-message injection over system-prompt modification**: Skill content injected as a user message preserves prompt caching. System prompt stays stable; only the skill body pays fresh token cost. AGH should adopt this pattern. + +2. **Frontmatter-indexed, body-injected**: Only the skill index (names + descriptions) goes into the system prompt. Full skill content is loaded on demand. This keeps system prompts compact. + +3. **Platform-conditional skills**: Skills declare which platforms and tools they require via `conditions` in frontmatter. Unavailable skills are withheld. AGH should support skill conditions. + +4. **Auto-proposal loop**: After complex tasks, the agent proposes new skills from the trajectory. This should be an opt-in extension, not forced behavior. + +### Tool Registry Design + +Key patterns from Hermes' tool registry for AGH: + +1. **Single-file tool registration**: Each tool is a self-contained file that registers itself. In Go, this maps to an `init()` function or a registry-builder pattern. AGH should make adding a tool a one-package operation. + +2. **check_fn for availability gating**: Tools that cannot run (missing API keys, missing deps) are withheld from the model's tool list. This is the single most important reliability property. AGH must implement this. + +3. **Toolset composition with recursive includes**: Toolsets compose other toolsets. `resolve_toolset("full_stack")` recursively expands to all leaf tools. This is valuable for configuration ergonomics. + +4. **Hidden tools**: Tools that exist in the registry for programmatic use but are not exposed to the model. Useful for internal orchestration tools. + +5. **Tool output conventions**: Consistent `{"success": true, "data": {...}}` / `{"error": "..."}` shape. 
Models learn to parse and retry. AGH should standardize tool result format. + +6. **MCP + user plugins as registry citizens**: External MCP servers and user plugin directories are discovered at startup and register into the same registry as built-in tools. AGH should treat MCP tools and user-authored tools as first-class registry entries. + +### What AGH Should NOT Copy + +1. **Module-level mutable globals** (`_last_resolved_tool_names`): A fragile pattern that causes bugs with subagents. AGH should thread resolved tool context through function parameters. + +2. **Synchronous core with async bridging hacks** (`_run_async()`): Hermes' sync/async impedance mismatch creates complexity. AGH's Go concurrency model (goroutines + channels) avoids this entirely. + +3. **Single-file session database shared across all interfaces**: AGH's split (global catalog + per-session event store) is architecturally cleaner. It avoids the write contention Hermes must hack around with jitter retries. + +4. **Import-time side effects for registration**: In Go, prefer explicit registration in the composition root (`internal/daemon`) rather than relying on `init()` functions. + +5. **Flat memory.md without consolidation**: AGH already has dream consolidation, which is superior to Hermes' approach of trusting the model to manually manage memory quality. + +6. **Kitchen-sink monolith**: Hermes bundles 8 platform adapters, 6 terminal backends, voice/TTS, RL training, and batch processing into one package. AGH should keep these as extensions. 
+ +## Summary: Core vs Extension Classification + +### Core (what AGH should build into its minimal robust foundation) + +- Tool registry with availability gating, toolset composition, and standardized result format +- FTS5 cross-session search in the session/event store +- Context compression interface with default LLM-summarization implementation +- Prompt caching hints in transcript assembly +- Structured system prompt builder with ordered stable sections +- Command approval interface with scope-based memory (once/session/permanent) +- Provider resolution interface for LLM credentials +- Token accounting and cost estimation in the observe layer +- Subagent delegation with isolated contexts and restricted tools +- MCP tool discovery and proxy +- Unified command dispatch table across interfaces +- Skills: frontmatter-indexed, user-message injected, platform-conditional + +### Extension (what AGH should support via its plugin system) + +- Gateway platform adapters (Telegram, Discord, Slack, etc.) +- Cron/scheduled automations +- Terminal execution backends (Docker, SSH, Modal, etc.) +- Voice/TTS pipeline +- Batch processing and trajectory generation +- RL training environments +- Honcho user modeling +- Diagnostic commands (doctor, status, insights) +- Skill auto-proposal heuristics +- DM pairing and gateway authorization +- Process management (background process registry) +- Persistent shell state +- Streaming response delivery optimization diff --git a/docs/ideas/extensability/analysis/analysis_openclaw.md b/docs/ideas/extensability/analysis/analysis_openclaw.md new file mode 100644 index 000000000..d13bc2686 --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_openclaw.md @@ -0,0 +1,233 @@ +# OpenClaw Analysis for AGH Extensibility + +## Overview + +OpenClaw is a personal AI assistant that runs locally on a user's devices and fans out to 20+ messaging platforms through a single control plane. 
Its architecture follows a **hub-and-spoke control model**: a long-lived **Gateway** daemon owns every channel connection, session, tool invocation, and device pairing, while a separate **assistant runtime** (the Pi agent) performs inference and tool reasoning over WebSocket RPC. + +The project is implemented in TypeScript/Node.js with 70+ bundled extensions, 20+ channel adapters, native apps for macOS/iOS/Android, and a Plugin SDK that isolates extensions from core through a narrow typed boundary. OpenClaw targets a single trusted operator who wants one assistant reachable from any device and any chat platform. + +### Key Architectural Differences from AGH + +| Aspect | OpenClaw | AGH | +| ------------------- | -------------------------------------------- | ---------------------------------------------------------------- | +| Language | TypeScript/Node.js | Go single-binary | +| Communication | WebSocket RPC between Gateway and Assistant | JSON-RPC over stdio between daemon and agents | +| Session persistence | JSONL files on disk | SQLite (globaldb + per-session eventdb) | +| Extension model | npm-based Plugin SDK with manifest discovery | Go interfaces with dependency injection | +| Channel surface | 20+ messaging platform adapters | HTTP/SSE (web UI) + UDS (CLI) | +| Scope | Personal assistant across many platforms | Agent session management via ACP | +| Assistant runtime | Stateless Pi agent over WS RPC | ACP-compatible agents (Claude Code, Codex, etc.) 
as subprocesses | + +--- + +## Key Features Analysis + +### Feature Classification Table + +| Feature | OpenClaw Implementation | Classification for AGH | Rationale | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Gateway/Assistant Split** | Stateful Gateway control plane + stateless inference runtime communicating over WS RPC | **CORE** | AGH already has this via daemon + ACP subprocess model. The pattern of keeping all state in the daemon and treating agents as stateless between turns is foundational. | +| **Plugin SDK with Manifest-First Discovery** | Typed boundary (`plugin-sdk/`), `openclaw.plugin.json` manifests, discovery before code execution, four capability types (channel, provider, tool, skill) | **CORE** | AGH needs a plugin registration contract. Manifest-first discovery (inspect metadata without executing code) is a critical safety and performance pattern for any extensible system. | +| **Channel Adapter Matrix** | 20+ messaging adapters (WhatsApp, Telegram, Slack, Discord, etc.) behind a uniform `ChannelPlugin` interface with normalized `InboundMessage` shape | **EXTENSION** | Individual channel adapters are clearly extensions. But the normalized message contract and channel health monitoring model should inform AGH's API layer design. 
| +| **Skills System (AgentSkills format)** | YAML frontmatter + Markdown files, five-tier precedence (workspace > project > personal > managed > bundled), ClawHub registry, slash commands | **CORE (format) / EXTENSION (individual skills)** | AGH already has a skills package. The five-tier precedence model and the AgentSkills standard format are worth adopting as core. Individual skills and the registry (ClawHub equivalent) are extensions. | +| **Tool System with Catalog** | Self-describing JSON Schema tools, `tools.catalog` discovery, tool profiles (coding/research/creative/dangerous/none), allow/deny/alsoAllow composition | **CORE** | Tool catalog with self-describing schemas, profile-based defaults, and allow/deny composition rules should be core. The pattern of tools advertising their own contracts is essential for any agent-facing tool system. | +| **Approval Flow for High-Risk Operations** | Per-invocation approval with UUID tracking, broadcast to all operators, timeout + deny/approve, iOS push delivery | **CORE** | AGH must have an approval mechanism for dangerous tool invocations. The state machine (request > broadcast > wait > approve/deny/timeout) is simple and effective. | +| **DM Scope Policies** | Four policies (main, per-peer, per-channel-peer, per-account-channel-peer) preventing cross-user context leakage | **EXTENSION** | AGH is currently single-user/single-session focused. DM scope becomes relevant only if AGH exposes channel adapters or multi-user surfaces. The pattern is worth noting for future extension. | +| **Context Compaction** | LLM-powered summarization with proactive and reactive triggers, token estimation, tool-result stripping, identifier preservation, write-lock safety | **CORE** | AGH already has consolidation in its memory package. OpenClaw's approach (reactive on overflow + proactive guard, configurable compaction model, summarization that preserves identifiers) provides useful refinements. 
| +| **Device Pairing and Node Capabilities** | Fingerprint-pinned tokens, capability advertisement (`camera`, `canvas`, `screen`, `location`, `voice`), capability-based routing | **EXTENSION** | Device pairing and node capabilities are entirely about multi-device reach. Not relevant for AGH's current scope but a clean extension point if AGH adds device/node support. | +| **ACP Bridge for IDEs** | stdio-to-WS translator process (`openclaw acp`), session mapping, prompt/cancel/listSessions translation | **CORE** | AGH already has ACP as its primary protocol. OpenClaw's bridge pattern validates AGH's approach. The session-mapping strategy (per-client default, explicit override) is a good pattern. | +| **Canvas UI Rendering (A2UI)** | Agent-controlled HTML/CSS/JS workspace + structured A2UI v0.8 protocol, per-session file storage, deep-link scheme back to agent loop | **EXTENSION** | A rich visual surface is a powerful capability but not essential for AGH's core. Should be an extension that any agent can use if available. | +| **Voice and Speech Stack** | Wake-word detection, Talk Mode, STT/TTS provider matrix with fallback chains, global wake-word sync across devices | **EXTENSION** | Voice is a premium feature that adds complexity. Should be a cleanly separated extension with provider interfaces. | +| **Browser Automation** | Multi-profile CDP control, SSRF protection, node-host proxying, Chrome Extension Relay for user sessions, accessibility tree snapshots | **EXTENSION** | Browser automation is a powerful tool but clearly an extension. The SSRF protection pattern and profile-based isolation are worth noting. | +| **Model Provider System** | Auth profiles, auto-discovery (Ollama, Bedrock, Vertex), OAuth token management, auth profile rotation with cooldown, `models.json` pipeline, three-level parameter merge | **CORE (provider interface) / EXTENSION (individual providers)** | AGH needs a provider abstraction. 
The auth profile rotation with cooldown (don't thrash a rate-limited key) and the three-level parameter merge (global > model-specific > agent-specific) are patterns worth adopting in core. | +| **Cron/Webhooks (Proactive Agent)** | Cron scheduler for periodic agent jobs, webhook endpoints for HTTP-triggered runs | **EXTENSION** | Proactive agent triggers (scheduled jobs, external webhooks) are extensions on top of the core session model. Good extension candidates. | +| **Sandboxing (Docker)** | Three specialized images (generic, browser, common base), per-invocation container spawn, resource limits, network policy, nested sandboxing | **EXTENSION** | Docker-based sandboxing is an isolation strategy. AGH should define a sandboxing interface in core but let the Docker implementation be an extension. | +| **Security Audit System** | `openclaw security audit` CLI command, automated checks for filesystem permissions, gateway config, sandbox config, channel policies, skill code safety, tool policy | **CORE** | A security audit surface that validates configuration against best practices should be part of AGH's core. The pattern of automated security assessment at CLI time is valuable. | +| **Onboard Wizard** | Interactive six-step setup (model/auth, workspace, gateway, channels, daemon, health), non-interactive mode for CI, idempotent reconfiguration | **CORE** | AGH needs a clean first-boot experience. The pattern of wizard-writes-config (not hidden state) and idempotent `configure --section` reconfiguration is good. | +| **Session Middleware/Hooks** | `before_compaction`, `after_compaction`, `session.load`, `context.assemble` hooks with exec:// handlers | **CORE** | Lifecycle hooks at well-defined points (session load, context assembly, pre/post-compaction) enable extensions without core changes. AGH should define these hook points. 
| +| **Idempotency Keys** | Per-request UUID for side-effecting methods, retry-safe collapse of duplicate messages | **CORE** | Essential for any system where messages can be delivered at-least-once. AGH should adopt idempotency keys for state-mutating operations. | +| **Event Fan-Out / Broadcast** | Every agent event broadcasts to all authorized subscribers, enabling multi-client observation | **CORE** | AGH already has this via SSE. The pattern of every connected client seeing the same event stream is fundamental for observability. | +| **Sub-Agent System** | `sessions_spawn` tool, sub-agent registry with lifecycle tracking, thread-bound sessions, announcement/delivery pipeline with exactly-once semantics | **EXTENSION** | Multi-agent orchestration is an advanced feature. The `sessions_spawn` unified entry point and the sub-agent registry pattern are worth studying for AGH's future phases. | +| **Deployment Topologies** | Six deployment modes (local, Tailscale Serve, SSH tunnel, Tailscale Funnel, Docker, Fly.io) with explicit migration paths | **EXTENSION** | Each deployment topology beyond local is an extension concern. But the health endpoint contract (`/healthz`, `/readyz`) should be core. | + +--- + +## Architectural Patterns Worth Adopting + +### 1. Manifest-First Plugin Discovery + +OpenClaw's strongest extensibility pattern is the split between **manifest discovery** (read metadata, no code execution) and **code loading** (dynamic import after validation). This means: + +- `openclaw plugins status` can list all plugins without executing any plugin code +- Requirements can be checked, missing dependencies flagged, before any risk +- Disabled plugins are never loaded + +**AGH recommendation**: Define a plugin manifest format (TOML or JSON) that AGH reads at daemon startup before loading any plugin Go code. This enables `agh plugins list` without importing plugin packages. + +### 2. 
Four-Capability Plugin Model + +OpenClaw defines exactly four plugin capabilities: **channels**, **providers**, **tools**, **skills**. Every extension implements one or more of these through typed contracts. This keeps the plugin surface finite and comprehensible. + +**AGH recommendation**: Define AGH's plugin capabilities explicitly. Candidates: + +- **AgentDriver** (already exists as an interface in `session/`) +- **Tool** (agent-callable capabilities with JSON Schema) +- **Skill** (YAML+Markdown instruction files) +- **Observer** (event consumers for observability/integrations) + +### 3. Tool Profiles with Allow/Deny Composition + +OpenClaw's tool profile system (`coding`, `research`, `creative`, `dangerous`, `none`) provides sensible defaults. The composition rule (`deny` always wins, `alsoAllow` adds, `allow` replaces) is simple and predictable. + +**AGH recommendation**: Adopt this for AGH's tool configuration. It avoids the complexity of inheritance trees while giving users enough control. The rule "deny always wins" is the right safety default. + +### 4. Skill Precedence Tiers + +Five tiers (workspace > project-agents > personal-agents > managed > bundled) with higher tiers winning. This lets users override bundled behavior without forking. + +**AGH recommendation**: AGH already has bundled skills. Adding workspace-level and personal-level tiers would let users customize without modifying the binary. The precedence model is simple: scan each tier, build a name-to-definition map, higher tiers overwrite. + +### 5. Lifecycle Hooks at Defined Points + +OpenClaw exposes hooks at `before_prompt_build`, `before_compaction`, `after_compaction`, `session.load`, and `context.assemble`. These are not a generic event bus -- they are specific, named lifecycle points where extensions can inject behavior. + +**AGH recommendation**: Define AGH's lifecycle hook points explicitly in the daemon package. 
Candidates: + +- `session.create` / `session.resume` +- `context.assemble` (before building the prompt for the agent) +- `event.record` (after an event is persisted) +- `agent.start` / `agent.done` +- `consolidation.before` / `consolidation.after` + +### 6. Health Endpoints as Core Contract + +Every OpenClaw deployment mode (local, Docker, Fly.io, Tailscale) uses the same `/healthz` and `/readyz` HTTP endpoints. Supervisors, load balancers, and health monitors all converge on these two URLs. + +**AGH recommendation**: AGH should expose `/healthz` (liveness) and `/readyz` (readiness) on the HTTP API as a core contract. These are cheap to implement and universally useful. + +### 7. Idempotency Keys for Side-Effecting Operations + +OpenClaw requires idempotency keys on every state-mutating WS RPC method. This collapses duplicates from at-least-once delivery and makes retries safe. + +**AGH recommendation**: Adopt idempotency keys for AGH's HTTP API endpoints that mutate state (session creation, event submission, config changes). Store recent keys in an LRU cache with TTL. + +### 8. Normalized Message Shape + +OpenClaw compresses 20+ platform-specific message formats into one `InboundMessage` type: `{senderId, channelId, accountId, threadId, groupId, text, timestamp, attachments}`. Every downstream consumer works with this single shape. + +**AGH recommendation**: If AGH adds input channels beyond HTTP/UDS, define a canonical internal message type early. Even for HTTP/UDS, a normalized request shape simplifies the pipeline. + +--- + +## Extension System Insights + +### ClawHub (Skills Registry) + +OpenClaw's ClawHub (clawhub.ai) is a centralized public registry for skills, modeled after npm: + +- **Install**: `openclaw skills install github` +- **Update**: `openclaw skills update --all` +- **Search**: `openclaw skills search weather` +- **Version pinning**: `openclaw skills install github@1.2.3` + +Skills are distributed as git repos or npm packages with semver tags. 
The registry is **optional** -- users can point `skills.load.extraDirs` at any local directory and skip ClawHub entirely. + +**Insight for AGH**: A skill registry is a Phase 2-3 concern. For now, AGH should ensure its skills format is portable (the AgentSkills standard is shared across multiple agent frameworks). When a registry is needed, the npm-like CLI UX (`agh skills install/update/search`) is the right model. The critical design decision is making the registry optional -- air-gapped and enterprise deployments must work without it. + +### Native Apps (Node Mode) + +OpenClaw's native apps (macOS, iOS, Android) connect as **node-role WebSocket clients** that expose device capabilities back to the Gateway. They are NOT plugins (no in-process registration). Instead: + +1. Connect to Gateway WS with `role: "node"` +2. Advertise capabilities: `["camera", "canvas", "screen", "location", "voice"]` +3. Gateway indexes capabilities by device +4. Agent calls `nodes.invoke({command: "camera.snap"})` and Gateway routes to the right device + +The distinction between **plugins** (in-process, Plugin SDK boundary) and **nodes** (external process, WS protocol boundary) is clean and important. + +**Insight for AGH**: If AGH adds device/node support, adopt this two-tier model: + +- **Extensions/plugins**: Go interfaces, in-process, compiled into the binary or loaded at startup +- **Nodes/clients**: External processes connecting via HTTP/WS/UDS, advertising capabilities, receiving routed commands + +### Channel Adapters + +Each channel adapter implements five concerns: + +1. **Transport** -- how it connects +2. **Normalization** -- platform-native to internal message type +3. **Send/receive** -- round-trip delivery +4. **Auth/accounts** -- credential management +5. **Health monitoring** -- state machine with reconnect backoff + +The key insight is that every adapter follows the same interface, and core never special-cases bundled vs. third-party adapters. 
+ +**Insight for AGH**: AGH's HTTP/SSE and UDS "channels" already follow this pattern implicitly. If AGH adds more input surfaces (CLI stdio, WebSocket, platform-specific adapters), formalizing the adapter interface would be valuable. The five-concern decomposition is a good checklist. + +### Plugin Configuration Pattern + +OpenClaw separates plugin config into three layers: + +- **`config`**: passed to plugin setup code, referenced in prompts (no raw secrets) +- **`env`**: injected as process environment at tool invocation time (for secrets) +- **`enabled`**: toggle without removing config + +The split between `config` (agent-visible) and `env` (execution-only, never in prompt) prevents secret leakage through the LLM. + +**Insight for AGH**: When AGH's skill/tool extensions need configuration, adopt this config/env split. Never let extension secrets appear in the context sent to the LLM. + +### Extension Loading Order + +OpenClaw enforces a deterministic loading order: providers before channels before skills. This prevents a channel from registering before its required provider is loaded. + +**Insight for AGH**: AGH's daemon package (the composition root) should document and enforce an explicit initialization order for extensions. Go's `init()` functions are not sufficient -- explicit ordering through the daemon's boot sequence is needed. + +--- + +## Patterns to Explicitly Avoid + +### 1. WebSocket RPC Between Gateway and Assistant + +OpenClaw uses WS RPC because the Gateway and assistant can be on different hosts. AGH uses stdio JSON-RPC because agents are subprocesses. AGH's approach is simpler and more appropriate for its single-binary model. Do not adopt OpenClaw's WS split. + +### 2. In-Process Channel Adapters (at scale) + +OpenClaw runs 20+ channel adapters inside the Gateway process. This is fine for Node.js's event-loop model but would be problematic in Go if each adapter needed goroutines with complex lifecycle management. 
If AGH adds channel adapters, consider subprocess isolation rather than in-process loading. + +### 3. JSONL Session Persistence + +OpenClaw uses append-only JSONL files for session transcripts. AGH already uses SQLite, which is strictly better for structured queries, concurrent access, and crash recovery. Do not regress to JSONL. + +### 4. 70+ Bundled Extensions + +OpenClaw bundles 70+ extensions in its core distribution. AGH's philosophy is "robust minimal core" -- keep the binary lean, let extensions be separately compiled or loaded. Do not bundle everything. + +--- + +## Summary of Recommendations + +### Must-Adopt (Core) + +1. **Manifest-first plugin discovery** -- read metadata before executing code +2. **Typed plugin capability model** -- enumerate the finite set of extension types +3. **Tool profiles with allow/deny composition** -- sensible defaults, predictable overrides +4. **Lifecycle hooks at named points** -- not a generic bus, but specific extension points +5. **Approval flow for dangerous operations** -- per-invocation, with timeout +6. **Health endpoints** -- `/healthz` and `/readyz` as core contract +7. **Idempotency keys** -- for all state-mutating API operations +8. **Security audit CLI** -- automated configuration validation + +### Should-Adopt (Near-term Extension Design) + +1. **Five-tier skill precedence** -- workspace > project > personal > managed > bundled +2. **Config/env split for extension secrets** -- never leak secrets into LLM context +3. **Deterministic extension loading order** -- enforce in daemon boot sequence +4. **Normalized internal message type** -- prepare for multiple input surfaces + +### Worth-Studying (Future Phases) + +1. **ClawHub-style registry** -- when AGH has enough extensions to warrant discovery +2. **Node capability advertisement** -- when AGH supports multi-device +3. **Sub-agent orchestration** -- `sessions_spawn` pattern for Phase 3 agent networks +4. 
**A2UI-style structured surfaces** -- if AGH adds visual output beyond the web UI +5. **Channel adapter matrix** -- if AGH moves beyond HTTP/SSE + UDS diff --git a/docs/ideas/extensability/analysis/analysis_openfang.md b/docs/ideas/extensability/analysis/analysis_openfang.md new file mode 100644 index 000000000..72454c5ff --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_openfang.md @@ -0,0 +1,287 @@ +# OpenFang Analysis for AGH Extensibility + +## Overview + +OpenFang is a Rust-based Agent Operating System comprising 14 crates (~137K LoC) that runs as a persistent daemon managing AI agent sessions. It shares AGH's fundamental design philosophy -- single binary, SQLite persistence, daemon model, local-first -- but takes a maximalist approach: 53 built-in tools, 40 channel adapters, 7 bundled Hands, 60+ skills, 130+ model catalog entries, and 25 MCP templates all compiled into one ~32 MB binary. + +Where AGH follows "robust minimal core + extensible plugins", OpenFang follows "bundle everything into the binary". This is the central tension in the analysis: OpenFang validates many features AGH should eventually support, but its monolithic compilation strategy is the opposite of AGH's extension-first philosophy. The features are proven; the packaging strategy should be inverted. 
+ +### Key Similarities to AGH + +- Single-binary daemon with SQLite (WAL mode, `synchronous = FULL`) +- Kernel-as-composition-root pattern (OpenFang's `OpenFangKernel` ~ AGH's `daemon/`) +- Agent lifecycle state machine with session persistence +- TOML configuration with env var interpolation +- JSON-RPC over stdio for agent communication (MCP/ACP) +- Strict dependency direction enforced by module boundaries +- CLI that doubles as HTTP client when daemon is running + +### Key Differences from AGH + +- Rust vs Go (crate boundaries vs package boundaries) +- Everything compiled in vs extension-first architecture +- 140+ HTTP endpoints vs AGH's focused API surface +- Custom OFP wire protocol vs AGH's ACP-based approach +- In-process LLM drivers vs subprocess-spawned ACP agents +- Built-in web dashboard (Alpine.js) vs AGH's separate React SPA + +--- + +## Key Features Analysis + +### Agent Runtime & Execution + +| Feature | OpenFang Implementation | Classification | Rationale | +| -------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Agent loop (recv/recall/call/execute)** | `run_agent_loop()` in openfang-runtime, 50-iteration cap, bounded by loop guard | **CORE** | This is the fundamental execution model. AGH already has this via ACP subprocess agents, but the loop-bounding, stop-reason taxonomy, and structured result type are worth hardening. | +| **Loop guard (cycle detection)** | SHA256 fingerprinting of recent tool calls, detects ping-pong patterns, forces conclusion | **CORE** | Critical safety mechanism. Any agent that can loop must have cycle detection. 
AGH should implement this in the session/observe layer as a cross-cutting concern, not per-agent. | +| **Context budget allocator** | Token allocation across system/tools/history/response regions, 70% compaction, 90% emergency trim | **CORE** | Essential for long-running sessions. AGH's `transcript` package should own this, with configurable thresholds per agent. | +| **Session repair (7-phase validation)** | Validates message continuity, tool call completeness, role alternation, deduplication, timestamps | **CORE** | Critical for crash recovery of long-running agents. AGH's session store should validate on load. This prevents corrupt state from cascading. | +| **Three LLM drivers (Anthropic, Gemini, OpenAI-compat)** | Native HTTP clients with provider-specific adaptation | **EXTENSION** | AGH delegates LLM interaction to ACP agents (Claude Code, Codex, Gemini CLI). AGH should NOT embed LLM drivers -- the ACP model is superior because it delegates provider-specific logic to purpose-built agents. | +| **Provider routing with fallback chain** | ModelRouter with complexity scoring, auth cooldown, fallback traversal | **EXTENSION** | If AGH ever routes between multiple ACP agents based on task complexity, this pattern is useful. But it belongs as an extension, not core -- AGH's philosophy is that the agent handles its own model selection. | +| **Model catalog (130+ models with pricing)** | Static catalog compiled into binary, cost per million tokens | **EXTENSION** | Useful for metering, but should be a loadable resource file, not compiled in. AGH's config system can reference an external catalog. 
| + +### Scheduling & Automation + +| Feature | OpenFang Implementation | Classification | Rationale | +| --------------------------------------------- | ---------------------------------------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Cron scheduler** | POSIX 5-field cron expressions, `BackgroundExecutor` with per-schedule Tokio tasks | **CORE** | A daemon that runs 24/7 needs scheduled execution. AGH should support cron-triggered sessions in the daemon package. Simple: parse cron, sleep until next fire, dispatch session. | +| **Event-driven triggers** | `TriggerEngine` subscribes to EventBus, matches event kind + regex + fire limits | **CORE** | Reactive execution is the complement to cron. AGH's `observe` package already has event recording; adding pattern-matching trigger dispatch is a natural extension of that. | +| **Fire limits (rate limiting triggers)** | Rolling hourly counter prevents thundering-herd from high-frequency events | **CORE** | Without fire limits, a misconfigured webhook can spawn hundreds of sessions per second. This is a safety mechanism that belongs in core. | +| **Missed fire policy (skip, don't backfill)** | Deliberate: no catch-up on missed cron fires after daemon restart | **CORE** | Good design decision. Backfilling is complex and budget-dangerous. AGH should adopt the same policy. 
| + +### Workflow Engine + +| Feature | OpenFang Implementation | Classification | Rationale | +| ---------------------------------------------- | ------------------------------------------------------------------------------------ | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Multi-step workflow pipelines** | `WorkflowEngine` with sequential, parallel (fan-out), condition, loop, collect modes | **EXTENSION** | Powerful but complex. This should be an extension that composes on top of AGH's session primitives. Core should provide the building blocks (session dispatch, result collection); the workflow engine wires them together. | +| **Variable interpolation between steps** | `{{step_output}}`, `{{global_var}}`, `{{input}}` expansion in step prompts | **EXTENSION** | Implementation detail of the workflow engine extension. | +| **Error handlers per step (retry/skip/abort)** | Exponential backoff retries, skip-and-continue, abort-workflow | **EXTENSION** | Belongs with the workflow engine extension. | +| **Visual workflow builder** | Alpine.js canvas with drag-and-drop step nodes | **EXTENSION** | Frontend concern, definitely an extension. 
| + +### Hands (Autonomous Agent Packages) + +| Feature | OpenFang Implementation | Classification | Rationale | +| ------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Hand concept (packaged autonomous agents)** | HAND.toml manifest + SKILL.md + system prompt + cron schedule + dashboard metrics + guardrails | **EXTENSION** | Brilliant packaging concept. AGH should support a similar "agent package" format as an extension -- a directory with manifest, instructions, schedule, and tool allowlist. But it should NOT be compiled into the binary. | +| **Hand lifecycle state machine** | Discovered -> Dormant -> Active -> Running -> Paused -> Completed -> Error | **CORE** | The lifecycle state machine itself is a core pattern that AGH's session manager already partially implements. The state transitions and persistence-across-restart behavior should be part of AGH's session package. | +| **Dependency verification (binary, env var, API key checks)** | `check_requirements()` validates system state before activation | **EXTENSION** | Useful for agent packages but not core. Extensions that need external binaries should declare and check their own deps. | +| **Hand persistence across daemon restarts** | JSON state files at `~/.openfang/hands/<hand-id>.json`, recovered at boot | **CORE** | AGH should persist active session configurations so they survive daemon restarts. This is part of the daemon lifecycle, not an extension. 
| + +### Memory & Knowledge + +| Feature | OpenFang Implementation | Classification | Rationale | +| -------------------------------------------------- | -------------------------------------------------------------------------------- | --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Tri-part memory (session + semantic + KG)** | Structured KV, semantic text search, entity-relation-fact triples, all in SQLite | **CORE (session) + EXTENSION (semantic, KG)** | Session storage is core (AGH has this). Semantic recall and knowledge graph are extensions. AGH already has `memory/` with dual-scope memory and dream consolidation -- this aligns well. The KG is a natural extension of AGH's memory system. | +| **Knowledge graph (entity-relation-fact triples)** | Three SQLite tables, confidence scoring, BFS traversal, per-agent scoping | **EXTENSION** | Structured knowledge is powerful for long-running agents but adds schema complexity. Should be an opt-in extension that agents can activate. | +| **Session compaction (70% threshold)** | LLM-based summarization of old user/assistant pairs, chars/4 heuristic | **CORE** | Long-running AGH sessions need compaction. The threshold-based approach with graceful degradation belongs in `transcript/`. | +| **Memory consolidation on agent clone** | Dedup entities, merge KV stores, merge KG, report conflicts | **EXTENSION** | Useful but not core. Agent cloning is an advanced feature. 
| + +### Channel Adapters + +| Feature | OpenFang Implementation | Classification | Rationale | +| ----------------------------------------- | -------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Channel adapter trait** | `ChannelAdapter` (inbound stream) + `MessageAdapter` (outbound send) | **EXTENSION** | The trait design is excellent -- clean separation of inbound/outbound with platform-agnostic message envelope. AGH should define a similar interface in its extension system. | +| **40 messaging platform adapters** | Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, etc. | **EXTENSION** | Obviously extensions. Each adapter is a plugin that implements the channel interface. | +| **Message routing with 5-level priority** | Bindings -> direct routes -> user defaults -> channel defaults -> system default | **EXTENSION** | Routing logic belongs with the channel system extension, not core. | +| **Per-channel policies** | DM policy, group policy, output format, rate limits, user allow/block lists | **EXTENSION** | Configuration for channel extensions. | +| **Hot-reloadable channel config** | Adapter restart without daemon restart on config change | **CORE** | Hot reload of extension configuration is a core daemon capability. AGH should support this generically for all extensions. 
| + +### Peer Networking (OFP) + +| Feature | OpenFang Implementation | Classification | Rationale | +| ----------------------------------------------- | ----------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **OFP wire protocol** | Custom TCP binary protocol with length-prefix + JSON, HMAC-SHA256 mutual auth | **EXTENSION** | Agent-to-agent networking is a Phase 3 feature for AGH. When it arrives, it should be an extension, not a custom wire protocol. AGH should prefer standard protocols (HTTP, gRPC, or A2A spec) over inventing a new one. | +| **Peer discovery (gossip)** | PeerDiscovery payload exchange for transitive endpoint discovery | **EXTENSION** | Network topology management is clearly an extension. | +| **Heartbeat and health monitoring** | 30s heartbeat with Healthy/Degraded/Unhealthy/Disconnected classification | **EXTENSION** | Peer health is part of the networking extension. | +| **Inter-agent tools (agent_send, agent_spawn)** | 5 tools for cross-agent and cross-node communication | **EXTENSION** | Agent delegation and orchestration tools are extensions that compose on top of the session system. | +| **Recursion guard (MAX_DEPTH=5)** | `task_local!` depth counter prevents infinite agent delegation chains | **CORE** | If AGH supports agent-to-agent delegation, the recursion guard is a safety mechanism that belongs in core. Unbounded recursion is a cost and stability risk. 
| + +### Security + +| Feature | OpenFang Implementation | Classification | Rationale | +| ---------------------------------------------- | ------------------------------------------------------------------------------------------------ | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **RBAC capability gates** | Per-agent tool allowlist, deny-by-default, child inherits subset of parent | **CORE** | Essential for multi-agent safety. AGH should enforce tool/capability scoping per session. | +| **Approval manager (human-in-the-loop)** | Tool risk levels (Low/Medium/High/Critical), oneshot channels for blocking approval, 60s timeout | **EXTENSION** | Approval gates are important for autonomous agents but should be an extension. Core provides the hook point; the approval logic is pluggable. | +| **Merkle hash-chain audit log** | SHA256 chaining of every significant action, tamper detection, append-only | **EXTENSION** | Powerful for compliance but overkill for most AGH deployments. Should be an optional extension that wraps AGH's observe/event system. | +| **WASM sandbox (fuel + epoch + watchdog)** | Triple-metered Wasmtime sandbox for untrusted skills | **EXTENSION** | AGH doesn't execute untrusted code in-process (ACP agents are subprocesses). If AGH adds a WASM skill runtime, this belongs there. | +| **SSRF protection** | Block private IPs, metadata endpoints, DNS rebinding on web_fetch | **CORE** | If AGH ever exposes web-fetch capabilities, SSRF protection is non-negotiable. But since AGH delegates to ACP agents that have their own sandboxing, this may be the agent's responsibility. | +| **Subprocess sandbox (env_clear + allowlist)** | Clear environment, selective passthrough for child processes | **CORE** | AGH already spawns ACP agents as subprocesses. Environment isolation is a core safety property. 
| +| **Secret zeroization** | `Zeroizing` wrapper that scrubs memory on drop | **CORE** | All credential handling in AGH should use Go's equivalent pattern (explicit zeroing of byte slices). | +| **Prompt injection scanner** | Scan user messages for instruction overrides, delimiter injection | **EXTENSION** | Defense against prompt injection is valuable but should be a pluggable middleware, not hardcoded. | +| **Taint tracking** | Newtype wrappers that label secret data through the call chain | **EXTENSION** | Sophisticated but heavyweight. Go doesn't have Rust's type-level guarantees for this. Should be an extension if implemented. | + +### Cost & Metering + +| Feature | OpenFang Implementation | Classification | Rationale | +| ------------------------------------------------- | ---------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------- | +| **Per-agent, per-provider, global cost tracking** | DashMap for per-agent, per-provider, AtomicU64 for global | **CORE** | Cost tracking is essential for any agent system. AGH's `observe` package should track token usage and cost per session. | +| **Budget enforcement (daily + monthly limits)** | Pre-flight check on every LLM call, halt agent or all agents on breach | **CORE** | Cost runaway is the top operational risk for autonomous agents. Budget gates must be in core. | +| **Cost-aware rate limiting** | GCRA token bucket where expensive models draw more tokens | **EXTENSION** | Sophisticated but an optimization. Basic rate limiting is core; cost-weighted rate limiting is an extension. 
| + +### MCP Integration + +| Feature | OpenFang Implementation | Classification | Rationale | +| -------------------------------------------- | ------------------------------------------------------------------------------ | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| **MCP client (tool discovery + dispatch)** | Connect to external MCP servers, discover tools, merge into agent tool catalog | **CORE** | AGH already has ACP for agent communication. MCP tool discovery and dispatch should be a core capability since it's the standard for tool interop. | +| **MCP server mode (expose agents as tools)** | OpenFang agents consumable by external MCP clients | **EXTENSION** | Exposing AGH agents as MCP tools is valuable for interop but not essential for core operation. | +| **25 bundled MCP templates** | Pre-configured MCP server configs for GitHub, Slack, Notion, etc. | **EXTENSION** | Templates are definitionally extensions. AGH should ship none bundled and let users install from a registry. | +| **Tool namespacing (mcp\_ prefix)** | Prevent collisions between builtins, skills, and MCP tools | **CORE** | Any system that merges tools from multiple sources needs namespacing. This is a core protocol concern. | + +### Configuration & CLI + +| Feature | OpenFang Implementation | Classification | Rationale | +| -------------------------------------------- | --------------------------------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------- | +| **Dual execution mode (daemon + ephemeral)** | Same binary works as daemon or single-shot CLI | **CORE** | AGH should support this. `agh chat "question"` should work without a running daemon by booting an ephemeral session. 
| +| **Config hot reload** | Whitelist of reloadable fields, POST /api/config/reload | **CORE** | Essential for operational flexibility. AGH's daemon should support reloading config subsections without restart. | +| **Config validation endpoint** | POST /api/config/validate for dry-run validation | **EXTENSION** | Nice-to-have but not essential for core. | +| **Credential vault (AES-256-GCM)** | Encrypted secret storage with Argon2 key derivation, OS keyring integration | **EXTENSION** | AGH should integrate with OS keyrings or external secret managers, but a custom vault implementation is an extension. | +| **OAuth2 PKCE flow** | Built-in OAuth for Google, GitHub, Slack integrations | **EXTENSION** | Authentication flows for third-party services are clearly extensions. | + +--- + +## Architectural Patterns Worth Adopting + +### 1. Kernel-as-Composition-Root with Explicit Subsystem Fields + +OpenFang's `OpenFangKernel` holds ~35 fields representing every subsystem. AGH's `daemon/` package serves the same role. The key insight: make every subsystem visible as a named field, not hidden behind a service locator or DI container. + +**AGH implication**: The daemon struct should explicitly list session manager, store, observer, memory, skills, config, etc. as typed fields. New extensions register through the daemon at boot, not through a generic registry. + +### 2. Strict Dependency Direction (Foundation -> Subsystems -> Kernel -> API -> CLI) + +OpenFang's 14-crate workspace enforces no circular dependencies at compile time. The DAG flows: types (leaf) -> subsystems -> runtime -> kernel -> API -> CLI. + +**AGH implication**: AGH already follows this (`daemon/` imports all; nothing imports `daemon/`). Maintain this rigorously as extensions are added. Extensions should depend on core interfaces, never on the daemon or API packages. + +### 3. 
EventBus with Typed Events for Cross-Subsystem Reactions + +OpenFang's `EventBus` with correlation IDs connects the metering engine, audit log, trigger engine, and workflow engine without coupling them directly. + +**AGH implication**: AGH's `observe` package records events. Adding a pub-sub dispatch mechanism (typed observer/notifier pattern, not a generic bus) would enable the trigger engine and cost tracking to react to session events without importing each other. + +### 4. KernelHandle Trait for Testability + +OpenFang's `KernelHandle` trait lets the runtime call kernel methods without importing the kernel directly. This enables testing the runtime with a mock kernel. + +**AGH implication**: AGH's `session/` package defines `AgentDriver` (implemented by `acp/`). Extend this pattern: define a `KernelHandle` or `DaemonHandle` interface that the session package and extensions use to call back into the daemon. This inverts the dependency -- callers depend on the interface rather than on the daemon itself -- and enables testing with a mock implementation. + +### 5. Dual Execution Mode (Daemon + Ephemeral) + +The same binary can boot a full daemon or run a single-shot operation. This is critical for scripting, testing, and CLI ergonomics. + +**AGH implication**: AGH should support `agh chat "question"` without a running daemon. The daemon package should expose an ephemeral boot path that initializes just enough state for one session. + +### 6. Agent Package Format (HAND.toml Analog) + +OpenFang's Hands bundle a system prompt, skills, a manifest, a schedule, and guardrails into a single activatable unit. + +**AGH implication**: AGH should define an "agent package" format (TOML manifest, instruction file, tool allowlist, schedule, resource quotas) that extensions can install, activate, and manage. This is the primary extensibility surface for end users. + +### 7. Stop Reason Taxonomy + +OpenFang's `StopReason` enum (Completed, MaxIterations, LoopDetected, Timeout, QuotaExceeded, BudgetExceeded, Error) gives precise observability into why an agent loop terminated. 
+ +**AGH implication**: AGH's session state machine should capture terminal states with the same granularity. This feeds directly into observability, debugging, and billing. + +--- + +## Extension System Insights + +### What Should Be the Extension Interface? + +OpenFang has no runtime extension loading -- everything is compiled in. This is the opposite of what AGH wants. However, the _boundaries_ between OpenFang's subsystems reveal the natural extension points: + +1. **Tool providers** -- anything that adds tools to the agent's catalog (MCP servers, skill runtimes, builtin tools). Interface: tool definition + execute function. + +2. **Channel adapters** -- anything that bridges external messaging to agent sessions. Interface: inbound message stream + outbound send. + +3. **Memory backends** -- anything that extends the memory substrate (semantic search, knowledge graph, vector DB). Interface: store + recall. + +4. **Scheduling triggers** -- anything that dispatches sessions on events or time. Interface: event pattern + session dispatch. + +5. **Workflow orchestrators** -- anything that composes multiple sessions into pipelines. Interface: step definition + execution engine. + +6. **Security layers** -- anything that adds safety checks to the execution pipeline. Interface: pre-execution hook + post-execution hook. + +### Workflow Engine as Extension Pattern + +The workflow engine is the best example of a feature that should be an extension, not core. It composes the core session dispatch primitive into multi-step pipelines with fan-out, conditionals, and loops. 
The key design lesson: + +- **Core provides**: session dispatch, result collection, event emission on completion +- **Extension provides**: step ordering, variable interpolation, parallel dispatch, error handling +- **Extension consumes**: only the core interfaces (session dispatch + event bus), never kernel internals + +This pattern generalizes: any complex orchestration (workflow engine, Hand lifecycle, channel bridge) should compose on top of core primitives through defined interfaces. + +### OFP Peer Network: Cautionary Tale + +OpenFang invented a custom TCP wire protocol for agent-to-agent communication. While technically sound (HMAC auth, nonce replay protection), it creates a compatibility island -- only OpenFang instances can speak OFP. + +**AGH recommendation**: Do NOT invent a custom protocol. Use the A2A protocol specification (Google/Linux Foundation) or plain HTTP. Agent networking should be an extension that speaks standard protocols, ensuring interoperability with non-AGH systems. 
+ +### Hands/Tools/Skills Layering + +OpenFang's four-layer tool taxonomy is instructive: + +| Layer | Scope | Sandbox | Example | +| --------- | ------------------- | --------------- | --------------------------- | +| Builtins | Core functionality | In-process | file_read, memory_store | +| MCP tools | External interop | Subprocess/HTTP | GitHub, Slack, Notion | +| Skills | Domain expertise | WASM/subprocess | Data analysis, web scraping | +| Hands | Autonomous packages | Agent-level | Researcher, Lead, Collector | + +**AGH equivalent layering**: + +| Layer | AGH Scope | Mechanism | Example | +| --------------- | ------------------ | ---------------- | ------------------------------ | +| ACP agent tools | Core functionality | ACP protocol | Claude Code, Codex, Gemini CLI | +| MCP tools | External interop | MCP protocol | GitHub, Linear, Notion servers | +| Skills | Domain expertise | Bundled SKILL.md | AGH's existing skills package | +| Agent packages | Autonomous units | Package manifest | Researcher, analyst, monitor | + +The key difference: AGH pushes tool execution to ACP agents instead of executing in-process. This is architecturally superior for isolation but means AGH's extension system focuses on configuration and composition rather than runtime execution. + +### What AGH Can Skip + +Several OpenFang features are consequences of its monolithic design and are unnecessary for AGH: + +1. **In-process LLM drivers** -- AGH delegates to ACP agents. No need to implement Anthropic/OpenAI/Gemini HTTP clients. +2. **WASM sandbox** -- AGH doesn't execute untrusted code in-process. ACP agents run as sandboxed subprocesses. +3. **40 channel adapters compiled into the binary** -- These should be installable extensions, not compiled in. +4. **Custom OFP protocol** -- Use standard A2A/HTTP. +5. **Built-in web dashboard** -- AGH already has a separate React SPA. Better separation of concerns. +6. **130+ model catalog** -- AGH's ACP agents handle their own model selection. 
A config-loadable pricing catalog is sufficient for metering. + +--- + +## Summary: Priority Features for AGH + +### Immediate (Core) + +1. **Budget enforcement** -- per-session and global cost limits with pre-flight checks +2. **Session stop reason taxonomy** -- precise terminal state classification +3. **Cron scheduler** -- POSIX cron for scheduled session dispatch +4. **Event-driven triggers** -- pattern-matching on session events with fire limits +5. **Loop/recursion guard** -- cycle detection for agent delegation chains +6. **Session compaction** -- threshold-based context trimming for long sessions +7. **Session repair on load** -- validate session state integrity after crash +8. **Dual execution mode** -- ephemeral single-shot alongside persistent daemon +9. **Config hot reload** -- reload extension configs without daemon restart +10. **Subprocess environment isolation** -- env_clear + allowlist for ACP agent spawning + +### Near-term (Extension interfaces) + +1. **Tool provider interface** -- for MCP servers and custom tool sources +2. **Channel adapter interface** -- for messaging platform bridges +3. **Agent package format** -- manifest + instructions + schedule + guardrails +4. **Workflow engine** -- multi-step session orchestration with fan-out +5. **Memory extension interface** -- for knowledge graph, semantic search backends + +### Later (Extensions) + +1. **Knowledge graph engine** -- entity-relation-fact triples with confidence scoring +2. **Approval manager** -- human-in-the-loop gates for high-risk operations +3. **Audit log (Merkle chain)** -- tamper-evident action logging +4. **Agent-to-agent networking** -- standard A2A protocol, not custom wire protocol +5. 
**Credential vault** -- encrypted secret storage with OS keyring integration diff --git a/docs/ideas/extensability/analysis/analysis_pi_mono.md b/docs/ideas/extensability/analysis/analysis_pi_mono.md new file mode 100644 index 000000000..a27c6ddde --- /dev/null +++ b/docs/ideas/extensability/analysis/analysis_pi_mono.md @@ -0,0 +1,239 @@ +# Pi-Mono Analysis for AGH Extensibility Design + +## Overview + +Pi-Mono is a TypeScript monorepo by Mario Zechner that implements an "aggressively extensible" AI coding agent framework. It consists of seven packages organized in three tiers: a foundation LLM API (`pi-ai`), infrastructure packages (`pi-agent-core`, `pi-tui`), and application-level consumers (`pi-coding-agent`, `pi-mom`, `pi-web-ui`, `pi-pods`). The project's guiding thesis is that coding agents should ship a minimal core with comprehensive extension points, letting users compose exactly the features they need rather than accepting a monolithic feature set. + +Pi-Mono's philosophy directly opposes "batteries-included" tools: it ships only 4 default tools (read, write, edit, bash), keeps its system prompt under 1,000 tokens, and deliberately omits MCP support, sub-agents, permission systems, plan mode, built-in todos, and background bash -- all of which can be rebuilt via its extension system. This minimalism is driven by a concrete technical constraint: context windows are finite, and every token consumed by framework overhead is unavailable for the user's actual task. + +**Relevance to AGH**: Pi-Mono validates the "robust minimal core + extensible plugin system" philosophy that AGH already pursues. It provides a detailed case study of where that boundary should be drawn and what extension surface area looks like in practice for an agent operating system. 
+ +--- + +## Key Features Analysis + +### Feature Classification Table + +| # | Feature | Pi-Mono Implementation | AGH Classification | Rationale | +| --- | -------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | **Unified LLM Streaming API** | `pi-ai`: single `stream()` call over 20+ providers, 10 API protocols, lazy-loaded provider modules, canonical message types | **EXTENSION** | AGH's ACP protocol already abstracts agent communication via JSON-RPC over stdio. AGH spawns complete agent binaries (Claude Code, Codex, Gemini CLI) that handle their own LLM provider connections. A unified LLM API would be useful for future "native" agents AGH spawns directly, but should remain an optional provider package rather than core. | +| 2 | **Extension System (TypeScript modules with lifecycle hooks)** | `ExtensionAPI` with 50+ hooks: tools, commands, shortcuts, events, UI components, providers, state persistence. Extensions loaded via jiti transpiler. | **CORE (design pattern)** | The extension system design is the single most important pattern to adopt. AGH needs a Go-native equivalent: a plugin/extension interface with lifecycle hooks at every boundary (session start/end, tool execution, message streaming, compaction). This should be the core abstraction, not an afterthought. | +| 3 | **Skills (on-demand capability packages)** | Markdown files with YAML frontmatter following AgentSkills.io spec. 
Progressive disclosure: only name+description in system prompt, full content loaded on demand. | **CORE** | AGH already has `internal/skills` with bundled skill definitions. Pi validates this approach and adds the key insight of progressive disclosure (load-on-demand to save context tokens). AGH's skills loader should follow this pattern. | +| 4 | **Prompt Templates** | Markdown files with bash-style variable substitution (`$1`, `$@`, `${@:N:L}`). Expanded via `/name args`. | **EXTENSION** | Useful but not essential to the daemon core. Should be an extension that prompt template directories can register with the skills system. AGH's workspace/config system can discover these. | +| 5 | **Session Tree (JSONL with branching)** | Append-only JSONL where each entry has `id`/`parentId`, forming a tree. Branching via `leafId` pointer. No data ever deleted. | **CORE (adapt)** | AGH already uses SQLite for session events (`store/sessiondb`). The tree-branching concept (navigate to any point, branch without losing history) is valuable for AGH's session model. The `leafId`-based branching pattern could be adapted to SQLite with a `parent_event_id` column. | +| 6 | **Context Compaction** | Structured summarization when context exceeds threshold. Walks backwards to find cut point preserving recent tokens. Iterative compaction builds on previous summaries. | **CORE** | Critical for long-running sessions. AGH's `memory/consolidation` package handles dream consolidation, but per-session compaction for context window management is a separate concern that belongs in core. The structured summary format (Goal, Progress, Key Decisions, Next Steps) is a good template. | +| 7 | **Auto-generated Model Catalog** | Build-time script scrapes provider APIs, writes `models.generated.ts` with type-safe model definitions including pricing, context windows, capabilities. | **EXTENSION** | AGH delegates model selection to agent binaries. 
If AGH ever needs to route to specific models, this could be a useful extension. Not core. | +| 8 | **Cross-Provider Message Handoffs** | `transform-messages.ts` converts thinking blocks, normalizes tool call IDs, repairs orphaned tool calls, sanitizes Unicode when switching models mid-conversation. | **EXTENSION** | Relevant only if AGH manages LLM connections directly. Currently agents handle their own provider connections. Could become relevant for Phase 3 agent network protocol. | +| 9 | **TUI Framework (differential rendering)** | Standalone package with component tree, differential rendering (only redraws changed regions), synchronized output (CSI 2026), overlay system, Kitty keyboard protocol. | **N/A (not applicable)** | AGH uses a React 19 SPA for its web UI and UDS for CLI. A TUI framework is not needed. However, the differential rendering concept is instructive for SSE-based UI updates. | +| 10 | **Theme System (hot-reloadable)** | 51 color tokens in JSON, hot-reload via `fs.watch` with debounce, terminal capability detection, syntax highlighting integration. | **EXTENSION** | Visual customization belongs in the web UI layer, not the daemon core. AGH's web UI already uses Tailwind/shadcn. | +| 11 | **Package Manager (npm/git/local)** | `pi install`, `pi remove`, `pi update`. Packages bundle extensions, skills, prompts, themes. Supports npm, git, and local sources. Auto-install on startup from `.pi/settings.json`. | **CORE (adapted)** | AGH needs a package/plugin distribution mechanism. The concept of bundling skills, extensions, and config into installable packages is essential for the extension ecosystem. Should be adapted to Go (e.g., git-based plugin repos with `agh install`). | +| 12 | **Pi Mom (Self-managing Slack Bot)** | Headless agent deployment in Slack. Per-channel isolation with separate workspaces, MEMORY.md files, skills directories. Docker sandbox. Events system (immediate, one-shot, periodic/cron). 
| **EXTENSION** | Demonstrates a powerful application pattern: the same agent core deployed headlessly into a chat platform. AGH should enable this via its API layer, not by building it into core. The events system (cron-based agent triggers) is a good extension candidate. | +| 13 | **Steering & Follow-up Message Queues** | Two-queue system: steering messages redirect agent mid-turn, follow-up messages queue for after completion. Drain modes: "one-at-a-time" vs "all". | **CORE** | Essential for interactive agent sessions. AGH's session manager should support injecting messages into running sessions with priority semantics (interrupt vs. queue). This maps directly to AGH's HTTP/SSE API. | +| 14 | **Tool Execution Pipeline (parallel/sequential with hooks)** | Preflight sequential, execute parallel, finalize in source order. `beforeToolCall` can block, `afterToolCall` can modify results. File mutation queue for concurrent writes. | **CORE (design pattern)** | AGH delegates tool execution to agent subprocesses, but the hook pattern (before/after with block/modify capability) is relevant for the observe layer and for extensions that want to intercept tool calls visible via ACP events. | +| 15 | **Custom Message Types (declaration merging)** | TypeScript `CustomAgentMessages` interface widened via declaration merging. Custom messages in transcript but filtered from LLM context by `convertToLlm`. | **CORE (adapted)** | AGH's event store should support custom event types from extensions. The pattern of storing extension-specific data in the event stream (but excluding it from agent context) is directly applicable to `store/sessiondb`. | +| 16 | **OAuth Provider System** | 5 built-in OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity). `AuthStorage` with file-based locking. Token auto-refresh. | **EXTENSION** | Auth management for LLM providers. AGH delegates this to agent binaries, so not needed in core. 
Could be an extension for future native agent support. | +| 17 | **Web UI Components (mini-lit)** | Web components for chat interfaces: ChatPanel, AgentInterface, MessageList, artifacts system, sandboxed iframe execution, IndexedDB storage, custom tool renderers. | **N/A (parallel)** | AGH has its own React 19 SPA. Not directly adoptable, but the artifact system (LLM creates/modifies files rendered interactively) and custom tool renderer registry are patterns worth replicating in AGH's web UI. | +| 18 | **GPU Pod Management** | CLI for deploying vLLM on remote GPU pods. SSH-based provisioning, model lifecycle, health monitoring, OpenAI-compatible endpoints. | **EXTENSION** | Infrastructure automation for self-hosted LLMs. Clearly an extension/plugin, not core to an agent OS. | +| 19 | **Context File Discovery (AGENTS.md)** | Loads context files from global, parent directories, and current directory. Both `AGENTS.md` and `CLAUDE.md` recognized. Injected into system prompt. | **CORE** | AGH's workspace resolver already handles this pattern. Validates the approach. The progressive discovery (walk up from cwd to root) is the right pattern. | +| 20 | **RPC Mode (JSONL over stdin/stdout)** | Headless mode using LF-delimited JSONL for IDE integration. Extension UI forwarded as typed requests. | **CORE (validates)** | AGH already has UDS for CLI IPC. Pi's RPC mode validates that a structured protocol over stdio is essential for embedding agents in IDEs and other host processes. | +| 21 | **Dual-Scope Memory (Global + Channel/Workspace)** | MEMORY.md files at global and per-channel levels. Read before every response, injected into system prompt. Editable by both human and agent. | **CORE** | AGH already has `internal/memory` with global + workspace scope. Pi's implementation via plain Markdown files validates the approach and emphasizes that memory should be human-readable and editable. 
| +| 22 | **Events/Scheduling System** | Three event types: immediate, one-shot (timestamp), periodic (cron). File-based triggers (`events/` directory). Queue cap per channel. Silent completion for no-op periodic checks. | **EXTENSION** | Scheduled agent triggers are a powerful pattern but belong as an extension. AGH's daemon could expose a scheduling API that extensions register with. | +| 23 | **Cost Tracking** | Per-message `Usage` object with token counts and dollar costs. `calculateCost()` from model pricing metadata. Real-time display in TUI/web UI. | **CORE** | Observable cost tracking is essential for an agent OS. AGH's `observe` package should track token usage and cost per session, derived from ACP events that report usage. | + +--- + +## Architectural Patterns Worth Adopting + +### 1. Layered Package Architecture with Strict Dependency Flow + +Pi-Mono's three-tier architecture is its most important structural decision: + +``` +Foundation: pi-ai (zero internal deps) +Infrastructure: pi-agent-core (depends on pi-ai), pi-tui (standalone) +Application: pi-coding-agent, pi-mom, pi-web-ui, pi-pods +``` + +**Key rules**: Dependencies flow strictly downward. No package imports `pi-coding-agent` (the top-level app). The foundation layer has zero internal dependencies. Infrastructure packages depend only on foundation. Application packages pull together lower layers. + +**AGH parallel**: AGH already follows this with `daemon/` as sole composition root and downward-only dependency flow. This validates AGH's approach. The additional insight is that AGH's `internal/api/` packages should never be imported by core domain packages (`session/`, `memory/`, `skills/`), which AGH already enforces. + +### 2. Progressive Disclosure for Context Optimization + +Pi's most impactful design insight is that context windows are finite and expensive. Every feature decision is filtered through "what does this cost in tokens?" 
+ +- Skills inject only name+description into the system prompt; full content is loaded on-demand when the agent decides it's relevant. +- No elaborate system prompts -- under 1,000 tokens. +- No MCP tool definitions burning context tokens whether used or not. +- Compaction keeps recent tokens intact while summarizing older ones. + +**AGH adoption**: AGH should adopt progressive disclosure as a first-class principle in its skills system. When AGH sends context to agents, skills should be listed as brief descriptors, with the full skill content available via a "read skill" mechanism. This directly reduces the system prompt overhead per agent session. + +### 3. Single-File Session Trees + +Pi stores entire conversation histories, including all branches, in a single append-only JSONL file. Branching is achieved by appending entries with `parentId` pointing to earlier entries rather than the current leaf. Nothing is ever deleted. + +**Benefits**: No multi-file branch management, complete audit trail, standard format parseable by any tool, no data loss from aborts or crashes (append-only). + +**AGH adaptation**: AGH uses SQLite per-session, which is more powerful but less inspectable. Consider adding a `parent_event_id` column to session events to enable tree-structured branching. The append-only guarantee maps naturally to SQLite's INSERT-only pattern. The key insight is that branching should be a core session primitive, not an afterthought. + +### 4. 
Extension Points as First-Class API Surface + +Pi's extension API exposes 50+ hooks organized into clear categories: + +- **Session lifecycle**: start, before_switch, before_fork, before_compact, compact, shutdown +- **Agent lifecycle**: before_agent_start, agent_start/end, turn_start/end +- **Message lifecycle**: message_start, message_update, message_end +- **Tool lifecycle**: tool_call (can block), tool_result (can modify), tool_execution_start/update/end +- **Input**: transform user input before agent processing +- **Context**: modify messages before LLM call +- **Resources**: contribute additional skill/prompt/theme paths + +The critical patterns: + +- **`tool_call` can block execution** -- extensions can implement permission gates by returning `{ block: true, reason: "..." }` +- **`tool_result` chains like middleware** -- each extension handler can modify results, patches merge sequentially +- **`before_agent_start` can inject messages** -- extensions add context without modifying core logic + +**AGH adoption**: This is the blueprint for AGH's extension system. In Go, these hooks should be typed interfaces that extension packages implement. The `Notifier` pattern AGH already uses is a good foundation; it needs to be extended with blocking/modification semantics for tool call interception. + +### 5. Conflict Resolution Rules + +Pi has deterministic rules for when extensions collide: + +- **Shortcuts**: Reserved keybindings cannot be overridden. Non-reserved conflicts generate warnings. +- **Commands**: Built-in always wins. Extension-vs-extension: first-registered wins, duplicates get numeric suffixes. +- **Tools**: Built-in conflicts produce warnings. First registration wins for extension-vs-extension. +- **Providers**: Can override built-in by ID. Unregister restores defaults. + +**AGH adoption**: AGH needs explicit conflict resolution policies defined before the extension system is built. The "built-in always wins" rule is sensible. 
The "first-registered wins with warnings" approach avoids silent breakage. + +### 6. Lazy Loading and Registration + +Pi never eagerly imports heavy dependencies. Provider modules are loaded via dynamic `import()` only when first used, with `||=` caching to ensure single-load semantics. Errors during lazy load are encoded as events in the stream, never thrown as unhandled exceptions. + +**AGH adoption**: In Go, this translates to lazy initialization of extension packages. Extensions should register intent at startup (name, capabilities, hooks) but defer heavy initialization (database connections, subprocess spawning) until first use. Errors should be captured and reported through the observe layer, not panic. + +--- + +## Extension System Insights + +### Architecture: Hook-Based with Full API Surface + +Pi's extension system is its defining feature. The core insight is that the extension API should be **exactly as powerful as the internal API**. Extensions import the same packages the agent uses. There is no restricted sandbox, no capability manifest, no permission model. This is a deliberate design choice: the target audience is developers who already run arbitrary code. + +**Extension loading flow**: + +1. Discovery: scan `~/.pi/agent/extensions/`, `.pi/extensions/`, and package manifests +2. Transpile: use jiti (just-in-time TypeScript transpiler) with virtual modules for bundled packages +3. Execute: call each extension's default export function with `ExtensionAPI` +4. Bind: wire real action methods into the runtime after initialization +5. 
Dispatch: route events through `ExtensionRunner` which sits between `AgentSession` and extensions + +**For AGH in Go**: The equivalent would be: + +- Discovery: scan `~/.agh/extensions/`, `.agh/extensions/`, and registered plugin directories +- Load: Go plugins (`plugin.Open()`) or, more practically, subprocess-based plugins communicating via gRPC/JSON-RPC +- Register: each plugin exports a registration function that receives an `ExtensionAPI` interface +- Bind: wire hooks into daemon lifecycle after all plugins register +- Dispatch: route events through an extension runner that sits between `session.Manager` and extensions + +### The Four Extension Surfaces + +Pi provides four distinct extension mechanisms, each targeting a different user sophistication level: + +| Surface | Complexity | Capability | Token Cost | +| ----------------------------- | ---------- | ------------------------ | -------------------- | +| **Context files** (AGENTS.md) | Zero code | Persistent instructions | Always loaded | +| **Skills** (SKILL.md) | Zero code | On-demand procedures | Loaded when relevant | +| **Prompt Templates** (\*.md) | Zero code | Reusable shortcuts | Loaded on invocation | +| **Extensions** (\*.ts) | TypeScript | Full runtime integration | No token cost | + +**AGH mapping**: + +- Context files: AGH workspace already supports this via `CLAUDE.md` / config +- Skills: AGH's `internal/skills` package -- validate with progressive disclosure +- Prompt Templates: New extension type, low priority +- Extensions: Primary focus for AGH's extension system design + +### Multi-Package Extensibility (Pi Packages) + +Pi's package system bundles all four extension surfaces into distributable units: + +```json +{ + "pi": { + "extensions": ["./extensions"], + "skills": ["./skills"], + "prompts": ["./prompts"], + "themes": ["./themes"] + } +} +``` + +Packages support three source types (npm, git, local), two scopes (global and project-local), version pinning, selective resource loading 
via glob patterns, and offline mode. + +**AGH design implications**: + +- AGH packages should bundle: extensions (Go plugins or subprocess handlers), skills (Markdown), config templates, and web UI components +- Source types: git repositories (primary), local directories (development) +- Scopes: global (`~/.agh/packages/`) and workspace-local (`.agh/packages/`) +- The `agh install <repo>` command installs a package by cloning the repo and registering its contents +- A manifest file (`agh-package.toml` or similar) declares what the package provides +- Auto-install from workspace config ensures team consistency + +### Security Model: Full Trust with Escape Hatches + +Pi runs extensions with full trust -- no sandbox, no capability restrictions. The rationale: once an agent can read, write, and execute code, preventing exfiltration while maintaining utility is impossible. Security theater (permission popups) provides false assurance. + +Real security comes from: + +- **Containers**: Run in Docker/VM for genuine isolation +- **Scope limitation**: Project-local extensions only affect that project +- **Audit**: Package provenance via npm/git audit tools +- **Extension permission gates**: Extensions themselves can add confirmation flows + +**AGH consideration**: AGH should follow the same model for extension trust. Since AGH runs as a daemon, the security boundary is the daemon's process permissions. Extensions run in the daemon's process (or as supervised subprocesses) and inherit its permissions. The real security boundary is the container/VM that runs the daemon. + +### Event System Design + +Pi's event system has two critical properties: + +1. **Listeners are awaited sequentially** -- a slow listener blocks subsequent listeners and the loop itself. This is by design: it makes `message_end` processing a barrier before tool preflight, ensuring state consistency. + +2.
**State is updated before listeners fire** -- when an event arrives, internal state (messages, pending tool calls, streaming state) is updated first, then listeners are invoked. Listeners always see consistent state. + +**AGH adoption**: The `observe` package's notifier pattern should follow both rules. Events should be dispatched synchronously through registered handlers in registration order, with state mutations committed before notification. For blocking operations (like permission gates), the handler should be able to return a result that the caller inspects. + +--- + +## Summary of Recommendations for AGH + +### Adopt as CORE + +1. **Extension hook system** with lifecycle events at session, agent, tool, and message boundaries +2. **Progressive disclosure** for skills (name+description in context, full content on demand) +3. **Session branching** via parent-event relationships in SQLite +4. **Context compaction** with structured summarization and iterative updates +5. **Steering/follow-up message queues** for injecting messages into running sessions +6. **Package distribution** mechanism for bundling extensions, skills, and config +7. **Cost tracking** integrated into the observe layer +8. **Conflict resolution** policies defined upfront for extension collisions + +### Adopt as EXTENSION + +1. Unified LLM API (for future native agent support) +2. Prompt templates (Markdown with variable substitution) +3. Theme/visual customization +4. Scheduled event triggers (cron-based agent wake-ups) +5. OAuth provider management +6. Chat platform integrations (Slack, Discord, etc.) +7. GPU pod management / self-hosted model deployment +8. Cross-provider message transformation + +### Do Not Adopt + +1. TUI framework (AGH uses web UI) +2. TypeScript-specific patterns (declaration merging, jiti transpiler) +3. "No MCP" stance (AGH should support MCP as an extension surface since it spawns external agents that may use MCP) +4. 
Single-file JSONL storage (AGH's SQLite approach is better for the daemon model) diff --git a/.compozy/tasks/extensability/analysis/analysis_research_extensibility.md b/docs/ideas/extensability/analysis/analysis_research_extensibility.md similarity index 80% rename from .compozy/tasks/extensability/analysis/analysis_research_extensibility.md rename to docs/ideas/extensability/analysis/analysis_research_extensibility.md index 416204d6f..be748fce0 100644 --- a/.compozy/tasks/extensability/analysis/analysis_research_extensibility.md +++ b/docs/ideas/extensability/analysis/analysis_research_extensibility.md @@ -22,19 +22,19 @@ Key finding: The industry has converged on **two winning patterns** for 2025-202 ## Approach Comparison Matrix -| Dimension | Go Native Plugins | HashiCorp go-plugin (gRPC) | WebAssembly (Extism/wazero) | JSON-RPC over stdio | -|---|---|---|---|---| -| **Multi-language support** | Go only | Any language via gRPC (practical: Go, Python, Ruby) | 16+ languages compile to Wasm (Rust, Go, C, TS via AssemblyScript, JS via Javy) | Any language with JSON + stdin/stdout | -| **Performance** | Fastest (in-process, shared memory) | ~50-100us per RPC call over UDS | ~1-10us per call (in-process Wasm sandbox) | ~100-500us per call (JSON serialize + IPC) | -| **Developer experience** | Poor for non-Go devs; strict build env matching | Good Go SDK; gRPC boilerplate for other langs; protobuf required | Good (Extism PDKs for 7+ langs); single `.wasm` binary output | Excellent (any lang, simple JSON protocol, stdin/stdout) | -| **Security isolation** | None (shared process memory) | Strong (OS process isolation + optional TLS + checksums) | Very strong (Wasm linear memory sandbox, capability-based, deny-by-default) | Strong (OS process isolation) | -| **Crash safety** | Plugin crash kills host | Plugin crash isolated to subprocess | Wasm trap handled by runtime; host unaffected | Plugin crash isolated to subprocess | -| **Maturity** | Experimental (no Windows, no 
unloading, CGO required) | Battle-tested since 2012 (Terraform, Vault, Consul, Packer) | Maturing rapidly (Extism v1 GA 2024; WASI 0.2 stable; wazero 1.x stable) | Proven (LSP since 2016; MCP since 2024; 10+ years of JSON-RPC) | -| **Plugin distribution** | Platform-specific binaries; exact Go version match | Platform-specific binaries | Single portable `.wasm` file (runs anywhere) | Platform-specific binaries or scripts | -| **Binary size impact** | Minimal (shared libs) | Minimal (plugins are separate binaries) | ~5-10MB for wazero runtime in host binary | Minimal | -| **Bidirectional calls** | N/A (same process) | Yes (gRPC streams) | Yes (host functions) | Yes (JSON-RPC notifications) | -| **Hot reload** | Impossible (no plugin unloading) | Restart subprocess | Re-instantiate Wasm module (milliseconds) | Restart subprocess | -| **AGH alignment** | Low (contradicts single-binary goal) | Medium (proven but heavy for simple extensions) | High (in-process, portable, sandboxed) | Highest (already using JSON-RPC stdio for ACP) | +| Dimension | Go Native Plugins | HashiCorp go-plugin (gRPC) | WebAssembly (Extism/wazero) | JSON-RPC over stdio | +| -------------------------- | ----------------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------------------------- | -------------------------------------------------------------- | +| **Multi-language support** | Go only | Any language via gRPC (practical: Go, Python, Ruby) | 16+ languages compile to Wasm (Rust, Go, C, TS via AssemblyScript, JS via Javy) | Any language with JSON + stdin/stdout | +| **Performance** | Fastest (in-process, shared memory) | ~50-100us per RPC call over UDS | ~1-10us per call (in-process Wasm sandbox) | ~100-500us per call (JSON serialize + IPC) | +| **Developer experience** | Poor for non-Go devs; strict build env matching | Good Go SDK; gRPC boilerplate for other langs; protobuf 
required | Good (Extism PDKs for 7+ langs); single `.wasm` binary output | Excellent (any lang, simple JSON protocol, stdin/stdout) | +| **Security isolation** | None (shared process memory) | Strong (OS process isolation + optional TLS + checksums) | Very strong (Wasm linear memory sandbox, capability-based, deny-by-default) | Strong (OS process isolation) | +| **Crash safety** | Plugin crash kills host | Plugin crash isolated to subprocess | Wasm trap handled by runtime; host unaffected | Plugin crash isolated to subprocess | +| **Maturity** | Experimental (no Windows, no unloading, CGO required) | Battle-tested since 2012 (Terraform, Vault, Consul, Packer) | Maturing rapidly (Extism v1 GA 2024; WASI 0.2 stable; wazero 1.x stable) | Proven (LSP since 2016; MCP since 2024; 10+ years of JSON-RPC) | +| **Plugin distribution** | Platform-specific binaries; exact Go version match | Platform-specific binaries | Single portable `.wasm` file (runs anywhere) | Platform-specific binaries or scripts | +| **Binary size impact** | Minimal (shared libs) | Minimal (plugins are separate binaries) | ~5-10MB for wazero runtime in host binary | Minimal | +| **Bidirectional calls** | N/A (same process) | Yes (gRPC streams) | Yes (host functions) | Yes (JSON-RPC notifications) | +| **Hot reload** | Impossible (no plugin unloading) | Restart subprocess | Re-instantiate Wasm module (milliseconds) | Restart subprocess | +| **AGH alignment** | Low (contradicts single-binary goal) | Medium (proven but heavy for simple extensions) | High (in-process, portable, sandboxed) | Highest (already using JSON-RPC stdio for ACP) | ### Verdict @@ -79,6 +79,7 @@ Key finding: The industry has converged on **two winning patterns** for 2025-202 **When to use:** Core functionality, bundled agent support, default store implementations. Only for code authored by the AGH team or trusted contributors whose code is reviewed and merged. 
**Pattern:** Standard Go interfaces with compile-time verification: + ```go // Defined in consuming package (Go-style) type AgentDriver interface { @@ -127,6 +128,7 @@ interface session-hooks { ``` **Security model:** + - Wasm linear memory isolation (plugin cannot access host memory) - No filesystem access by default (grant via explicit capability) - No network access by default (grant via host functions) @@ -144,6 +146,7 @@ interface session-hooks { **Protocol:** JSON-RPC 2.0 over stdio (stdin/stdout), with an MCP-inspired capability negotiation handshake. **Lifecycle:** + 1. AGH daemon launches extension binary as subprocess 2. Extension writes a handshake line to stdout: `{"jsonrpc":"2.0","method":"initialize","params":{...}}` 3. AGH responds with capabilities it supports @@ -155,20 +158,20 @@ interface session-hooks { ### Extension Point Catalog -| Extension Point | Layer | Direction | Description | -|---|---|---|---| -| `agent.driver` | Subprocess (L3) | Bidirectional | Custom agent driver (like ACP but for non-ACP agents) | -| `memory.backend` | Subprocess (L3) | Request/Response | Custom memory storage (vector DB, graph DB) | -| `api.route` | Subprocess (L3) | Request/Response | Add HTTP/SSE routes to the daemon API | -| `session.hook.pre_create` | Wasm (L2) | Sync call | Validate/modify session before creation | -| `session.hook.post_create` | Wasm (L2) | Sync call | React to session creation | -| `session.hook.pre_prompt` | Wasm (L2) | Sync call | Filter/transform prompt before sending to agent | -| `session.hook.post_event` | Wasm (L2) | Sync call | Transform/filter events before persistence | -| `message.validator` | Wasm (L2) | Sync call | Content safety, policy enforcement | -| `message.transformer` | Wasm (L2) | Sync call | Content rewriting, enrichment | -| `skill.preprocessor` | Wasm (L2) | Sync call | Transform skill content before injection | -| `observe.exporter` | Subprocess (L3) | Push | Export metrics/events to external systems | -| 
`config.provider` | Go-native (L1) | Sync call | Custom config sources (compiled in) | +| Extension Point | Layer | Direction | Description | +| -------------------------- | --------------- | ---------------- | ----------------------------------------------------- | +| `agent.driver` | Subprocess (L3) | Bidirectional | Custom agent driver (like ACP but for non-ACP agents) | +| `memory.backend` | Subprocess (L3) | Request/Response | Custom memory storage (vector DB, graph DB) | +| `api.route` | Subprocess (L3) | Request/Response | Add HTTP/SSE routes to the daemon API | +| `session.hook.pre_create` | Wasm (L2) | Sync call | Validate/modify session before creation | +| `session.hook.post_create` | Wasm (L2) | Sync call | React to session creation | +| `session.hook.pre_prompt` | Wasm (L2) | Sync call | Filter/transform prompt before sending to agent | +| `session.hook.post_event` | Wasm (L2) | Sync call | Transform/filter events before persistence | +| `message.validator` | Wasm (L2) | Sync call | Content safety, policy enforcement | +| `message.transformer` | Wasm (L2) | Sync call | Content rewriting, enrichment | +| `skill.preprocessor` | Wasm (L2) | Sync call | Transform skill content before injection | +| `observe.exporter` | Subprocess (L3) | Push | Export metrics/events to external systems | +| `config.provider` | Go-native (L1) | Sync call | Custom config sources (compiled in) | --- @@ -436,24 +439,24 @@ For lightweight hooks and transformers. 
TypeScript is compiled to Wasm via Assem ```typescript // @agh/extension-sdk (npm package) -import { Extension, ExtensionContext } from '@agh/extension-sdk'; +import { Extension, ExtensionContext } from "@agh/extension-sdk"; // Define a memory backend extension const ext = new Extension({ - name: 'pgvector-memory', - version: '0.2.1', - extensionPoints: ['memory.backend'], + name: "pgvector-memory", + version: "0.2.1", + extensionPoints: ["memory.backend"], }); // Register handlers for the memory.backend contract -ext.handle('memory/store', async (ctx: ExtensionContext, params: StoreParams) => { +ext.handle("memory/store", async (ctx: ExtensionContext, params: StoreParams) => { // Store to pgvector - await pgPool.query('INSERT INTO memories ...', [params.key, params.embedding]); + await pgPool.query("INSERT INTO memories ...", [params.key, params.embedding]); return { success: true }; }); -ext.handle('memory/recall', async (ctx: ExtensionContext, params: RecallParams) => { - const rows = await pgPool.query('SELECT * FROM memories WHERE ...', [params.query]); +ext.handle("memory/recall", async (ctx: ExtensionContext, params: RecallParams) => { + const rows = await pgPool.query("SELECT * FROM memories WHERE ...", [params.query]); return { entries: rows.map(toMemoryEntry) }; }); @@ -462,6 +465,7 @@ ext.start(); ``` **SDK internals:** + ```typescript // @agh/extension-sdk/src/extension.ts @@ -479,7 +483,7 @@ export class Extension { async start(): Promise<void> { // 1.
Perform JSON-RPC initialize handshake - await this.transport.sendRequest('initialize', { + await this.transport.sendRequest("initialize", { name: this.manifest.name, version: this.manifest.version, extensionPoints: this.manifest.extensionPoints, @@ -490,7 +494,7 @@ export class Extension { if (message.method && this.handlers.has(message.method)) { const result = await this.handlers.get(message.method)!( this.createContext(message), - message.params, + message.params ); await this.transport.sendResponse(message.id, result); } @@ -500,6 +504,7 @@ export class Extension { ``` **SDK structure:** + ``` @agh/extension-sdk/ src/ @@ -529,17 +534,17 @@ For lightweight, sandboxed extensions: // Written in AssemblyScript (TypeScript-like syntax that compiles to Wasm) // @agh/wasm-pdk (AssemblyScript PDK) -import { Host, JSON } from '@agh/wasm-pdk'; +import { Host, JSON } from "@agh/wasm-pdk"; export function on_session_creating(): i32 { const input = Host.inputString(); const ctx = JSON.parse(input); // Validate session - if (ctx.agentName === 'blocked-agent') { + if (ctx.agentName === "blocked-agent") { const result: HookResult = { allow: false, - message: 'This agent is not permitted', + message: "This agent is not permitted", }; Host.outputString(JSON.stringify(result)); return 0; @@ -572,6 +577,7 @@ extension-contracts/ ``` **Why both Protobuf and WIT?** + - Protobuf: For JSON-RPC subprocess extensions. Used to generate typed request/response structures. The actual wire format is JSON (not protobuf binary) -- protobuf serves as the schema definition language. - WIT: For Wasm extensions. WIT is the native interface description language for the WebAssembly Component Model. 
@@ -680,20 +686,21 @@ Phase 3 (GA): Mandatory signing for registry-published extensions; unsigned exte ### Threat Model -| Threat | Layer 1 (Go) | Layer 2 (Wasm) | Layer 3 (Subprocess) | -|---|---|---|---| -| **Malicious code execution** | N/A (compiled in) | Mitigated: Wasm sandbox, no syscalls | Mitigated: process isolation | -| **Memory corruption** | Risk: shared address space | Mitigated: linear memory isolation | Mitigated: separate process | -| **Filesystem access** | Full access | Denied by default; explicit capability grants | Full access (constrain via permissions) | -| **Network access** | Full access | Denied by default; explicit host function grants | Full access (constrain via permissions) | -| **Resource exhaustion (CPU)** | Risk: shared process | Mitigated: wazero fuel metering / timeouts | Mitigated: cgroups / process limits | -| **Resource exhaustion (memory)** | Risk: shared heap | Mitigated: wazero memory limits | Mitigated: process memory limits | -| **Supply chain attack** | Mitigated: code review | Mitigated: checksum verification + sandbox | Mitigated: checksum verification | -| **Host crash** | Risk: panic kills daemon | Safe: Wasm trap handled by runtime | Safe: subprocess crash isolated | +| Threat | Layer 1 (Go) | Layer 2 (Wasm) | Layer 3 (Subprocess) | +| -------------------------------- | -------------------------- | ------------------------------------------------ | --------------------------------------- | +| **Malicious code execution** | N/A (compiled in) | Mitigated: Wasm sandbox, no syscalls | Mitigated: process isolation | +| **Memory corruption** | Risk: shared address space | Mitigated: linear memory isolation | Mitigated: separate process | +| **Filesystem access** | Full access | Denied by default; explicit capability grants | Full access (constrain via permissions) | +| **Network access** | Full access | Denied by default; explicit host function grants | Full access (constrain via permissions) | +| **Resource exhaustion 
(CPU)** | Risk: shared process | Mitigated: wazero fuel metering / timeouts | Mitigated: cgroups / process limits | +| **Resource exhaustion (memory)** | Risk: shared heap | Mitigated: wazero memory limits | Mitigated: process memory limits | +| **Supply chain attack** | Mitigated: code review | Mitigated: checksum verification + sandbox | Mitigated: checksum verification | +| **Host crash** | Risk: panic kills daemon | Safe: Wasm trap handled by runtime | Safe: subprocess crash isolated | ### Security Controls by Extension Type **Wasm Extensions (Layer 2):** + - Linear memory sandbox (cannot read/write host memory) - No filesystem access unless explicitly granted via host functions - No network access unless explicitly granted via host functions @@ -702,6 +709,7 @@ Phase 3 (GA): Mandatory signing for registry-published extensions; unsigned exte - Deterministic execution (no threads, no random without capability) **Subprocess Extensions (Layer 3):** + - OS-level process isolation - Restricted environment variables (allowlist, matching AGH's existing `hookEnvAllowlist` pattern) - JSON-RPC protocol boundary (extension only sees what the host sends) @@ -710,6 +718,7 @@ Phase 3 (GA): Mandatory signing for registry-published extensions; unsigned exte - Optional: run in containers/namespaces for additional isolation (future) **All Extensions:** + - SHA-256 checksum verification at install time (matching AGH's existing `Provenance` system) - Manifest declares required permissions; daemon enforces at load time - Marketplace trust tiers: bundled > user-local > marketplace (matching AGH's existing `SkillSource` precedence) @@ -733,6 +742,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** gRPC subprocess plugins via `hashicorp/go-plugin`. Each provider is a separate Go binary communicating over gRPC with protocol buffers. 
**What AGH should learn:** + - The **provider plugin framework** (`terraform-plugin-framework`) abstracts away gRPC boilerplate, giving provider authors a request/response API that feels like writing a Go HTTP handler. AGH should provide similarly high-level abstractions over JSON-RPC. - **Terraform Registry** uses a Git-based index with artifacts on GitHub Releases -- simple, scalable, and avoids operating a custom artifact server. AGH should follow the same model. - **Muxing** allows incremental migration: old SDK plugins and new framework plugins can coexist in the same provider. AGH should design for forward compatibility from day one. @@ -743,6 +753,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** Extensions run in a separate Node.js "Extension Host" process, communicating with the main Electron renderer via IPC. Extensions declare capabilities in `package.json` (contribution points) and activate lazily based on events. **What AGH should learn:** + - **Lazy activation** is critical. VS Code loads extensions only when their activation events fire. AGH should similarly load Wasm extensions on first invocation rather than at daemon startup. - **Contribution points** (static declarations in `package.json`) provide a declarative way to extend UI and behavior without running code. AGH's `extension.toml` manifest serves the same purpose. - **No DOM access** -- extensions interact through a well-defined API, never directly with the UI layer. AGH should similarly ensure extensions interact only through defined contracts, never directly with internal data structures. @@ -753,6 +764,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** Dual-layer plugin system. Frontend plugins (TypeScript/React) loaded via SystemJS. Backend plugins (Go) launched as subprocesses via `hashicorp/go-plugin` with gRPC communication. 
**What AGH should learn:** + - **Pipeline-based plugin loading** (Discovery -> Bootstrap -> Validation -> Initialization) is clean and testable. AGH should follow a similar staged loading pipeline. - **Health checks** -- Grafana exposes plugin health via HTTP API, allowing external monitoring. AGH should expose extension health status through its existing HTTP API. - **Instance management** -- Grafana passes all configuration in each request, allowing stateless plugin operation. AGH should consider a similar model for Wasm extensions (pass context per call rather than requiring plugins to maintain state). @@ -763,6 +775,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** Multi-layer: in-process Lua (LuaJIT) for fast plugins, msgpack-RPC for remote/out-of-process plugins, provider system for delegating external capabilities. **What AGH should learn:** + - **Two-speed plugin model** -- fast in-process (Lua/LuaJIT) for common operations, slower out-of-process (RPC) for heavy lifting. Directly analogous to AGH's Wasm (fast, in-process) + subprocess (full-power) split. - **No manifest registration** -- plugins are discovered by filesystem convention. AGH's skill system already follows this pattern (scan `.agh/skills/` for `SKILL.md` files). - **Built-in subsystems as extension points** -- Neovim's LSP client, Treesitter, and diagnostics framework provide rich hook points for plugins. AGH should similarly expose session management, memory, and observation as hookable subsystems. @@ -773,6 +786,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** Four-tier extension: Skills (markdown-based instruction injection), Hooks (shell commands at lifecycle events), MCP servers (external tool access), Plugins (bundled distribution). **What AGH should learn:** + - **Progressive disclosure** -- Skills load lazily; only names and descriptions are visible until invoked. 
This is critical for context window efficiency in AI agents. AGH should adopt the same approach: extension metadata is always available, but extension bodies/code load on demand. - **Skill-as-markdown** -- Claude Code's skills are just markdown files with YAML frontmatter, not compiled code. AGH already follows this for skills; the extension system should complement (not replace) this pattern. - **Hooks as shell commands** -- Simple, language-agnostic, zero-dependency. AGH already implements this in `HookRunner`. The extension system should extend this to Wasm hooks for sandboxed execution. @@ -783,6 +797,7 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer **Architecture:** JSON-RPC 2.0 over stdio (local) or Streamable HTTP (remote). Capability negotiation during `initialize` handshake. Tools, resources, and prompts as standardized extension primitives. **What AGH should learn:** + - **Protocol-first design** -- MCP defines the protocol, then SDKs implement it in multiple languages. AGH should define its extension protocol spec first, then build SDKs. - **Capability negotiation** -- The `initialize` handshake where both sides declare what they support is elegant and forward-compatible. AGH should use the same pattern for subprocess extensions. - **Transport agnosticism** -- The protocol works identically over stdio, HTTP+SSE, or WebSockets. AGH should similarly keep its extension protocol transport-agnostic, starting with stdio but designed to work over other transports. @@ -828,15 +843,15 @@ The OpenClaw "ClawHavoc" incident (early 2026) -- where 341 malicious skills wer ## Appendix: Key Technology Choices -| Decision | Choice | Rationale | -|---|---|---| -| Wasm runtime | wazero (via Extism) | Pure Go, zero CGO, zero dependencies -- fits AGH's single-binary constraint. Extism adds high-level SDK. 
| -| Subprocess protocol | JSON-RPC 2.0 over stdio | Already used by AGH (ACP), aligned with MCP/LSP ecosystems, trivial multi-language support. | -| Contract definition | Protobuf (for types) + WIT (for Wasm) | Protobuf gives typed code generation for all languages; WIT is native to Wasm Component Model. | -| TypeScript SDK transport | stdin/stdout JSON-RPC | Node.js subprocess managed by AGH daemon, identical lifecycle to ACP agents. | -| Extension registry | Git-based index + GitHub Releases | Proven by Terraform/Homebrew; no custom infrastructure to operate. | -| Checksum verification | SHA-256 | Matches AGH's existing `Provenance` system; simple, well-understood. | -| TypeScript-to-Wasm | AssemblyScript (primary), Javy (alternative) | AssemblyScript: TypeScript-like syntax, direct Wasm compilation. Javy: full JS support via QuickJS-in-Wasm. | +| Decision | Choice | Rationale | +| ------------------------ | -------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | +| Wasm runtime | wazero (via Extism) | Pure Go, zero CGO, zero dependencies -- fits AGH's single-binary constraint. Extism adds high-level SDK. | +| Subprocess protocol | JSON-RPC 2.0 over stdio | Already used by AGH (ACP), aligned with MCP/LSP ecosystems, trivial multi-language support. | +| Contract definition | Protobuf (for types) + WIT (for Wasm) | Protobuf gives typed code generation for all languages; WIT is native to Wasm Component Model. | +| TypeScript SDK transport | stdin/stdout JSON-RPC | Node.js subprocess managed by AGH daemon, identical lifecycle to ACP agents. | +| Extension registry | Git-based index + GitHub Releases | Proven by Terraform/Homebrew; no custom infrastructure to operate. | +| Checksum verification | SHA-256 | Matches AGH's existing `Provenance` system; simple, well-understood. 
| +| TypeScript-to-Wasm | AssemblyScript (primary), Javy (alternative) | AssemblyScript: TypeScript-like syntax, direct Wasm compilation. Javy: full JS support via QuickJS-in-Wasm. | --- diff --git a/docs/ideas/qa-e2e/README.md b/docs/ideas/qa-e2e/README.md new file mode 100644 index 000000000..7f714f709 --- /dev/null +++ b/docs/ideas/qa-e2e/README.md @@ -0,0 +1,873 @@ +# AGH QA / E2E Playbook + +> Living document for manual and future automated end-to-end validation. +> Seeded from real daemon + web + `agent-browser` execution on 2026-04-14. + +## 1. Purpose + +This document defines the QA / E2E flows that must be exercised regularly for AGH. +It is intended to serve three purposes: + +1. A repeatable manual checklist for future QA rounds. +2. A source of truth for what "working" means at the product level. +3. A direct blueprint for future browser/CLI E2E automation. + +The emphasis is not "green tests at any cost". The emphasis is: + +- real runtime behavior +- real daemon startup and shutdown +- real web interactions +- real persistence and recovery +- real mutation flows +- real validation of failure modes + +When a check fails, the default assumption must be "product bug until disproven", not "test flake". + +--- + +## 2. Core QA Principles + +### 2.1 Non-negotiable rules + +- Always run against a real built binary, not only mocked frontend hooks. +- Always use an isolated `AGH_HOME` for QA so local user state is not polluted. +- Always cross-check critical UI mutations with CLI or persisted backend state. +- Always rerun the full repo gate with `make verify` before calling a round complete. +- Always treat raw `5xx`, stuck spinners, and silent no-op mutations as failures. +- Always capture enough evidence to prove persistence, not just optimistic UI updates. + +### 2.2 Evidence standard + +A flow is only considered validated when all of the following are true: + +- The action can be performed in the UI or public CLI surface. 
+- The backend accepts it without hidden errors. +- The resulting state is visible in at least one independent read path. +- The state survives the relevant lifecycle boundary: + - refresh + - deep link + - daemon restart + - session resume + +### 2.3 Permanent regression guardrails + +These regressions are now known product risks and must always be checked: + +- `Network` with `network.enabled = false` must render an explicit disabled state, not raw downstream `503` errors. +- `Automation` create/edit flows must never submit `retry.strategy = "none"` with non-zero `max_retries` or non-empty `base_delay`. +- Browser-visible mutations must not silently fail while leaving the editor open with no actionable error. + +--- + +## 3. Canonical Execution Profiles + +Use these profiles as named fixtures for future rounds and future E2E harnesses. + +### Profile A: Baseline default config + +Purpose: + +- Validate first-run and safe default behavior. + +Key expectations: + +- `network.enabled = false` +- app loads +- onboarding works +- `Network` page shows disabled state +- no hard failures on empty states + +### Profile B: Network-enabled stress profile + +Purpose: + +- Validate live channels, peers, messages, channel sessions, and recovery paths. + +Key expectations: + +- `network.enabled = true` +- at least one non-default channel can be created +- at least two peers can join +- messages can be sent and observed + +### Profile C: Bridge-provider profile + +Purpose: + +- Validate bridge discovery and delivery flows where a real provider is available. + +Key expectations: + +- at least one bridge provider is visible to the app +- bridge creation works +- test delivery covers at least one error path and one accepted path + +### Profile D: Multi-workspace profile + +Purpose: + +- Validate workspace scoping, active workspace switching, and add-dir agent visibility. 
+ +Key expectations: + +- at least one repo workspace +- at least one extra registered workspace +- at least one agent only visible in the secondary workspace + +--- + +## 4. Mandatory Fresh Gate Before and After QA + +Every substantial QA round should start and end with: + +```bash +make verify +``` + +When the round includes web changes or frontend bug fixes, also keep these available for tighter loops: + +```bash +make web-lint +make web-typecheck +make web-test +``` + +Failure policy: + +- If `make verify` fails before QA starts, the round is already red. +- If `make verify` fails after a bug fix, the round is still red even if the browser flow looks fixed. + +--- + +## 5. Suite Matrix + +| Suite ID | Area | Priority | Cadence | Must be automatable later? | +| -------- | ------------------------------------------------------ | -------- | -------------------------------- | -------------------------- | +| `BASE` | Repo gate + build contract | P0 | Every round | Yes | +| `BOOT` | Install, daemon lifecycle, onboarding | P0 | Every round | Yes | +| `WS` | Workspaces, switching, navigation | P0 | Every round | Yes | +| `SES` | Sessions, prompting, transcript, resume | P0 | Every round | Yes | +| `AUTO` | Automation jobs/triggers and runs | P0 | Every round | Yes | +| `BRIDGE` | Provider discovery and delivery | P1 | Every round when provider exists | Yes | +| `NET` | Disabled mode, enabled mode, channels, peers, messages | P0 | Every round | Yes | +| `ROB` | Restart, reload, reconnect, recovery | P0 | Every round | Yes | +| `ERR` | Empty/loading/error states | P1 | Every round | Yes | +| `CONS` | CLI/UI consistency audit | P0 | Every round | Yes | + +--- + +## 6. Detailed Test Cases + +Each case below includes the behavior to exercise, the minimum assertions, and the edge cases that must stay covered. + +### `E2E-BASE-001` Fresh repository verification + +Goal: + +- Prove the repo is in a runnable state before product-level QA starts. + +Steps: + +1. 
Run `make verify`. + +Assertions: + +- lint passes +- typecheck passes +- unit/integration suite passes +- build completes +- package-boundary checks pass + +Always fail if: + +- any target is skipped silently +- warnings are ignored +- QA proceeds without a fresh pass + +--- + +### `E2E-BOOT-001` Isolated runtime bootstrap and daemon start + +Goal: + +- Prove AGH can bootstrap and run in a clean isolated home. + +Steps: + +1. Create a temporary `AGH_HOME`. +2. Run `./bin/agh install`. +3. Start the daemon. +4. Check `./bin/agh daemon status`. + +Assertions: + +- daemon reports `running` +- HTTP endpoint is present +- socket is present +- install writes valid config and home layout + +Edge coverage: + +- first boot from empty state +- no dependency on an already-populated `~/.agh` + +--- + +### `E2E-BOOT-002` First workspace onboarding in the web app + +Goal: + +- Prove a clean browser can onboard a repo workspace from the real web flow. + +Steps: + +1. Open the app in a real browser. +2. Complete onboarding for the target repo. + +Assertions: + +- workspace appears in the UI +- the app exits onboarding into the main shell +- sidebar and main pages load without reload loops + +Always fail if: + +- onboarding completes visually but the workspace is not persisted +- the shell loads but critical API calls fail + +--- + +### `E2E-WS-001` Active workspace switching and scope isolation + +Goal: + +- Prove the active workspace actually changes backend-visible state, not just UI highlight. + +Steps: + +1. Register at least two workspaces. +2. Make one workspace expose additional agent dirs unavailable in the other. +3. Switch active workspace in the app. +4. Open a flow that depends on workspace agent resolution, such as channel creation. 
+ +Assertions: + +- agent lists change with workspace selection +- workspace-specific entities appear only where expected +- sidebar session grouping matches the active workspace filter + +Edge coverage: + +- workspace with `additional_dirs` +- repo workspace vs secondary temp workspace + +--- + +### `E2E-WS-002` Refresh, deep link, and sidebar continuity + +Goal: + +- Prove internal routes survive direct navigation and reload. + +Steps: + +1. Open internal routes directly: + - `/automation` + - `/network` + - `/session/<id>` +2. Refresh each page. +3. Use browser back/forward where useful. + +Assertions: + +- route loads without redirect corruption +- selected entity detail panel rehydrates correctly +- sidebar remains usable after refresh + +Always fail if: + +- a deep link renders an empty shell without loading the target entity +- refresh destroys state that should have been server-backed + +--- + +### `E2E-SES-001` Create a session and send a first prompt + +Goal: + +- Prove the main chat flow works end-to-end from the web shell. + +Steps: + +1. Create a new session from the sidebar. +2. Send a simple deterministic prompt. + +Assertions: + +- session appears in the sidebar +- transcript shows user and agent messages +- message persists on refresh +- `session status` or transcript endpoint confirms the same session state + +Edge coverage: + +- initial empty transcript +- first message in a newly created session + +--- + +### `E2E-SES-002` Concurrent session prompts + +Goal: + +- Prove multiple sessions can be active and respond independently. + +Steps: + +1. Create at least two sessions in different workspaces or with different agents. +2. Send deterministic prompts to both without waiting for the first to finish.
+ +Assertions: + +- both sessions remain addressable +- both eventually produce the expected response +- one slow session does not starve the other + +Always fail if: + +- the second prompt cancels or corrupts the first +- events are attached to the wrong session + +--- + +### `E2E-SES-003` Stop and resume a session + +Goal: + +- Prove explicit interruption and resume work on a live session. + +Steps: + +1. Start a real session. +2. Stop it. +3. Confirm stopped state. +4. Resume it. +5. Send another deterministic prompt. + +Assertions: + +- stop reason is recorded correctly +- resumed session returns to `active` +- post-resume prompt produces the expected response +- transcript preserves both pre-stop and post-resume history + +--- + +### `E2E-SES-004` Session persistence after daemon restart + +Goal: + +- Prove session metadata and transcripts remain available after daemon shutdown. + +Steps: + +1. Create or reuse persisted sessions. +2. Stop the daemon. +3. Start it again. +4. Reopen the session from the sidebar or deep link. +5. Resume at least one stopped session. + +Assertions: + +- stopped sessions still appear in the browser shell +- deep link to `/session/` still loads transcript +- resumed session works after restart + +Important nuance: + +- `agh session list` without `--all` only shows active sessions. +- Use `agh session list --all` when checking persistence after shutdown. + +--- + +### `E2E-AUTO-001` Create an automation job from the UI + +Goal: + +- Prove the browser create flow persists a valid job. + +Steps: + +1. Open `Automation`. +2. Switch to the correct workspace scope if needed. +3. Create a new job from the UI. 
+ +Assertions: + +- stop reason is recorded correctly +- resumed session returns to `active` +- post-resume prompt produces the expected response +- transcript preserves both pre-stop and post-resume history + +--- + +### `E2E-SES-004` Session persistence after daemon restart + +Goal: + +- Prove session metadata and transcripts remain available after daemon shutdown. + +Steps: + +1. Create or reuse persisted sessions. +2. Stop the daemon. +3. Start it again. +4. Reopen the session from the sidebar or deep link. +5. Resume at least one stopped session. + +Assertions: + +- stopped sessions still appear in the browser shell +- deep link to `/session/<id>` still loads transcript +- resumed session works after restart + +Important nuance: + +- `agh session list` without `--all` only shows active sessions. +- Use `agh session list --all` when checking persistence after shutdown. + +--- + +### `E2E-AUTO-001` Create an automation job from the UI + +Goal: + +- Prove the browser create flow persists a valid job. + +Steps: + +1. Open `Automation`. +2. Switch to the correct workspace scope if needed. +3. Create a new job from the UI.
+ +Minimum cases: + +- invalid trigger configuration: + - `event = ext.test.qa` + - non-empty `endpoint_slug` +- invalid retry payload: + - `strategy = none` + - non-zero `max_retries` + - non-empty `base_delay` + +Assertions: + +- backend returns validation error, not generic `500` +- UI surfaces a useful error state where applicable +- no partial entity is persisted + +--- + +### `E2E-BRIDGE-001` Bridge provider discovery and bridge creation + +Goal: + +- Prove the app can discover a real provider and create a bridge. + +Steps: + +1. Ensure at least one provider is installed or surfaced via extension. +2. Open `Bridges`. +3. Create a bridge. + +Assertions: + +- provider appears as selectable in the UI +- bridge is persisted and visible in UI and CLI +- scope and workspace binding are correct + +Caveat: + +- a synthetic adapter fixture may leave bridge status in `starting`. +- do not mark that as a product bug unless a real provider reproduces it. + +--- + +### `E2E-BRIDGE-002` Bridge test-delivery error and success paths + +Goal: + +- Prove bridge delivery validation and accepted delivery both work. + +Minimum cases: + +- invalid target mode: + - `direct-send` without peer or group target +- valid accepted delivery: + - reply mode with peer and thread IDs + +Assertions: + +- invalid request returns a specific validation error +- valid request returns accepted/resolved target data + +Always fail if: + +- invalid delivery is silently accepted +- accepted delivery returns malformed target metadata + +--- + +### `E2E-NET-001` Disabled network baseline + +Goal: + +- Prove the app behaves correctly under default network-disabled config. + +Steps: + +1. Run with default config where `network.enabled = false`. +2. Open `Network`. +3. Switch across tabs. 
+ +Assertions: + +- app shows explicit disabled messaging +- no raw `Service Unavailable` or downstream `503` errors leak to the page +- navigation remains usable + +Permanent regression assertion: + +- channels/peers queries must be gated by network status. + +--- + +### `E2E-NET-002` Create a real channel and observe real peers + +Goal: + +- Prove channel creation starts real channel-bound sessions and peer membership. + +Steps: + +1. Run with `network.enabled = true`. +2. Open `Network`. +3. Create a channel with at least two agents. + +Assertions: + +- channel appears in the list +- peer count is non-zero +- peers tab lists the actual local peers +- session records show channel-bound sessions + +Edge coverage: + +- agent availability changes by workspace +- peers use display-name fallback correctly + +--- + +### `E2E-NET-003` Send a real message and verify metrics + +Goal: + +- Prove the network transport does real work and updates runtime state. + +Steps: + +1. Send a real network message through the public CLI. +2. Check channel and peer read paths. +3. Inspect network status counters. + +Assertions: + +- message ID is returned +- message count increments +- channel and peer listings remain consistent +- metrics show sent/received activity + +Optional stronger assertion: + +- one receiving session reacts in a way visible in its transcript or ledger + +--- + +### `E2E-NET-004` Rehydrate channels and peers after resume / restart + +Goal: + +- Prove network membership can be restored after lifecycle boundaries. + +Steps: + +1. Shut down the daemon or stop the channel sessions. +2. Start daemon again. +3. Resume the channel sessions. +4. Reopen `Network`. 
+ +Assertions: + +- channel appears again +- local peer count returns +- peers list matches resumed sessions +- browser detail view rehydrates correctly + +Always fail if: + +- channel metadata survives but peers never come back after valid resume + +--- + +### `E2E-ROB-001` Daemon restart while the UI is in use + +Goal: + +- Prove the app survives server unavailability and eventual recovery. + +Steps: + +1. Keep the browser on an internal page. +2. Stop the daemon. +3. Start it again. +4. Reload or revisit the route. + +Assertions: + +- persisted entities remain visible after recovery +- deep links still work +- sessions can be resumed and used again + +Important checks: + +- no stale UI-only state masking backend loss +- no route becomes permanently broken after recovery + +--- + +### `E2E-ROB-002` Reload, reconnect, and browser navigation behavior + +Goal: + +- Prove the web shell does not depend on a single long-lived in-memory path. + +Steps: + +1. Navigate between pages: + - `Automation` + - `Bridges` + - `Network` + - `Session` +2. Refresh each page. +3. Use browser back and forward. + +Assertions: + +- page-specific detail state rehydrates +- selected item remains valid or falls back sanely +- no duplicated toasts or broken listeners appear after navigation + +--- + +### `E2E-ERR-001` Empty, loading, and error-state sweep + +Goal: + +- Prove each feature has intentional UX for non-happy paths. + +Minimum coverage: + +- `Automation`: empty list, loading, mutation error +- `Bridges`: no provider / no bridge / delivery validation error +- `Network`: disabled, empty, and enabled populated states +- `Sessions`: empty sidebar and persisted stopped sessions + +Assertions: + +- empty states are descriptive and actionable +- loading states are visible and finite +- error states are specific and not raw transport dumps where avoidable + +--- + +### `E2E-CONS-001` CLI / UI consistency audit + +Goal: + +- Prove the browser is showing server truth. + +Steps: + +1. 
Create or mutate entities through the UI. +2. Read the same entities through the CLI. +3. Create or mutate entities through the CLI. +4. Verify the browser reflects them. + +Must-cover entities: + +- sessions +- jobs +- triggers +- bridges +- channels +- peers + +Assertions: + +- IDs match +- scope/workspace bindings match +- state transitions match +- persisted timestamps and counts are plausible + +--- + +## 7. Always-Check Assertions By Surface + +### Sessions + +- Session IDs are stable across reads. +- Transcript survives refresh. +- Stop/resume transitions are visible in status and transcript. +- Restart does not destroy persisted stopped sessions. + +### Workspaces + +- Active workspace changes data, not only selection chrome. +- Workspace-scoped entities do not leak into unrelated scopes. +- Additional-dir agents appear only where they should. + +### Automation + +- Create/edit/delete are not silent no-ops. +- Retry contract is always valid. +- Triggered runs create observable backend state. + +### Bridges + +- Provider discovery is derived from installed runtime capabilities. +- Bridge creation is actually persisted. +- Delivery validation rejects malformed targets. + +### Network + +- Disabled mode is graceful. +- Enabled mode shows real channels and peers. +- Message send updates observable counters or artifacts. + +### Resilience + +- Restart semantics are explicit: + - active sessions may stop on shutdown + - persisted sessions must remain inspectable + - resumed sessions must become usable again + +--- + +## 8. Future Automation Strategy + +This document should eventually map to executable suites, not only manual rounds. + +### Recommended split + +1. `smoke-e2e` + - daemon boot + - onboarding + - create session + - send prompt + - open major routes + +2. `stateful-e2e` + - multiple workspaces + - stop/resume + - restart recovery + - deep links + +3. `network-e2e` + - disabled mode + - enabled mode + - channel creation + - peers + - message send + +4. 
`automation-e2e` + - create job + - create trigger + - trigger run + - validation failures + +5. `bridges-e2e` + - provider discovery + - create bridge + - delivery test paths + +### Recommended execution model + +- Use an isolated `AGH_HOME` fixture per suite or per worker. +- Use public CLI to seed data only where the UI is not the target under test. +- Use browser automation only for flows whose primary contract is UI behavior. +- Always validate critical mutations through a second read path. + +### Recommended artifacts to capture + +- `daemon status -o json` +- CLI entity listings in JSON +- browser snapshots or screenshots at key checkpoints +- HAR for failed mutation debugging +- route-specific console/errors when a UI mutation fails + +--- + +## 9. Round Exit Criteria + +A QA round is only complete when all of the following are true: + +- the planned P0 suites were exercised +- every discovered regression was either fixed or explicitly documented as a blocker +- the fix, if any, was verified in the real browser/runtime path +- `make verify` passed after the last code change +- no result is based only on mocked confidence + +If any of those are missing, the round is incomplete. + +--- + +## 10. Suggested First Automation Backlog + +If we convert this playbook into executable E2E tests, start with these in order: + +1. `E2E-NET-001` disabled network regression +2. `E2E-AUTO-001` create job with valid `retry.none` +3. `E2E-AUTO-002` create trigger with valid `retry.none` +4. `E2E-SES-001` create session and send prompt +5. `E2E-SES-004` restart + resume + transcript recovery +6. `E2E-WS-001` workspace switching and agent visibility +7. `E2E-NET-002` channel creation and peer membership +8. `E2E-BRIDGE-002` bridge delivery validation paths + +These cover the highest-signal product contracts and the two regressions already found in real usage. 
diff --git a/internal/api/contract/bridges.go b/internal/api/contract/bridges.go index 8229200e5..6483d0cc4 100644 --- a/internal/api/contract/bridges.go +++ b/internal/api/contract/bridges.go @@ -126,6 +126,11 @@ type BridgesResponse struct { BridgeHealth map[string]BridgeHealthPayload `json:"bridge_health,omitempty"` } +// BridgeProvidersResponse wraps the shared installed provider catalog. +type BridgeProvidersResponse struct { + Providers []bridgepkg.BridgeProvider `json:"providers"` +} + // BridgeResponse wraps one shared bridge payload. type BridgeResponse struct { Bridge bridgepkg.BridgeInstance `json:"bridge"` @@ -155,6 +160,7 @@ type BridgeHealthPayload struct { DeliveryDroppedByReason map[string]int `json:"delivery_dropped_by_reason,omitempty"` DeliveryFailuresTotal int `json:"delivery_failures_total"` AuthFailuresTotal int `json:"auth_failures_total"` + LastSuccessAt *time.Time `json:"last_success_at,omitempty"` LastError string `json:"last_error,omitempty"` LastErrorAt *time.Time `json:"last_error_at,omitempty"` } diff --git a/internal/api/contract/contract.go b/internal/api/contract/contract.go index beaa34cf4..802ef6eee 100644 --- a/internal/api/contract/contract.go +++ b/internal/api/contract/contract.go @@ -290,6 +290,13 @@ type NetworkSendPayload struct { Ext map[string]json.RawMessage `json:"ext,omitempty"` } +// CreateNetworkChannelRequest is the shared network channel creation payload. +type CreateNetworkChannelRequest struct { + Channel string `json:"channel"` + WorkspaceID string `json:"workspace_id"` + AgentNames []string `json:"agent_names"` +} + // NetworkPeerCardPayload is the shared JSON representation of one peer card. type NetworkPeerCardPayload struct { PeerID string `json:"peer_id"` @@ -303,20 +310,26 @@ type NetworkPeerCardPayload struct { // NetworkPeerPayload is the shared JSON representation of one visible peer. 
type NetworkPeerPayload struct { - SessionID *string `json:"session_id,omitempty"` - PeerID string `json:"peer_id"` - Channel string `json:"channel"` - Local bool `json:"local"` - PeerCard NetworkPeerCardPayload `json:"peer_card"` - JoinedAt *time.Time `json:"joined_at,omitempty"` - LastSeen *time.Time `json:"last_seen,omitempty"` - ExpiresAt *time.Time `json:"expires_at,omitempty"` + SessionID *string `json:"session_id,omitempty"` + PeerID string `json:"peer_id"` + DisplayName string `json:"display_name,omitempty"` + Channel string `json:"channel"` + Local bool `json:"local"` + PeerCard NetworkPeerCardPayload `json:"peer_card"` + JoinedAt *time.Time `json:"joined_at,omitempty"` + LastSeen *time.Time `json:"last_seen,omitempty"` + ExpiresAt *time.Time `json:"expires_at,omitempty"` } // NetworkChannelPayload is the shared JSON representation of one active channel. type NetworkChannelPayload struct { - Channel string `json:"channel"` - PeerCount int `json:"peer_count"` + Channel string `json:"channel"` + PeerCount int `json:"peer_count"` + LocalPeerCount int `json:"local_peer_count,omitempty"` + RemotePeerCount int `json:"remote_peer_count,omitempty"` + SessionCount int `json:"session_count,omitempty"` + MessageCount int `json:"message_count,omitempty"` + LastMessageAt *time.Time `json:"last_message_at,omitempty"` } // NetworkEnvelopePayload is the shared JSON representation of one surfaced @@ -339,6 +352,54 @@ type NetworkEnvelopePayload struct { Ext map[string]json.RawMessage `json:"ext,omitempty"` } +// NetworkChannelDetailPayload is the shared channel detail payload used by the network UI. 
+type NetworkChannelDetailPayload struct { + Channel string `json:"channel"` + PeerCount int `json:"peer_count"` + LocalPeerCount int `json:"local_peer_count,omitempty"` + RemotePeerCount int `json:"remote_peer_count,omitempty"` + SessionCount int `json:"session_count,omitempty"` + MessageCount int `json:"message_count,omitempty"` + LastMessageAt *time.Time `json:"last_message_at,omitempty"` + Sessions []SessionPayload `json:"sessions,omitempty"` + Peers []NetworkPeerPayload `json:"peers,omitempty"` +} + +// NetworkChannelMessagePayload is the shared read-only channel timeline payload. +type NetworkChannelMessagePayload struct { + MessageID string `json:"message_id"` + Channel string `json:"channel"` + PeerID string `json:"peer_id"` + DisplayName string `json:"display_name,omitempty"` + SessionID string `json:"session_id,omitempty"` + Local bool `json:"local,omitempty"` + Intent string `json:"intent,omitempty"` + Text string `json:"text"` + Timestamp time.Time `json:"timestamp"` +} + +// NetworkPeerMetricsPayload is the shared peer-level counter payload. +type NetworkPeerMetricsPayload struct { + Sent int64 `json:"sent,omitempty"` + Received int64 `json:"received,omitempty"` + Rejected int64 `json:"rejected,omitempty"` + Delivered int64 `json:"delivered,omitempty"` +} + +// NetworkPeerDetailPayload is the shared selected-peer detail payload. +type NetworkPeerDetailPayload struct { + SessionID *string `json:"session_id,omitempty"` + PeerID string `json:"peer_id"` + DisplayName string `json:"display_name,omitempty"` + Channel string `json:"channel,omitempty"` + Local bool `json:"local,omitempty"` + PeerCard NetworkPeerCardPayload `json:"peer_card"` + JoinedAt *time.Time `json:"joined_at,omitempty"` + LastSeen *time.Time `json:"last_seen,omitempty"` + ExpiresAt *time.Time `json:"expires_at,omitempty"` + Metrics NetworkPeerMetricsPayload `json:"metrics"` +} + // InstallExtensionRequest is the shared extension install request payload. 
type InstallExtensionRequest struct { Path string `json:"path"` diff --git a/internal/api/contract/responses.go b/internal/api/contract/responses.go index 8792e3fd5..fc0f33b91 100644 --- a/internal/api/contract/responses.go +++ b/internal/api/contract/responses.go @@ -126,6 +126,26 @@ type NetworkChannelsResponse struct { Channels []NetworkChannelPayload `json:"channels"` } +// CreateNetworkChannelResponse wraps the created channel detail payload. +type CreateNetworkChannelResponse struct { + Channel NetworkChannelDetailPayload `json:"channel"` +} + +// NetworkChannelResponse wraps one channel detail payload. +type NetworkChannelResponse struct { + Channel NetworkChannelDetailPayload `json:"channel"` +} + +// NetworkChannelMessagesResponse wraps the read-only channel timeline payload. +type NetworkChannelMessagesResponse struct { + Messages []NetworkChannelMessagePayload `json:"messages"` +} + +// NetworkPeerResponse wraps one selected peer detail payload. +type NetworkPeerResponse struct { + Peer NetworkPeerDetailPayload `json:"peer"` +} + // NetworkSendResponse wraps the outbound send result payload. type NetworkSendResponse struct { Message NetworkSendPayload `json:"message"` diff --git a/internal/api/core/bridges.go b/internal/api/core/bridges.go index 8e3e3b438..831f73aa8 100644 --- a/internal/api/core/bridges.go +++ b/internal/api/core/bridges.go @@ -35,6 +35,22 @@ func (h *BaseHandlers) ListBridges(c *gin.Context) { c.JSON(http.StatusOK, contract.BridgesResponse{Bridges: instances, BridgeHealth: bridgeHealth}) } +// ListBridgeProviders returns installed bridge-capable providers. 
+func (h *BaseHandlers) ListBridgeProviders(c *gin.Context) { + bridges, ok := h.bridgeService() + if !ok { + h.respondError(c, http.StatusServiceUnavailable, errBridgeServiceUnavailable) + return + } + + providers, err := bridges.ListProviders(c.Request.Context()) + if err != nil { + h.respondError(c, StatusForBridgeError(err), err) + return + } + c.JSON(http.StatusOK, contract.BridgeProvidersResponse{Providers: providers}) +} + // CreateBridge persists a new bridge instance. func (h *BaseHandlers) CreateBridge(c *gin.Context) { bridges, ok := h.bridgeService() diff --git a/internal/api/core/bridges_test.go b/internal/api/core/bridges_test.go index 7c09a2369..8da073854 100644 --- a/internal/api/core/bridges_test.go +++ b/internal/api/core/bridges_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "net/http" + "strings" "testing" "time" @@ -196,6 +197,80 @@ func TestBridgeHandlersRoutesAndTestDelivery(t *testing.T) { } } +func TestBridgeHandlersListProviders(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + listProvidersFn func(context.Context) ([]bridgepkg.BridgeProvider, error) + wantError string + wantHealth string + wantPlatform string + wantStatus int + }{ + { + name: "Should list bridge providers", + listProvidersFn: func(context.Context) ([]bridgepkg.BridgeProvider, error) { + return []bridgepkg.BridgeProvider{{ + Platform: "telegram", + ExtensionName: "telegram-reference", + DisplayName: "Telegram", + Description: "Reference Telegram bridge adapter", + Enabled: true, + State: "active", + Health: "healthy", + HealthMessage: "connected", + }}, nil + }, + wantHealth: "healthy", + wantPlatform: "telegram", + wantStatus: http.StatusOK, + }, + { + name: "Should map bridge provider errors through bridge status mapping", + listProvidersFn: func(context.Context) ([]bridgepkg.BridgeProvider, error) { + return nil, bridgepkg.ErrBridgeInstanceUnavailable + }, + wantError: bridgepkg.ErrBridgeInstanceUnavailable.Error(), + wantStatus: 
http.StatusConflict, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + _, engine := newBridgeHandlerFixture(t, testutil.StubBridgeService{ + ListProvidersFn: tt.listProvidersFn, + }) + + resp := performRequest(t, engine, http.MethodGet, "/bridges/providers", nil) + if resp.Code != tt.wantStatus { + t.Fatalf("providers status = %d, want %d body=%s", resp.Code, tt.wantStatus, resp.Body.String()) + } + if tt.wantError != "" { + var payload contract.ErrorPayload + testutil.DecodeJSONResponse(t, resp, &payload) + if !strings.Contains(payload.Error, tt.wantError) { + t.Fatalf("error payload = %#v, want %q", payload, tt.wantError) + } + return + } + + var payload contract.BridgeProvidersResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := len(payload.Providers), 1; got != want { + t.Fatalf("len(providers) = %d, want %d", got, want) + } + if got, want := payload.Providers[0].Platform, tt.wantPlatform; got != want { + t.Fatalf("provider platform = %q, want %q", got, want) + } + if got, want := payload.Providers[0].Health, tt.wantHealth; got != want { + t.Fatalf("provider health = %q, want %q", got, want) + } + }) + } +} + func TestBridgeHandlersIncludeObservedHealthPayloads(t *testing.T) { t.Parallel() @@ -230,6 +305,7 @@ func TestBridgeHandlersIncludeObservedHealthPayloads(t *testing.T) { DeliveryBacklog: 1, DeliveryFailuresTotal: 3, AuthFailuresTotal: 1, + LastSuccessAt: time.Date(2026, 4, 3, 11, 59, 0, 0, time.UTC), LastError: "adapter unavailable", }}, nil }, @@ -274,6 +350,9 @@ func TestBridgeHandlersIncludeObservedHealthPayloads(t *testing.T) { if got, want := getPayload.Health.RouteCount, 2; got != want { t.Fatalf("get health route_count = %d, want %d", got, want) } + if getPayload.Health.LastSuccessAt == nil || !getPayload.Health.LastSuccessAt.Equal(time.Date(2026, 4, 3, 11, 59, 0, 0, time.UTC)) { + t.Fatalf("get health last_success_at = %#v, want 2026-04-03T11:59:00Z", getPayload.Health.LastSuccessAt) 
+ } } func TestBridgeHandlersMutationReturnsBestEffortPayloadWhenHealthLookupFails(t *testing.T) { @@ -381,6 +460,7 @@ func newBridgeHandlerFixture(t *testing.T, bridges core.BridgeService) (*core.Ba engine.Use(gin.Recovery()) engine.GET("/bridges", handlers.ListBridges) engine.POST("/bridges", handlers.CreateBridge) + engine.GET("/bridges/providers", handlers.ListBridgeProviders) engine.GET("/bridges/:id", handlers.GetBridge) engine.PATCH("/bridges/:id", handlers.UpdateBridge) engine.POST("/bridges/:id/enable", handlers.EnableBridge) diff --git a/internal/api/core/conversions.go b/internal/api/core/conversions.go index 6937e017b..f22ea1d1f 100644 --- a/internal/api/core/conversions.go +++ b/internal/api/core/conversions.go @@ -341,6 +341,12 @@ func BridgeAggregateHealthPayloadFromObserve(summary observepkg.BridgeAggregateH // BridgeHealthPayloadFromObserve converts the observer per-instance bridge // health snapshot into the shared payload. func BridgeHealthPayloadFromObserve(health observepkg.BridgeInstanceHealth) contract.BridgeHealthPayload { + var lastSuccessAt *time.Time + if !health.LastSuccessAt.IsZero() { + timestamp := health.LastSuccessAt + lastSuccessAt = &timestamp + } + var lastErrorAt *time.Time if !health.LastErrorAt.IsZero() { timestamp := health.LastErrorAt @@ -356,6 +362,7 @@ func BridgeHealthPayloadFromObserve(health observepkg.BridgeInstanceHealth) cont DeliveryDroppedByReason: maps.Clone(health.DeliveryDroppedByReason), DeliveryFailuresTotal: health.DeliveryFailuresTotal, AuthFailuresTotal: health.AuthFailuresTotal, + LastSuccessAt: lastSuccessAt, LastError: health.LastError, LastErrorAt: lastErrorAt, } diff --git a/internal/api/core/handlers.go b/internal/api/core/handlers.go index 57a180da4..c56de267a 100644 --- a/internal/api/core/handlers.go +++ b/internal/api/core/handlers.go @@ -30,6 +30,7 @@ type BaseHandlerConfig struct { IncludeSessionWorkspaceInSSE bool Sessions SessionManager Network NetworkService + NetworkStore NetworkStore Observer 
Observer Automation AutomationManager Bridges BridgeService @@ -56,6 +57,7 @@ type BaseHandlers struct { IncludeSessionWorkspaceInSSE bool Sessions SessionManager Network NetworkService + NetworkStore NetworkStore Observer Observer Automation AutomationManager Bridges BridgeService @@ -117,6 +119,7 @@ func NewBaseHandlers(cfg BaseHandlerConfig) *BaseHandlers { IncludeSessionWorkspaceInSSE: cfg.IncludeSessionWorkspaceInSSE, Sessions: cfg.Sessions, Network: cfg.Network, + NetworkStore: cfg.NetworkStore, Observer: cfg.Observer, Automation: cfg.Automation, Bridges: cfg.Bridges, diff --git a/internal/api/core/interfaces.go b/internal/api/core/interfaces.go index 56ce29650..ff54dd2c9 100644 --- a/internal/api/core/interfaces.go +++ b/internal/api/core/interfaces.go @@ -54,6 +54,7 @@ type Observer interface { type BridgeService interface { bridgepkg.Registry bridgepkg.TargetResolver + ListProviders(ctx context.Context) ([]bridgepkg.BridgeProvider, error) StartInstance(ctx context.Context, id string) (*bridgepkg.BridgeInstance, error) StopInstance(ctx context.Context, id string) (*bridgepkg.BridgeInstance, error) RestartInstance(ctx context.Context, id string) (*bridgepkg.BridgeInstance, error) @@ -68,6 +69,12 @@ type NetworkService interface { Inbox(ctx context.Context, sessionID string) ([]network.Envelope, error) } +// NetworkStore exposes persisted network audit and timeline queries to the API layer. +type NetworkStore interface { + ListNetworkAudit(ctx context.Context, query store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) + ListNetworkMessages(ctx context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) +} + // DreamTrigger exposes consolidation controls and state to the API layer. 
type DreamTrigger interface { Trigger(ctx context.Context, workspace string) (bool, string, error) diff --git a/internal/api/core/network.go b/internal/api/core/network.go index a5467a340..32c34fe97 100644 --- a/internal/api/core/network.go +++ b/internal/api/core/network.go @@ -2,6 +2,7 @@ package core import ( "bytes" + "context" "encoding/json" "errors" "fmt" @@ -12,6 +13,7 @@ import ( "github.com/gin-gonic/gin" "github.com/pedronauck/agh/internal/api/contract" "github.com/pedronauck/agh/internal/network" + "github.com/pedronauck/agh/internal/session" ) func (h *BaseHandlers) networkServiceRequired() (NetworkService, error) { @@ -47,7 +49,59 @@ func (h *BaseHandlers) NetworkPeers(c *gin.Context) { h.respondError(c, StatusForNetworkError(err), err) return } - c.JSON(http.StatusOK, contract.NetworkPeersResponse{Peers: NetworkPeerPayloadsFromInfos(peers)}) + sessionByID := h.networkPeerSessionInfoMap(c.Request.Context(), peers) + payload := make([]contract.NetworkPeerPayload, 0, len(peers)) + for _, peer := range peers { + payload = append(payload, networkPeerPayloadFromInfoWithSessions(peer, sessionByID)) + } + c.JSON(http.StatusOK, contract.NetworkPeersResponse{Peers: payload}) +} + +func (h *BaseHandlers) networkPeerSessionInfoMap( + ctx context.Context, + peers []network.PeerInfo, +) map[string]*session.SessionInfo { + if h == nil || h.Sessions == nil || len(peers) == 0 { + return nil + } + + sessionByID := make(map[string]*session.SessionInfo, len(peers)) + for _, peer := range peers { + if peer.SessionID == nil { + continue + } + + sessionID := strings.TrimSpace(*peer.SessionID) + if sessionID == "" { + continue + } + if _, seen := sessionByID[sessionID]; seen { + continue + } + + info, err := h.Sessions.Status(ctx, sessionID) + if err != nil { + if h.Logger != nil { + h.Logger.Warn( + h.transportName()+": skip network peer session enrichment", + "session_id", + sessionID, + "peer_id", + strings.TrimSpace(peer.PeerID), + "error", + err, + ) + } + continue + } 
+ if info != nil { + sessionByID[sessionID] = info + } + } + if len(sessionByID) == 0 { + return nil + } + return sessionByID } // NetworkChannels returns the active runtime channels. @@ -58,12 +112,12 @@ func (h *BaseHandlers) NetworkChannels(c *gin.Context) { return } - channels, err := service.ListChannels(c.Request.Context()) + channels, err := h.networkChannelPayloads(c.Request.Context(), service) if err != nil { h.respondError(c, StatusForNetworkError(err), err) return } - c.JSON(http.StatusOK, contract.NetworkChannelsResponse{Channels: NetworkChannelPayloadsFromInfos(channels)}) + c.JSON(http.StatusOK, contract.NetworkChannelsResponse{Channels: channels}) } // NetworkSend validates and forwards one outbound network send request. @@ -234,11 +288,18 @@ func NetworkPeerPayloadsFromInfos(peers []network.PeerInfo) []contract.NetworkPe // NetworkPeerPayloadFromInfo converts one visible peer snapshot into the shared payload. func NetworkPeerPayloadFromInfo(peer network.PeerInfo) contract.NetworkPeerPayload { + displayName := peer.PeerID + if peer.PeerCard.DisplayName != nil { + if trimmed := strings.TrimSpace(*peer.PeerCard.DisplayName); trimmed != "" { + displayName = trimmed + } + } return contract.NetworkPeerPayload{ - SessionID: peer.SessionID, - PeerID: peer.PeerID, - Channel: peer.Channel, - Local: peer.Local, + SessionID: peer.SessionID, + PeerID: peer.PeerID, + DisplayName: displayName, + Channel: peer.Channel, + Local: peer.Local, PeerCard: contract.NetworkPeerCardPayload{ PeerID: peer.PeerCard.PeerID, DisplayName: peer.PeerCard.DisplayName, diff --git a/internal/api/core/network_details.go b/internal/api/core/network_details.go new file mode 100644 index 000000000..187d3f64e --- /dev/null +++ b/internal/api/core/network_details.go @@ -0,0 +1,739 @@ +package core + +import ( + "context" + "errors" + "fmt" + "net/http" + "sort" + "strings" + "time" + + "github.com/gin-gonic/gin" + "github.com/pedronauck/agh/internal/api/contract" + 
"github.com/pedronauck/agh/internal/network" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/store" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +type networkChannelAggregate struct { + channel string + peerCount int + localPeerCount int + remotePeerCount int + sessionCount int + messageCount int + lastMessageAt *time.Time +} + +var errNetworkChannelNotFound = errors.New("api: network channel not found") + +func (h *BaseHandlers) networkStoreRequired() (NetworkStore, error) { + if h == nil || h.NetworkStore == nil { + return nil, errors.New("api: network store is required") + } + return h.NetworkStore, nil +} + +// CreateNetworkChannel validates and creates one new channel by starting a new session per selected agent. +func (h *BaseHandlers) CreateNetworkChannel(c *gin.Context) { + service, err := h.networkServiceRequired() + if err != nil { + h.respondError(c, http.StatusServiceUnavailable, err) + return + } + + var req contract.CreateNetworkChannelRequest + if err := c.ShouldBindJSON(&req); err != nil { + h.respondError(c, http.StatusBadRequest, fmt.Errorf("%s: decode create network channel request: %w", h.transportName(), err)) + return + } + + channel, resolved, agentNames, err := h.resolveCreateNetworkChannelRequest(c.Request.Context(), req) + if err != nil { + status := http.StatusBadRequest + switch { + case errors.Is(err, workspacepkg.ErrWorkspaceNotFound), + errors.Is(err, workspacepkg.ErrWorkspaceRootMissing): + status = StatusForWorkspaceError(err) + case errors.Is(err, workspacepkg.ErrAgentNotAvailable): + status = StatusForSessionError(err) + case errors.Is(err, network.ErrInvalidField): + status = StatusForNetworkError(err) + } + h.respondError(c, status, err) + return + } + + createdIDs := make([]string, 0, len(agentNames)) + for _, agentName := range agentNames { + sess, createErr := h.Sessions.Create(c.Request.Context(), session.CreateOpts{ + AgentName: agentName, + Workspace: resolved.ID, 
+ Channel: channel, + Type: session.SessionTypeUser, + }) + if createErr != nil { + if rollbackErr := rollbackCreatedNetworkSessions(c.Request.Context(), h.Sessions, createdIDs); rollbackErr != nil { + createErr = errors.Join(createErr, rollbackErr) + } + h.respondError(c, StatusForSessionError(createErr), createErr) + return + } + if sess != nil && sess.Info() != nil { + createdIDs = append(createdIDs, sess.Info().ID) + } + } + + detail, detailErr := h.networkChannelDetailPayload(c.Request.Context(), service, channel) + if detailErr != nil { + if rollbackErr := rollbackCreatedNetworkSessions(c.Request.Context(), h.Sessions, createdIDs); rollbackErr != nil { + detailErr = errors.Join(detailErr, rollbackErr) + } + h.respondError(c, http.StatusInternalServerError, detailErr) + return + } + + c.JSON(http.StatusCreated, contract.CreateNetworkChannelResponse{Channel: detail}) +} + +// NetworkChannel returns one network channel detail payload. +func (h *BaseHandlers) NetworkChannel(c *gin.Context) { + service, err := h.networkServiceRequired() + if err != nil { + h.respondError(c, http.StatusServiceUnavailable, err) + return + } + + channel, err := normalizeNetworkChannel(c.Param("channel")) + if err != nil { + h.respondError(c, StatusForNetworkError(err), err) + return + } + + detail, err := h.networkChannelDetailPayload(c.Request.Context(), service, channel) + if err != nil { + if isNetworkChannelNotFound(err) { + h.respondError(c, http.StatusNotFound, err) + return + } + h.respondError(c, http.StatusInternalServerError, err) + return + } + + c.JSON(http.StatusOK, contract.NetworkChannelResponse{Channel: detail}) +} + +// NetworkChannelMessages returns the read-only message timeline for one network channel. 
+func (h *BaseHandlers) NetworkChannelMessages(c *gin.Context) { + service, err := h.networkServiceRequired() + if err != nil { + h.respondError(c, http.StatusServiceUnavailable, err) + return + } + networkStore, err := h.networkStoreRequired() + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + channel, err := normalizeNetworkChannel(c.Param("channel")) + if err != nil { + h.respondError(c, StatusForNetworkError(err), err) + return + } + limit, err := ParseOptionalInt(c.Query("limit")) + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + + sessions, err := h.Sessions.ListAll(c.Request.Context()) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + peers, err := service.ListPeers(c.Request.Context(), channel) + if err != nil { + h.respondError(c, StatusForNetworkError(err), err) + return + } + + messages, err := networkStore.ListNetworkMessages(c.Request.Context(), store.NetworkMessageQuery{ + Channel: channel, + Limit: limit, + }) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + if len(messages) == 0 && !networkChannelExists(sessions, peers, channel) { + notFoundErr := fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) + h.respondError(c, http.StatusNotFound, notFoundErr) + return + } + + directionByMessageID := map[string]string{} + if len(messages) > 0 { + auditEntries, auditErr := networkStore.ListNetworkAudit(c.Request.Context(), store.NetworkAuditQuery{ + Channel: channel, + }) + if auditErr != nil { + h.respondError(c, http.StatusInternalServerError, auditErr) + return + } + directionByMessageID = networkMessageDirectionMap(auditEntries, networkMessageIDSet(messages)) + } + + sessionByID := sessionInfoMapByID(sessions) + peerByID := peerInfoMapByID(peers) + payload := make([]contract.NetworkChannelMessagePayload, 0, len(messages)) + for _, entry := range messages { + payload = append(payload, 
NetworkChannelMessagePayloadFromEntry( + entry, + directionByMessageID[strings.TrimSpace(entry.MessageID)], + sessionByID, + peerByID, + )) + } + + c.JSON(http.StatusOK, contract.NetworkChannelMessagesResponse{Messages: payload}) +} + +// NetworkPeer returns one selected peer detail payload. +func (h *BaseHandlers) NetworkPeer(c *gin.Context) { + service, err := h.networkServiceRequired() + if err != nil { + h.respondError(c, http.StatusServiceUnavailable, err) + return + } + networkStore, err := h.networkStoreRequired() + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + peerID := strings.TrimSpace(c.Param("peer_id")) + if peerID == "" { + err := NewNetworkValidationError(errors.New("peer_id path is required")) + h.respondError(c, http.StatusBadRequest, err) + return + } + + peers, err := service.ListPeers(c.Request.Context(), "") + if err != nil { + h.respondError(c, StatusForNetworkError(err), err) + return + } + peer, ok := findPeerInfo(peers, peerID) + if !ok { + h.respondError(c, http.StatusNotFound, fmt.Errorf("api: network peer not found: %s", peerID)) + return + } + + sessions, err := h.Sessions.ListAll(c.Request.Context()) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + auditEntries, err := h.loadPeerAuditEntries(c.Request.Context(), networkStore, peer) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + payload := NetworkPeerDetailPayloadFromInfo(peer, sessionInfoMapByID(sessions), summarizePeerMetrics(peer, auditEntries)) + c.JSON(http.StatusOK, contract.NetworkPeerResponse{Peer: payload}) +} + +func (h *BaseHandlers) resolveCreateNetworkChannelRequest( + ctx context.Context, + req contract.CreateNetworkChannelRequest, +) (string, workspacepkg.ResolvedWorkspace, []string, error) { + channel, err := normalizeNetworkChannel(req.Channel) + if err != nil { + return "", workspacepkg.ResolvedWorkspace{}, nil, err + } + + workspaceID := 
strings.TrimSpace(req.WorkspaceID) + if workspaceID == "" { + return "", workspacepkg.ResolvedWorkspace{}, nil, NewNetworkValidationError(errors.New("workspace_id is required")) + } + + resolved, err := h.Workspaces.Resolve(ctx, workspaceID) + if err != nil { + return "", workspacepkg.ResolvedWorkspace{}, nil, err + } + + agentNames, err := normalizeNetworkAgentNames(req.AgentNames) + if err != nil { + return "", workspacepkg.ResolvedWorkspace{}, nil, err + } + available := make(map[string]struct{}, len(resolved.Agents)) + for _, agent := range resolved.Agents { + available[strings.TrimSpace(agent.Name)] = struct{}{} + } + for _, agentName := range agentNames { + if _, ok := available[agentName]; ok { + continue + } + return "", workspacepkg.ResolvedWorkspace{}, nil, fmt.Errorf("%w: %s", workspacepkg.ErrAgentNotAvailable, agentName) + } + + return channel, resolved, agentNames, nil +} + +func normalizeNetworkChannel(channel string) (string, error) { + trimmed := strings.TrimSpace(channel) + if trimmed == "" { + return "", NewNetworkValidationError(errors.New("channel is required")) + } + if err := network.ValidateChannel(trimmed); err != nil { + return "", err + } + return trimmed, nil +} + +func normalizeNetworkAgentNames(agentNames []string) ([]string, error) { + if len(agentNames) == 0 { + return nil, NewNetworkValidationError(errors.New("agent_names is required")) + } + + normalized := make([]string, 0, len(agentNames)) + seen := make(map[string]struct{}, len(agentNames)) + for _, raw := range agentNames { + name := strings.TrimSpace(raw) + if name == "" { + return nil, NewNetworkValidationError(errors.New("agent_names entries are required")) + } + if _, ok := seen[name]; ok { + return nil, NewNetworkValidationError(fmt.Errorf("agent_names contains duplicate entry %q", name)) + } + seen[name] = struct{}{} + normalized = append(normalized, name) + } + return normalized, nil +} + +func rollbackCreatedNetworkSessions(ctx context.Context, sessions SessionManager, 
sessionIDs []string) error { + if len(sessionIDs) == 0 { + return nil + } + + var rollbackErr error + for _, sessionID := range sessionIDs { + if strings.TrimSpace(sessionID) == "" { + continue + } + rollbackErr = errors.Join( + rollbackErr, + sessions.StopWithCause(ctx, sessionID, session.CauseFailed, "rollback network channel creation"), + ) + } + return rollbackErr +} + +func (h *BaseHandlers) networkChannelPayloads(ctx context.Context, service NetworkService) ([]contract.NetworkChannelPayload, error) { + runtimePeers, err := service.ListPeers(ctx, "") + if err != nil { + return nil, err + } + sessions, err := h.Sessions.ListAll(ctx) + if err != nil { + return nil, err + } + + aggregates := make(map[string]*networkChannelAggregate) + for _, info := range sessions { + if !networkChannelSessionVisible(info) { + continue + } + channel := strings.TrimSpace(info.Channel) + aggregate := ensureNetworkChannelAggregate(aggregates, channel) + aggregate.sessionCount++ + } + for _, peer := range runtimePeers { + aggregate := ensureNetworkChannelAggregate(aggregates, peer.Channel) + aggregate.peerCount++ + if peer.Local { + aggregate.localPeerCount++ + } else { + aggregate.remotePeerCount++ + } + } + + if h != nil && h.NetworkStore != nil { + messages, msgErr := h.NetworkStore.ListNetworkMessages(ctx, store.NetworkMessageQuery{}) + if msgErr != nil { + return nil, msgErr + } + for _, message := range messages { + aggregate := ensureNetworkChannelAggregate(aggregates, message.Channel) + aggregate.messageCount++ + aggregate.lastMessageAt = laterTimePtr(aggregate.lastMessageAt, message.Timestamp) + } + } + + channels := make([]contract.NetworkChannelPayload, 0, len(aggregates)) + for _, aggregate := range aggregates { + if aggregate == nil { + continue + } + channels = append(channels, contract.NetworkChannelPayload{ + Channel: aggregate.channel, + PeerCount: aggregate.peerCount, + LocalPeerCount: aggregate.localPeerCount, + RemotePeerCount: aggregate.remotePeerCount, + 
SessionCount: aggregate.sessionCount, + MessageCount: aggregate.messageCount, + LastMessageAt: cloneTimePtr(aggregate.lastMessageAt), + }) + } + sort.Slice(channels, func(i int, j int) bool { + return channels[i].Channel < channels[j].Channel + }) + return channels, nil +} + +func (h *BaseHandlers) networkChannelDetailPayload( + ctx context.Context, + service NetworkService, + channel string, +) (contract.NetworkChannelDetailPayload, error) { + peers, err := service.ListPeers(ctx, channel) + if err != nil { + return contract.NetworkChannelDetailPayload{}, err + } + sessions, err := h.Sessions.ListAll(ctx) + if err != nil { + return contract.NetworkChannelDetailPayload{}, err + } + + filteredSessions := sessionsForChannel(sessions, channel) + messageCount := 0 + var lastMessageAt *time.Time + if h != nil && h.NetworkStore != nil { + messages, msgErr := h.NetworkStore.ListNetworkMessages(ctx, store.NetworkMessageQuery{Channel: channel}) + if msgErr != nil { + return contract.NetworkChannelDetailPayload{}, msgErr + } + messageCount = len(messages) + if messageCount > 0 { + lastMessageAt = laterTimePtr(nil, messages[len(messages)-1].Timestamp) + } + } + if len(filteredSessions) == 0 && len(peers) == 0 && messageCount == 0 { + return contract.NetworkChannelDetailPayload{}, fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) + } + + sessionByID := sessionInfoMapByID(filteredSessions) + payloadPeers := make([]contract.NetworkPeerPayload, 0, len(peers)) + localPeerCount := 0 + for _, peer := range peers { + if peer.Local { + localPeerCount++ + } + payloadPeers = append(payloadPeers, networkPeerPayloadFromInfoWithSessions(peer, sessionByID)) + } + + return contract.NetworkChannelDetailPayload{ + Channel: channel, + PeerCount: len(peers), + LocalPeerCount: localPeerCount, + RemotePeerCount: len(peers) - localPeerCount, + SessionCount: len(filteredSessions), + MessageCount: messageCount, + LastMessageAt: cloneTimePtr(lastMessageAt), + Sessions: 
SessionPayloadsFromInfos(filteredSessions), + Peers: payloadPeers, + }, nil +} + +func ensureNetworkChannelAggregate(aggregates map[string]*networkChannelAggregate, channel string) *networkChannelAggregate { + trimmed := strings.TrimSpace(channel) + aggregate, ok := aggregates[trimmed] + if ok && aggregate != nil { + return aggregate + } + aggregate = &networkChannelAggregate{channel: trimmed} + aggregates[trimmed] = aggregate + return aggregate +} + +func sessionsForChannel(sessions []*session.SessionInfo, channel string) []*session.SessionInfo { + filtered := make([]*session.SessionInfo, 0, len(sessions)) + for _, info := range sessions { + if !networkChannelSessionVisible(info) || strings.TrimSpace(info.Channel) != channel { + continue + } + filtered = append(filtered, info) + } + return filtered +} + +func networkChannelExists(sessions []*session.SessionInfo, peers []network.PeerInfo, channel string) bool { + for _, info := range sessions { + if networkChannelSessionVisible(info) && strings.TrimSpace(info.Channel) == channel { + return true + } + } + for _, peer := range peers { + if strings.TrimSpace(peer.Channel) == channel { + return true + } + } + return false +} + +func networkChannelSessionVisible(info *session.SessionInfo) bool { + if info == nil { + return false + } + if info.State == session.StateStopped { + return false + } + return strings.TrimSpace(info.Channel) != "" +} + +func isNetworkChannelNotFound(err error) bool { + return errors.Is(err, errNetworkChannelNotFound) +} + +func sessionInfoMapByID(sessions []*session.SessionInfo) map[string]*session.SessionInfo { + index := make(map[string]*session.SessionInfo, len(sessions)) + for _, info := range sessions { + if info == nil { + continue + } + index[strings.TrimSpace(info.ID)] = info + } + return index +} + +func peerInfoMapByID(peers []network.PeerInfo) map[string]network.PeerInfo { + index := make(map[string]network.PeerInfo, len(peers)) + for _, peer := range peers { + 
index[strings.TrimSpace(peer.PeerID)] = peer + } + return index +} + +func networkMessageIDSet(messages []store.NetworkMessageEntry) map[string]struct{} { + ids := make(map[string]struct{}, len(messages)) + for _, message := range messages { + messageID := strings.TrimSpace(message.MessageID) + if messageID == "" { + continue + } + ids[messageID] = struct{}{} + } + return ids +} + +func networkMessageDirectionMap( + entries []store.NetworkAuditEntry, + messageIDs map[string]struct{}, +) map[string]string { + directions := make(map[string]string, len(messageIDs)) + for _, entry := range entries { + messageID := strings.TrimSpace(entry.MessageID) + if messageID == "" { + continue + } + if _, ok := messageIDs[messageID]; !ok { + continue + } + direction := strings.TrimSpace(entry.Direction) + if direction != network.AuditDirectionSent && direction != network.AuditDirectionReceived { + continue + } + if _, seen := directions[messageID]; seen { + continue + } + directions[messageID] = direction + } + return directions +} + +func findPeerInfo(peers []network.PeerInfo, peerID string) (network.PeerInfo, bool) { + target := strings.TrimSpace(peerID) + for _, peer := range peers { + if strings.TrimSpace(peer.PeerID) == target { + return peer, true + } + } + return network.PeerInfo{}, false +} + +func laterTimePtr(current *time.Time, candidate time.Time) *time.Time { + if candidate.IsZero() { + return cloneTimePtr(current) + } + if current == nil || candidate.After(current.UTC()) { + value := candidate.UTC() + return &value + } + return cloneTimePtr(current) +} + +func networkPeerPayloadFromInfoWithSessions( + peer network.PeerInfo, + sessionsByID map[string]*session.SessionInfo, +) contract.NetworkPeerPayload { + payload := NetworkPeerPayloadFromInfo(peer) + payload.DisplayName = networkPeerDisplayName(peer, sessionsByID) + return payload +} + +func networkPeerDisplayName(peer network.PeerInfo, sessionsByID map[string]*session.SessionInfo) string { + if 
peer.PeerCard.DisplayName != nil { + if value := strings.TrimSpace(*peer.PeerCard.DisplayName); value != "" { + return value + } + } + if peer.SessionID != nil && sessionsByID != nil { + if info, ok := sessionsByID[strings.TrimSpace(*peer.SessionID)]; ok && info != nil { + if value := strings.TrimSpace(info.Name); value != "" { + return value + } + if value := strings.TrimSpace(info.AgentName); value != "" { + return value + } + } + } + return strings.TrimSpace(peer.PeerID) +} + +// NetworkChannelMessagePayloadFromEntry converts one persisted timeline row into the shared payload. +func NetworkChannelMessagePayloadFromEntry( + entry store.NetworkMessageEntry, + auditDirection string, + sessionsByID map[string]*session.SessionInfo, + peersByID map[string]network.PeerInfo, +) contract.NetworkChannelMessagePayload { + storedSessionID := strings.TrimSpace(entry.SessionID) + displayName := strings.TrimSpace(entry.PeerFrom) + local := false + payloadSessionID := "" + + if peer, ok := peersByID[strings.TrimSpace(entry.PeerFrom)]; ok { + displayName = networkPeerDisplayName(peer, sessionsByID) + local = peer.Local + } + + switch strings.TrimSpace(auditDirection) { + case network.AuditDirectionSent: + local = true + payloadSessionID = storedSessionID + case network.AuditDirectionReceived: + local = false + default: + if local { + payloadSessionID = storedSessionID + } + } + + if local && payloadSessionID != "" { + if info, ok := sessionsByID[payloadSessionID]; ok && info != nil { + if value := strings.TrimSpace(info.Name); value != "" { + displayName = value + } else if value := strings.TrimSpace(info.AgentName); value != "" { + displayName = value + } + } + } + + return contract.NetworkChannelMessagePayload{ + MessageID: strings.TrimSpace(entry.MessageID), + Channel: strings.TrimSpace(entry.Channel), + PeerID: strings.TrimSpace(entry.PeerFrom), + DisplayName: displayName, + SessionID: payloadSessionID, + Local: local, + Intent: strings.TrimSpace(entry.Intent), + Text: 
strings.TrimSpace(entry.Text), + Timestamp: entry.Timestamp.UTC(), + } +} + +func (h *BaseHandlers) loadPeerAuditEntries( + ctx context.Context, + networkStore NetworkStore, + peer network.PeerInfo, +) ([]store.NetworkAuditEntry, error) { + if peer.SessionID != nil { + return networkStore.ListNetworkAudit(ctx, store.NetworkAuditQuery{ + SessionID: strings.TrimSpace(*peer.SessionID), + }) + } + + entries, err := networkStore.ListNetworkAudit(ctx, store.NetworkAuditQuery{ + Channel: strings.TrimSpace(peer.Channel), + }) + if err != nil { + return nil, err + } + + filtered := make([]store.NetworkAuditEntry, 0, len(entries)) + for _, entry := range entries { + if networkAuditMatchesPeer(peer, entry) { + filtered = append(filtered, entry) + } + } + return filtered, nil +} + +func networkAuditMatchesPeer(peer network.PeerInfo, entry store.NetworkAuditEntry) bool { + targetPeerID := strings.TrimSpace(peer.PeerID) + if targetPeerID == "" { + return false + } + if peer.SessionID != nil && strings.TrimSpace(entry.SessionID) == strings.TrimSpace(*peer.SessionID) { + return true + } + return strings.TrimSpace(entry.PeerFrom) == targetPeerID || strings.TrimSpace(entry.PeerTo) == targetPeerID +} + +func summarizePeerMetrics(peer network.PeerInfo, entries []store.NetworkAuditEntry) contract.NetworkPeerMetricsPayload { + metrics := contract.NetworkPeerMetricsPayload{} + for _, entry := range entries { + if !networkAuditMatchesPeer(peer, entry) { + continue + } + switch strings.TrimSpace(entry.Direction) { + case network.AuditDirectionSent: + metrics.Sent++ + case network.AuditDirectionReceived: + metrics.Received++ + case network.AuditDirectionRejected: + metrics.Rejected++ + case network.AuditDirectionDelivered: + metrics.Delivered++ + } + } + return metrics +} + +// NetworkPeerDetailPayloadFromInfo converts one peer info plus metrics into the shared detail payload. 
+func NetworkPeerDetailPayloadFromInfo( + peer network.PeerInfo, + sessionsByID map[string]*session.SessionInfo, + metrics contract.NetworkPeerMetricsPayload, +) contract.NetworkPeerDetailPayload { + payload := contract.NetworkPeerDetailPayload{ + SessionID: peer.SessionID, + PeerID: peer.PeerID, + DisplayName: networkPeerDisplayName(peer, sessionsByID), + Channel: peer.Channel, + Local: peer.Local, + PeerCard: NetworkPeerPayloadFromInfo(peer).PeerCard, + JoinedAt: cloneTimePtr(peer.JoinedAt), + LastSeen: cloneTimePtr(peer.LastSeen), + ExpiresAt: cloneTimePtr(peer.ExpiresAt), + Metrics: metrics, + } + return payload +} diff --git a/internal/api/core/network_test.go b/internal/api/core/network_test.go index 7fcd03f23..743c4e858 100644 --- a/internal/api/core/network_test.go +++ b/internal/api/core/network_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "net/http" + "sort" "strings" "testing" "time" @@ -12,8 +13,12 @@ import ( "github.com/pedronauck/agh/internal/api/contract" "github.com/pedronauck/agh/internal/api/core" "github.com/pedronauck/agh/internal/api/testutil" + aghconfig "github.com/pedronauck/agh/internal/config" "github.com/pedronauck/agh/internal/memory" "github.com/pedronauck/agh/internal/network" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/store" + workspacepkg "github.com/pedronauck/agh/internal/workspace" ) func TestNetworkConversionHelpersPreserveMetadata(t *testing.T) { @@ -136,6 +141,22 @@ func TestNetworkConversionHelpersPreserveMetadata(t *testing.T) { t.Fatalf("Ext = %#v, want cloned ext payload", envelopePayload.Ext) } }) + + t.Run("Should fall back to peer id when peer-card display name is blank", func(t *testing.T) { + t.Parallel() + + blank := " " + payload := core.NetworkPeerPayloadFromInfo(network.PeerInfo{ + PeerID: "reviewer.sess-b", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "reviewer.sess-b", DisplayName: &blank}, + }) + + if got, want := 
payload.DisplayName, "reviewer.sess-b"; got != want { + t.Fatalf("payload.DisplayName = %q, want %q", got, want) + } + }) } func TestBaseHandlersNetworkEndpoints(t *testing.T) { @@ -175,12 +196,12 @@ func TestBaseHandlersNetworkEndpoints(t *testing.T) { }, nil }, ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { - if channel != "builders" { - t.Fatalf("ListPeers() channel = %q, want builders", channel) + if channel != "builders" && channel != "" { + t.Fatalf("ListPeers() channel = %q, want builders or empty", channel) } displayName := "Reviewer" sessionID := "sess-a" - return []network.PeerInfo{{ + peers := []network.PeerInfo{{ SessionID: &sessionID, PeerID: "reviewer.sess-a", Channel: "builders", @@ -197,7 +218,22 @@ func TestBaseHandlersNetworkEndpoints(t *testing.T) { JoinedAt: timePtr(fixedNow), LastSeen: timePtr(fixedNow), ExpiresAt: timePtr(fixedNow.Add(time.Minute)), - }}, nil + }} + if channel == "" { + remoteDisplayName := "Coder" + peers = append(peers, network.PeerInfo{ + PeerID: "coder.sess-remote", + Channel: "builders", + Local: false, + PeerCard: network.PeerCard{ + PeerID: "coder.sess-remote", + DisplayName: &remoteDisplayName, + }, + LastSeen: timePtr(fixedNow), + ExpiresAt: timePtr(fixedNow.Add(time.Minute)), + }) + } + return peers, nil }, ListChannelsFn: func(context.Context) ([]network.ChannelInfo, error) { return []network.ChannelInfo{{Channel: "builders", PeerCount: 2}}, nil @@ -315,6 +351,263 @@ func TestBaseHandlersNetworkEndpoints(t *testing.T) { }) } +func TestBaseHandlersNetworkPeersUseBestEffortSessionEnrichment(t *testing.T) { + t.Parallel() + + t.Run("Should enrich local peers and fall back to peer-card display names on lookup failures", func(t *testing.T) { + localSessionID := "sess-local" + brokenSessionID := "sess-broken" + brokenDisplayName := "Broken peer" + + manager := testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + t.Fatal("ListAll() should 
not be called for peer enrichment") + return nil, nil + }, + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + switch id { + case localSessionID: + return &session.SessionInfo{ + ID: localSessionID, + Name: "Reviewer", + AgentName: "reviewer", + }, nil + case brokenSessionID: + return nil, errors.New("status lookup failed") + default: + return nil, session.ErrSessionNotFound + } + }, + } + + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + if got, want := channel, "builders"; got != want { + t.Fatalf("ListPeers() channel = %q, want %q", got, want) + } + return []network.PeerInfo{ + { + SessionID: &localSessionID, + PeerID: "reviewer.sess-local", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "reviewer.sess-local"}, + }, + { + SessionID: &brokenSessionID, + PeerID: "broken.sess-broken", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{ + PeerID: "broken.sess-broken", + DisplayName: &brokenDisplayName, + }, + }, + }, nil + }, + } + + resp := performRequest(t, fixture.Engine, http.MethodGet, "/network/peers?channel=builders", nil) + if resp.Code != http.StatusOK { + t.Fatalf("peers code = %d, want %d", resp.Code, http.StatusOK) + } + + var payload contract.NetworkPeersResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := len(payload.Peers), 2; got != want { + t.Fatalf("len(peers) = %d, want %d", got, want) + } + if got, want := payload.Peers[0].DisplayName, "Reviewer"; got != want { + t.Fatalf("peers[0].display_name = %q, want %q", got, want) + } + if got, want := payload.Peers[1].DisplayName, brokenDisplayName; got != want { + t.Fatalf("peers[1].display_name = %q, want %q", got, want) + } + }) +} + +func 
TestBaseHandlersCreateNetworkChannelRollsBackWhenDetailReadbackFails(t *testing.T) { + t.Parallel() + + t.Run("Should roll back created sessions when channel readback fails", func(t *testing.T) { + createdAt := time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC) + var rolledBack []string + manager := testutil.StubSessionManager{ + CreateFn: func(_ context.Context, opts session.CreateOpts) (*session.Session, error) { + return &session.Session{ + ID: "sess-" + opts.AgentName, + Name: strings.ToUpper(opts.AgentName), + AgentName: opts.AgentName, + WorkspaceID: opts.Workspace, + Channel: opts.Channel, + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: createdAt, + UpdatedAt: createdAt, + }, nil + }, + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return nil, errors.New("readback failed") + }, + StopWithCauseFn: func(_ context.Context, id string, cause session.StopCause, detail string) error { + if got, want := cause, session.CauseFailed; got != want { + t.Fatalf("StopWithCause() cause = %q, want %q", got, want) + } + if got, want := detail, "rollback network channel creation"; got != want { + t.Fatalf("StopWithCause() detail = %q, want %q", got, want) + } + rolledBack = append(rolledBack, id) + return nil + }, + } + workspaces := testutil.StubWorkspaceService{ + ResolveFn: func(_ context.Context, ref string) (workspacepkg.ResolvedWorkspace, error) { + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{ID: ref, Name: "Workspace"}, + Agents: []aghconfig.AgentDef{ + {Name: "coder"}, + {Name: "reviewer"}, + }, + }, nil + }, + } + + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, workspaces, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(context.Context, string) ([]network.PeerInfo, error) { + return nil, nil + }, + } + + resp := performRequest( + t, + fixture.Engine, + http.MethodPost, + 
"/network/channels", + []byte(`{"channel":"builders","workspace_id":"ws-1","agent_names":["coder","reviewer"]}`), + ) + if resp.Code != http.StatusInternalServerError { + t.Fatalf("create channel code = %d, want %d", resp.Code, http.StatusInternalServerError) + } + + sort.Strings(rolledBack) + if got, want := strings.Join(rolledBack, ","), "sess-coder,sess-reviewer"; got != want { + t.Fatalf("rolled back sessions = %q, want %q", got, want) + } + }) +} + +func TestBaseHandlersNetworkChannelsIncludeHistoryOnlyChannels(t *testing.T) { + t.Parallel() + + t.Run("Should include history-only channels from persisted message logs", func(t *testing.T) { + recordedAt := time.Date(2026, 4, 11, 18, 30, 0, 0, time.UTC) + fixture := newHandlerFixture(t, testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return nil, nil + }, + }, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(context.Context, string) ([]network.PeerInfo, error) { + return nil, nil + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkMessagesFn: func(_ context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + if got := query.Channel; got != "" { + t.Fatalf("ListNetworkMessages() channel = %q, want empty list query", got) + } + return []store.NetworkMessageEntry{{ + MessageID: "msg-history-only", + Channel: "builders", + PeerFrom: "reviewer.sess-remote", + Kind: "say", + Text: "History survives runtime disconnects.", + Timestamp: recordedAt, + }}, nil + }, + } + + resp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels", nil) + if resp.Code != http.StatusOK { + t.Fatalf("channels code = %d, want %d", resp.Code, http.StatusOK) + } + + var payload contract.NetworkChannelsResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want 
:= len(payload.Channels), 1; got != want { + t.Fatalf("len(channels) = %d, want %d", got, want) + } + if got, want := payload.Channels[0].Channel, "builders"; got != want { + t.Fatalf("channel = %q, want %q", got, want) + } + if got, want := payload.Channels[0].MessageCount, 1; got != want { + t.Fatalf("message_count = %d, want %d", got, want) + } + }) +} + +func TestBaseHandlersNetworkChannelReturnsHistoryOnlyDetails(t *testing.T) { + t.Parallel() + + t.Run("Should return history-only channel details from persisted message logs", func(t *testing.T) { + recordedAt := time.Date(2026, 4, 11, 19, 0, 0, 0, time.UTC) + fixture := newHandlerFixture(t, testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return nil, nil + }, + }, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + if got, want := channel, "builders"; got != want { + t.Fatalf("ListPeers() channel = %q, want %q", got, want) + } + return nil, nil + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkMessagesFn: func(_ context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + if got, want := query.Channel, "builders"; got != want { + t.Fatalf("ListNetworkMessages() channel = %q, want %q", got, want) + } + return []store.NetworkMessageEntry{{ + MessageID: "msg-history-detail", + Channel: "builders", + PeerFrom: "reviewer.sess-remote", + Kind: "say", + Text: "Still visible from persisted history.", + Timestamp: recordedAt, + }}, nil + }, + } + + resp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels/builders", nil) + if resp.Code != http.StatusOK { + t.Fatalf("channel detail code = %d, want %d body=%s", resp.Code, http.StatusOK, resp.Body.String()) + } + + var payload 
contract.NetworkChannelResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := payload.Channel.Channel, "builders"; got != want { + t.Fatalf("channel = %q, want %q", got, want) + } + if got, want := payload.Channel.MessageCount, 1; got != want { + t.Fatalf("message_count = %d, want %d", got, want) + } + if got, want := payload.Channel.SessionCount, 0; got != want { + t.Fatalf("session_count = %d, want %d", got, want) + } + if got, want := payload.Channel.PeerCount, 0; got != want { + t.Fatalf("peer_count = %d, want %d", got, want) + } + }) +} + func TestBaseHandlersNetworkErrorsAndDisabledMode(t *testing.T) { t.Parallel() @@ -379,7 +672,7 @@ func TestBaseHandlersNetworkErrorsAndDisabledMode(t *testing.T) { fixture := newHandlerFixture(t, testutil.StubSessionManager{}, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) fixture.Handlers.Config.Network.Enabled = true fixture.Handlers.Network = testutil.StubNetworkService{ - ListChannelsFn: func(context.Context) ([]network.ChannelInfo, error) { + ListPeersFn: func(context.Context, string) ([]network.PeerInfo, error) { return nil, network.ErrInvalidField }, } @@ -524,6 +817,626 @@ func TestValidationErrorHelpersPreserveInnerErrorChain(t *testing.T) { }) } +func TestBaseHandlersNetworkChannelEndpointsIgnoreStoppedSessions(t *testing.T) { + t.Parallel() + + createdAt := time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC) + coderSessionID := "sess-coder" + reviewerSessionID := "sess-reviewer" + + newFixture := func(t *testing.T) handlerFixture { + t.Helper() + + manager := testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return []*session.SessionInfo{ + { + ID: coderSessionID, + Name: "Coder", + AgentName: "coder", + WorkspaceID: "ws-1", + Channel: "builders", + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: createdAt, + UpdatedAt: createdAt, + }, + { + ID: reviewerSessionID, + Name: "Reviewer", + AgentName: 
"reviewer", + WorkspaceID: "ws-1", + Channel: "retro", + Type: session.SessionTypeUser, + State: session.StateStopped, + CreatedAt: createdAt.Add(time.Minute), + UpdatedAt: createdAt.Add(time.Minute), + }, + }, nil + }, + } + + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + switch channel { + case "": + return []network.PeerInfo{ + { + SessionID: &coderSessionID, + PeerID: "coder.sess-coder", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "coder.sess-coder"}, + JoinedAt: timePtr(createdAt), + LastSeen: timePtr(createdAt), + }, + }, nil + case "builders": + return []network.PeerInfo{ + { + SessionID: &coderSessionID, + PeerID: "coder.sess-coder", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "coder.sess-coder"}, + JoinedAt: timePtr(createdAt), + LastSeen: timePtr(createdAt), + }, + }, nil + case "retro": + return nil, nil + default: + t.Fatalf("unexpected ListPeers() channel %q", channel) + return nil, nil + } + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkAuditFn: func(_ context.Context, query store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) { + switch query.Channel { + case "builders": + return []store.NetworkAuditEntry{{ + ID: "naud-builders-01", + SessionID: coderSessionID, + Direction: network.AuditDirectionSent, + Kind: "say", + Channel: "builders", + PeerFrom: "coder.sess-coder", + MessageID: "msg-builders-01", + Size: 1, + Timestamp: createdAt.Add(2 * time.Minute), + }}, nil + case "retro": + return []store.NetworkAuditEntry{{ + ID: "naud-retro-01", + SessionID: reviewerSessionID, + Direction: network.AuditDirectionSent, + Kind: "say", + Channel: "retro", + PeerFrom: "reviewer.sess-reviewer", + MessageID: 
"msg-retro-01", + Size: 1, + Timestamp: createdAt.Add(3 * time.Minute), + }}, nil + default: + return nil, nil + } + }, + ListNetworkMessagesFn: func(_ context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + switch query.Channel { + case "": + return []store.NetworkMessageEntry{ + { + MessageID: "msg-builders-01", + SessionID: coderSessionID, + Channel: "builders", + PeerFrom: "coder.sess-coder", + Kind: "say", + Intent: "announce", + Text: "hello builders", + Timestamp: createdAt.Add(2 * time.Minute), + }, + { + MessageID: "msg-retro-01", + SessionID: reviewerSessionID, + Channel: "retro", + PeerFrom: "reviewer.sess-reviewer", + Kind: "say", + Text: "retro note", + Timestamp: createdAt.Add(3 * time.Minute), + }, + }, nil + case "builders": + return []store.NetworkMessageEntry{{ + MessageID: "msg-builders-01", + SessionID: coderSessionID, + Channel: "builders", + PeerFrom: "coder.sess-coder", + Kind: "say", + Intent: "announce", + Text: "hello builders", + Timestamp: createdAt.Add(2 * time.Minute), + }}, nil + case "retro": + return []store.NetworkMessageEntry{{ + MessageID: "msg-retro-01", + SessionID: reviewerSessionID, + Channel: "retro", + PeerFrom: "reviewer.sess-reviewer", + Kind: "say", + Text: "retro note", + Timestamp: createdAt.Add(3 * time.Minute), + }}, nil + default: + return nil, nil + } + }, + } + return fixture + } + + t.Run("Should keep stopped sessions out of the channel list while preserving history-only channels", func(t *testing.T) { + fixture := newFixture(t) + + channelsResp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels", nil) + if channelsResp.Code != http.StatusOK { + t.Fatalf("channels code = %d, want %d", channelsResp.Code, http.StatusOK) + } + + var channelsPayload contract.NetworkChannelsResponse + testutil.DecodeJSONResponse(t, channelsResp, &channelsPayload) + if got, want := len(channelsPayload.Channels), 2; got != want { + t.Fatalf("len(channels) = %d, want %d", got, want) 
+ } + sort.Slice(channelsPayload.Channels, func(i, j int) bool { + return channelsPayload.Channels[i].Channel < channelsPayload.Channels[j].Channel + }) + if got, want := channelsPayload.Channels[0].Channel, "builders"; got != want { + t.Fatalf("channels[0].Channel = %q, want %q", got, want) + } + if got, want := channelsPayload.Channels[0].SessionCount, 1; got != want { + t.Fatalf("channels[0].SessionCount = %d, want %d", got, want) + } + if got, want := channelsPayload.Channels[1].Channel, "retro"; got != want { + t.Fatalf("channels[1].Channel = %q, want %q", got, want) + } + if got, want := channelsPayload.Channels[1].SessionCount, 0; got != want { + t.Fatalf("channels[1].SessionCount = %d, want %d", got, want) + } + if got, want := channelsPayload.Channels[1].MessageCount, 1; got != want { + t.Fatalf("channels[1].MessageCount = %d, want %d", got, want) + } + }) + + t.Run("Should exclude stopped sessions from active channel details", func(t *testing.T) { + fixture := newFixture(t) + + channelResp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels/builders", nil) + if channelResp.Code != http.StatusOK { + t.Fatalf("channel detail code = %d, want %d", channelResp.Code, http.StatusOK) + } + + var channelPayload contract.NetworkChannelResponse + testutil.DecodeJSONResponse(t, channelResp, &channelPayload) + if got, want := channelPayload.Channel.Channel, "builders"; got != want { + t.Fatalf("channel detail channel = %q, want %q", got, want) + } + if got, want := channelPayload.Channel.Peers[0].DisplayName, "Coder"; got != want { + t.Fatalf("channel detail peer display = %q, want %q", got, want) + } + if got, want := channelPayload.Channel.MessageCount, 1; got != want { + t.Fatalf("channel detail message count = %d, want %d", got, want) + } + }) + + t.Run("Should preserve active local authors in channel message history", func(t *testing.T) { + fixture := newFixture(t) + + messagesResp := performRequest(t, fixture.Engine, http.MethodGet, 
"/network/channels/builders/messages", nil) + if messagesResp.Code != http.StatusOK { + t.Fatalf("channel messages code = %d, want %d", messagesResp.Code, http.StatusOK) + } + + var messagesPayload contract.NetworkChannelMessagesResponse + testutil.DecodeJSONResponse(t, messagesResp, &messagesPayload) + if got, want := len(messagesPayload.Messages), 1; got != want { + t.Fatalf("len(messages) = %d, want %d", got, want) + } + if got, want := messagesPayload.Messages[0].DisplayName, "Coder"; got != want { + t.Fatalf("message display_name = %q, want %q", got, want) + } + if !messagesPayload.Messages[0].Local { + t.Fatal("message local = false, want true") + } + }) + + t.Run("Should return history-only channel details without reviving stopped sessions", func(t *testing.T) { + fixture := newFixture(t) + + historyResp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels/retro", nil) + if historyResp.Code != http.StatusOK { + t.Fatalf("history-only channel detail code = %d, want %d", historyResp.Code, http.StatusOK) + } + + var historyPayload contract.NetworkChannelResponse + testutil.DecodeJSONResponse(t, historyResp, &historyPayload) + if got, want := historyPayload.Channel.Channel, "retro"; got != want { + t.Fatalf("history-only channel = %q, want %q", got, want) + } + if got, want := historyPayload.Channel.SessionCount, 0; got != want { + t.Fatalf("history-only session count = %d, want %d", got, want) + } + if got, want := historyPayload.Channel.PeerCount, 0; got != want { + t.Fatalf("history-only peer count = %d, want %d", got, want) + } + if got, want := historyPayload.Channel.MessageCount, 1; got != want { + t.Fatalf("history-only message count = %d, want %d", got, want) + } + }) +} + +func TestBaseHandlersNetworkChannelMessagesPreserveRemoteAuthors(t *testing.T) { + t.Parallel() + + t.Run("Should preserve remote author identity while keeping local session metadata intact", func(t *testing.T) { + createdAt := time.Date(2026, 4, 11, 18, 0, 0, 0, 
time.UTC) + localSessionID := "sess-coder" + remotePeerID := "reviewer.sess-remote" + + manager := testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return []*session.SessionInfo{{ + ID: localSessionID, + Name: "Coder", + AgentName: "coder", + WorkspaceID: "ws-1", + Channel: "builders", + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: createdAt, + UpdatedAt: createdAt, + }}, nil + }, + } + + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + if channel != "builders" { + t.Fatalf("ListPeers() channel = %q, want builders", channel) + } + displayName := "Reviewer" + return []network.PeerInfo{ + { + SessionID: &localSessionID, + PeerID: "coder.sess-coder", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "coder.sess-coder"}, + }, + { + PeerID: remotePeerID, + Channel: "builders", + Local: false, + PeerCard: network.PeerCard{ + PeerID: remotePeerID, + DisplayName: &displayName, + }, + }, + }, nil + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkAuditFn: func(_ context.Context, query store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) { + if got, want := query.Channel, "builders"; got != want { + t.Fatalf("ListNetworkAudit() channel = %q, want %q", got, want) + } + return []store.NetworkAuditEntry{ + { + ID: "naud-1", + SessionID: localSessionID, + Direction: network.AuditDirectionReceived, + Kind: "say", + Channel: "builders", + PeerFrom: remotePeerID, + MessageID: "msg-remote-01", + Size: 1, + Timestamp: createdAt.Add(time.Minute), + }, + { + ID: "naud-2", + SessionID: localSessionID, + Direction: network.AuditDirectionDelivered, + Kind: "say", + Channel: "builders", + PeerFrom: 
remotePeerID, + MessageID: "msg-remote-01", + Size: 1, + Timestamp: createdAt.Add(2 * time.Minute), + }, + { + ID: "naud-3", + SessionID: localSessionID, + Direction: network.AuditDirectionSent, + Kind: "say", + Channel: "builders", + PeerFrom: "coder.sess-coder", + MessageID: "msg-local-01", + Size: 1, + Timestamp: createdAt.Add(3 * time.Minute), + }, + }, nil + }, + ListNetworkMessagesFn: func(_ context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + if got, want := query.Channel, "builders"; got != want { + t.Fatalf("ListNetworkMessages() channel = %q, want %q", got, want) + } + return []store.NetworkMessageEntry{ + { + MessageID: "msg-remote-01", + SessionID: localSessionID, + Channel: "builders", + PeerFrom: remotePeerID, + Kind: "say", + Intent: "review", + Text: "Please double-check the rollout.", + Timestamp: createdAt.Add(time.Minute), + }, + { + MessageID: "msg-local-01", + SessionID: localSessionID, + Channel: "builders", + PeerFrom: "coder.sess-coder", + Kind: "say", + Intent: "announce", + Text: "Starting rollout now.", + Timestamp: createdAt.Add(3 * time.Minute), + }, + }, nil + }, + } + + resp := performRequest(t, fixture.Engine, http.MethodGet, "/network/channels/builders/messages", nil) + if resp.Code != http.StatusOK { + t.Fatalf("channel messages code = %d, want %d", resp.Code, http.StatusOK) + } + + var payload contract.NetworkChannelMessagesResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := len(payload.Messages), 2; got != want { + t.Fatalf("len(messages) = %d, want %d", got, want) + } + + if got, want := payload.Messages[0].DisplayName, "Reviewer"; got != want { + t.Fatalf("remote display_name = %q, want %q", got, want) + } + if payload.Messages[0].Local { + t.Fatal("remote message local = true, want false") + } + if got := payload.Messages[0].SessionID; got != "" { + t.Fatalf("remote session_id = %q, want empty", got) + } + + if got, want := payload.Messages[1].DisplayName, "Coder"; 
got != want { + t.Fatalf("local display_name = %q, want %q", got, want) + } + if !payload.Messages[1].Local { + t.Fatal("local message local = false, want true") + } + if got, want := payload.Messages[1].SessionID, localSessionID; got != want { + t.Fatalf("local session_id = %q, want %q", got, want) + } + }) +} + +func TestBaseHandlersCreateNetworkChannelCreatesSessionsPerAgent(t *testing.T) { + t.Parallel() + + t.Run("Should create one session per requested agent and return the aggregated channel payload", func(t *testing.T) { + var createCalls []session.CreateOpts + manager := testutil.StubSessionManager{ + CreateFn: func(_ context.Context, opts session.CreateOpts) (*session.Session, error) { + createCalls = append(createCalls, opts) + return &session.Session{ + ID: "sess-" + opts.AgentName, + Name: strings.ToUpper(opts.AgentName), + AgentName: opts.AgentName, + WorkspaceID: opts.Workspace, + Channel: opts.Channel, + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + }, nil + }, + ListAllFn: func(_ context.Context) ([]*session.SessionInfo, error) { + infos := make([]*session.SessionInfo, 0, len(createCalls)) + for _, call := range createCalls { + infos = append(infos, &session.SessionInfo{ + ID: "sess-" + call.AgentName, + Name: strings.ToUpper(call.AgentName), + AgentName: call.AgentName, + WorkspaceID: call.Workspace, + Channel: call.Channel, + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + }) + } + return infos, nil + }, + } + workspaces := testutil.StubWorkspaceService{ + ResolveFn: func(_ context.Context, ref string) (workspacepkg.ResolvedWorkspace, error) { + if ref != "ws-1" { + t.Fatalf("Resolve() ref = %q, want ws-1", ref) + } + return workspacepkg.ResolvedWorkspace{ + Workspace: 
workspacepkg.Workspace{ID: "ws-1", Name: "Workspace"}, + Agents: []aghconfig.AgentDef{ + {Name: "coder"}, + {Name: "reviewer"}, + }, + }, nil + }, + } + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, workspaces, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + if channel != "builders" { + return nil, nil + } + coderSessionID := "sess-coder" + reviewerSessionID := "sess-reviewer" + return []network.PeerInfo{ + { + SessionID: &coderSessionID, + PeerID: "coder.sess-coder", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "coder.sess-coder"}, + }, + { + SessionID: &reviewerSessionID, + PeerID: "reviewer.sess-reviewer", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "reviewer.sess-reviewer"}, + }, + }, nil + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkMessagesFn: func(_ context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + if query.Channel != "builders" { + return nil, nil + } + return nil, nil + }, + } + + resp := performRequest( + t, + fixture.Engine, + http.MethodPost, + "/network/channels", + []byte(`{"channel":"builders","workspace_id":"ws-1","agent_names":["coder","reviewer"]}`), + ) + if resp.Code != http.StatusCreated { + t.Fatalf("create channel code = %d, want %d; body=%s", resp.Code, http.StatusCreated, resp.Body.String()) + } + + if got, want := len(createCalls), 2; got != want { + t.Fatalf("len(createCalls) = %d, want %d", got, want) + } + expectedAgents := map[string]struct{}{ + "coder": {}, + "reviewer": {}, + } + for _, call := range createCalls { + if _, ok := expectedAgents[call.AgentName]; !ok { + t.Fatalf("Create() agent = %q, want coder/reviewer", call.AgentName) + } + delete(expectedAgents, call.AgentName) + if got, want := call.Workspace, "ws-1"; got 
!= want { + t.Fatalf("Create() workspace = %q, want %q", got, want) + } + if got, want := call.Channel, "builders"; got != want { + t.Fatalf("Create() channel = %q, want %q", got, want) + } + } + if len(expectedAgents) != 0 { + t.Fatalf("missing Create() calls for agents: %#v", expectedAgents) + } + + var payload contract.CreateNetworkChannelResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := payload.Channel.SessionCount, 2; got != want { + t.Fatalf("payload.Channel.SessionCount = %d, want %d", got, want) + } + }) +} + +func TestBaseHandlersNetworkPeerDetailUsesAuditMetrics(t *testing.T) { + t.Parallel() + + t.Run("Should derive peer metrics from persisted audit history", func(t *testing.T) { + coderSessionID := "sess-coder" + manager := testutil.StubSessionManager{ + ListAllFn: func(context.Context) ([]*session.SessionInfo, error) { + return []*session.SessionInfo{{ + ID: coderSessionID, + Name: "Coder", + AgentName: "coder", + WorkspaceID: "ws-1", + Channel: "builders", + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 11, 18, 0, 0, 0, time.UTC), + }}, nil + }, + } + fixture := newHandlerFixture(t, manager, testutil.StubObserver{}, testutil.StubWorkspaceService{}, nil, nil) + fixture.Handlers.Config.Network.Enabled = true + fixture.Handlers.Network = testutil.StubNetworkService{ + ListPeersFn: func(_ context.Context, channel string) ([]network.PeerInfo, error) { + if channel != "" { + t.Fatalf("ListPeers() channel = %q, want empty filter", channel) + } + return []network.PeerInfo{{ + SessionID: &coderSessionID, + PeerID: "coder.sess-coder", + Channel: "builders", + Local: true, + PeerCard: network.PeerCard{PeerID: "coder.sess-coder"}, + }}, nil + }, + } + fixture.Handlers.NetworkStore = testutil.StubNetworkStore{ + ListNetworkAuditFn: func(_ context.Context, query store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) { + if 
query.SessionID != coderSessionID { + t.Fatalf("ListNetworkAudit() session_id = %q, want %q", query.SessionID, coderSessionID) + } + return []store.NetworkAuditEntry{ + {SessionID: coderSessionID, Direction: network.AuditDirectionSent, Kind: "say", Channel: "builders", PeerFrom: "coder.sess-coder", MessageID: "msg-1", Size: 1}, + {SessionID: coderSessionID, Direction: network.AuditDirectionReceived, Kind: "direct", Channel: "builders", PeerFrom: "reviewer.sess-remote", MessageID: "msg-2", Size: 1}, + {SessionID: coderSessionID, Direction: network.AuditDirectionDelivered, Kind: "say", Channel: "builders", PeerFrom: "coder.sess-coder", MessageID: "msg-1", Size: 1}, + {SessionID: coderSessionID, Direction: network.AuditDirectionRejected, Kind: "receipt", Channel: "builders", PeerFrom: "reviewer.sess-remote", MessageID: "msg-3", Reason: "busy", Size: 1}, + }, nil + }, + } + + resp := performRequest(t, fixture.Engine, http.MethodGet, "/network/peers/coder.sess-coder", nil) + if resp.Code != http.StatusOK { + t.Fatalf("peer detail code = %d, want %d", resp.Code, http.StatusOK) + } + + var payload contract.NetworkPeerResponse + testutil.DecodeJSONResponse(t, resp, &payload) + if got, want := payload.Peer.DisplayName, "Coder"; got != want { + t.Fatalf("payload.Peer.DisplayName = %q, want %q", got, want) + } + if got, want := payload.Peer.Metrics.Sent, int64(1); got != want { + t.Fatalf("payload.Peer.Metrics.Sent = %d, want %d", got, want) + } + if got, want := payload.Peer.Metrics.Received, int64(1); got != want { + t.Fatalf("payload.Peer.Metrics.Received = %d, want %d", got, want) + } + if got, want := payload.Peer.Metrics.Delivered, int64(1); got != want { + t.Fatalf("payload.Peer.Metrics.Delivered = %d, want %d", got, want) + } + if got, want := payload.Peer.Metrics.Rejected, int64(1); got != want { + t.Fatalf("payload.Peer.Metrics.Rejected = %d, want %d", got, want) + } + }) +} + func timePtr(value time.Time) *time.Time { return &value } diff --git 
a/internal/api/core/session_workspace.go b/internal/api/core/session_workspace.go index 183d22e12..10f72c95a 100644 --- a/internal/api/core/session_workspace.go +++ b/internal/api/core/session_workspace.go @@ -125,6 +125,8 @@ func statusForSessionError(err error) int { return http.StatusNotFound case errors.Is(err, workspacepkg.ErrWorkspaceRootMissing): return http.StatusGone + case errors.Is(err, workspacepkg.ErrAgentNotAvailable): + return http.StatusBadRequest case errors.Is(err, session.ErrSessionNotActive): return http.StatusBadRequest case errors.Is(err, session.ErrMaxSessionsReached), diff --git a/internal/api/core/test_helpers_test.go b/internal/api/core/test_helpers_test.go index 5fdc530e7..b65a4bddc 100644 --- a/internal/api/core/test_helpers_test.go +++ b/internal/api/core/test_helpers_test.go @@ -134,7 +134,11 @@ func newHandlerFixtureWithAutomation( engine.GET("/daemon/status", handlers.DaemonStatus) engine.GET("/network/status", handlers.NetworkStatus) engine.GET("/network/peers", handlers.NetworkPeers) + engine.GET("/network/peers/:peer_id", handlers.NetworkPeer) engine.GET("/network/channels", handlers.NetworkChannels) + engine.POST("/network/channels", handlers.CreateNetworkChannel) + engine.GET("/network/channels/:channel", handlers.NetworkChannel) + engine.GET("/network/channels/:channel/messages", handlers.NetworkChannelMessages) engine.POST("/network/send", handlers.NetworkSend) engine.GET("/network/inbox", handlers.NetworkInbox) engine.GET("/memory", handlers.ListMemory) diff --git a/internal/api/httpapi/bridges_integration_test.go b/internal/api/httpapi/bridges_integration_test.go index fb9710f55..0c8dd30f7 100644 --- a/internal/api/httpapi/bridges_integration_test.go +++ b/internal/api/httpapi/bridges_integration_test.go @@ -301,6 +301,9 @@ func TestHTTPBridgeDetailReportsBacklogAndClearsAfterDeliveryCompletes(t *testin if bridge.Health.DeliveryBacklog != 0 { t.Fatalf("bridge.health.delivery_backlog = %d, want 0", 
bridge.Health.DeliveryBacklog) } + if bridge.Health.LastSuccessAt == nil { + t.Fatal("bridge.health.last_success_at = nil, want successful delivery timestamp") + } } func createIntegrationBridge(t *testing.T, runtime integrationRuntime, req bridgepkg.CreateInstanceRequest) *bridgepkg.BridgeInstance { diff --git a/internal/api/httpapi/bridges_test.go b/internal/api/httpapi/bridges_test.go index c7d5dd25a..d44687ba6 100644 --- a/internal/api/httpapi/bridges_test.go +++ b/internal/api/httpapi/bridges_test.go @@ -22,6 +22,39 @@ func TestBridgeHandlersShouldHandleBridgeRoutes(t *testing.T) { bridges stubBridgeService assert func(t *testing.T, recorder *httptest.ResponseRecorder) }{ + { + name: "ShouldListBridgeProviders", + method: http.MethodGet, + path: "/api/bridges/providers", + bridges: stubBridgeService{ + ListProvidersFn: func(context.Context) ([]bridgepkg.BridgeProvider, error) { + return []bridgepkg.BridgeProvider{{ + Platform: "telegram", + ExtensionName: "telegram-reference", + DisplayName: "Telegram", + Description: "Reference Telegram bridge adapter", + Enabled: true, + State: "active", + Health: "healthy", + }}, nil + }, + }, + assert: func(t *testing.T, recorder *httptest.ResponseRecorder) { + t.Helper() + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response contract.BridgeProvidersResponse + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Providers), 1; got != want { + t.Fatalf("len(providers) = %d, want %d", got, want) + } + if response.Providers[0].ExtensionName != "telegram-reference" { + t.Fatalf("provider = %#v", response.Providers[0]) + } + }, + }, { name: "ShouldCreateBridgeInstance", method: http.MethodPost, diff --git a/internal/api/httpapi/handlers.go b/internal/api/httpapi/handlers.go index fc9d11e4d..e6cffca6c 100644 --- a/internal/api/httpapi/handlers.go +++ b/internal/api/httpapi/handlers.go @@ -13,6 +13,7 @@ 
import ( type handlerConfig struct { sessions core.SessionManager network core.NetworkService + networkStore core.NetworkStore observer core.Observer automation core.AutomationManager bridges core.BridgeService @@ -52,6 +53,7 @@ func newHandlers(cfg handlerConfig) *Handlers { IncludeSessionWorkspaceInSSE: false, Sessions: cfg.sessions, Network: cfg.network, + NetworkStore: cfg.networkStore, Observer: cfg.observer, Automation: cfg.automation, Bridges: cfg.bridges, diff --git a/internal/api/httpapi/handlers_test.go b/internal/api/httpapi/handlers_test.go index 1fc4b0fea..68d3a0b8d 100644 --- a/internal/api/httpapi/handlers_test.go +++ b/internal/api/httpapi/handlers_test.go @@ -55,6 +55,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/bridges", "GET /api/bridges/:id", "GET /api/bridges/:id/routes", + "GET /api/bridges/providers", "GET /api/daemon/status", "GET /api/hooks/catalog", "GET /api/hooks/events", @@ -63,7 +64,10 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/memory/:filename", "GET /api/network/inbox", "GET /api/network/peers", + "GET /api/network/peers/:peer_id", "GET /api/network/channels", + "GET /api/network/channels/:channel", + "GET /api/network/channels/:channel/messages", "GET /api/network/status", "GET /api/observe/events", "GET /api/observe/events/stream", @@ -87,6 +91,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "POST /api/automation/jobs/:id/trigger", "POST /api/automation/triggers", "POST /api/memory/consolidate", + "POST /api/network/channels", "POST /api/bridges", "POST /api/bridges/:id/disable", "POST /api/bridges/:id/enable", diff --git a/internal/api/httpapi/httpapi_integration_test.go b/internal/api/httpapi/httpapi_integration_test.go index b850b64f6..5f2af8c0a 100644 --- a/internal/api/httpapi/httpapi_integration_test.go +++ b/internal/api/httpapi/httpapi_integration_test.go @@ -1014,6 +1014,10 @@ func (s *integrationBridgeService) RestartInstance(ctx 
context.Context, id strin return instance, nil } +func (s *integrationBridgeService) ListProviders(context.Context) ([]bridgepkg.BridgeProvider, error) { + return []bridgepkg.BridgeProvider{}, nil +} + func (s *integrationBridgeService) DeliveryMetrics() map[string]bridgepkg.BridgeDeliveryMetrics { if s == nil || s.broker == nil { return nil diff --git a/internal/api/httpapi/routes.go b/internal/api/httpapi/routes.go index d26dc18ae..7c7c71753 100644 --- a/internal/api/httpapi/routes.go +++ b/internal/api/httpapi/routes.go @@ -32,6 +32,7 @@ func registerBridgeRoutes(api gin.IRouter, handlers *Handlers) { bridges := api.Group("/bridges") bridges.GET("", handlers.ListBridges) bridges.POST("", handlers.CreateBridge) + bridges.GET("/providers", handlers.ListBridgeProviders) bridges.GET("/:id", handlers.GetBridge) bridges.PATCH("/:id", handlers.UpdateBridge) bridges.POST("/:id/enable", handlers.EnableBridge) @@ -138,7 +139,11 @@ func registerNetworkRoutes(api gin.IRouter, handlers *Handlers) { networkGroup := api.Group("/network") networkGroup.GET("/status", handlers.NetworkStatus) networkGroup.GET("/peers", handlers.NetworkPeers) + networkGroup.GET("/peers/:peer_id", handlers.NetworkPeer) networkGroup.GET("/channels", handlers.NetworkChannels) + networkGroup.POST("/channels", handlers.CreateNetworkChannel) + networkGroup.GET("/channels/:channel", handlers.NetworkChannel) + networkGroup.GET("/channels/:channel/messages", handlers.NetworkChannelMessages) networkGroup.POST("/send", handlers.NetworkSend) networkGroup.GET("/inbox", handlers.NetworkInbox) } diff --git a/internal/api/httpapi/server.go b/internal/api/httpapi/server.go index 2934d89bc..543f2242d 100644 --- a/internal/api/httpapi/server.go +++ b/internal/api/httpapi/server.go @@ -42,6 +42,7 @@ type Server struct { pollInterval time.Duration sessions core.SessionManager network core.NetworkService + networkStore core.NetworkStore observer core.Observer automation core.AutomationManager bridges core.BridgeService 
@@ -132,6 +133,13 @@ func WithNetworkService(service core.NetworkService) Option { } } +// WithNetworkStore injects the persisted network query store. +func WithNetworkStore(store core.NetworkStore) Option { + return func(server *Server) { + server.networkStore = store + } +} + // WithObserver injects the runtime observer. func WithObserver(observer core.Observer) Option { return func(server *Server) { @@ -271,6 +279,7 @@ func New(opts ...Option) (*Server, error) { server.handlers = newHandlers(handlerConfig{ sessions: server.sessions, network: server.network, + networkStore: server.networkStore, observer: server.observer, automation: server.automation, bridges: server.bridges, diff --git a/internal/api/spec/spec.go b/internal/api/spec/spec.go index 88319f36e..3ecc7b3c4 100644 --- a/internal/api/spec/spec.go +++ b/internal/api/spec/spec.go @@ -87,6 +87,7 @@ func Document() (*openapi3.T, error) { {Name: "automation"}, {Name: "bridges"}, {Name: "daemon"}, + {Name: "network"}, {Name: "extensions"}, {Name: "hooks"}, {Name: "memory"}, @@ -501,6 +502,19 @@ func Operations() []OperationSpec { {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, }, }, + { + Method: "GET", + Path: "/api/bridges/providers", + OperationID: "listBridgeProviders", + Summary: "List installed bridge-capable providers", + Tags: []string{"bridges"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.BridgeProvidersResponse{}}, + {Status: 503, Description: "Bridge service is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, { Method: "GET", Path: "/api/bridges/{id}", @@ -640,6 +654,152 @@ func Operations() []OperationSpec { {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, }, }, + { + Method: "GET", + Path: "/api/network/status", + OperationID: 
"getNetworkStatus", + Summary: "Get the network runtime status snapshot", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkStatusResponse{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/peers", + OperationID: "listNetworkPeers", + Summary: "List visible network peers", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + queryParam("channel", "Filter peers by channel", false), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkPeersResponse{}}, + {Status: 400, Description: "Invalid network filter", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/peers/{peer_id}", + OperationID: "getNetworkPeer", + Summary: "Get one visible network peer detail", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + pathParam("peer_id", "Network peer id"), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkPeerResponse{}}, + {Status: 404, Description: "Network peer not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/channels", + OperationID: "listNetworkChannels", + Summary: "List materialized network channels", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Responses: []ResponseSpec{ 
+ {Status: 200, Description: "OK", Body: contract.NetworkChannelsResponse{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "POST", + Path: "/api/network/channels", + OperationID: "createNetworkChannel", + Summary: "Create a network channel by spawning agent sessions", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + RequestBody: contract.CreateNetworkChannelRequest{}, + Responses: []ResponseSpec{ + {Status: 201, Description: "Created", Body: contract.CreateNetworkChannelResponse{}}, + {Status: 400, Description: "Invalid network channel request", Body: contract.ErrorPayload{}}, + {Status: 404, Description: "Workspace not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/channels/{channel}", + OperationID: "getNetworkChannel", + Summary: "Get one network channel detail", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + pathParam("channel", "Network channel"), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkChannelResponse{}}, + {Status: 400, Description: "Invalid network channel", Body: contract.ErrorPayload{}}, + {Status: 404, Description: "Network channel not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/channels/{channel}/messages", + OperationID: "listNetworkChannelMessages", + Summary: "List the read-only 
timeline for one network channel", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + pathParam("channel", "Network channel"), + intQueryParam("limit", "Maximum number of timeline messages to return", false), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkChannelMessagesResponse{}}, + {Status: 400, Description: "Invalid network channel", Body: contract.ErrorPayload{}}, + {Status: 404, Description: "Network channel not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "POST", + Path: "/api/network/send", + OperationID: "sendNetworkMessage", + Summary: "Send one network message", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + RequestBody: contract.NetworkSendRequest{}, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkSendResponse{}}, + {Status: 400, Description: "Invalid network send request", Body: contract.ErrorPayload{}}, + {Status: 404, Description: "Network target not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, + { + Method: "GET", + Path: "/api/network/inbox", + OperationID: "listNetworkInbox", + Summary: "List queued network inbox messages for one local session", + Tags: []string{"network"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + queryParam("session_id", "Target local session id", true), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.NetworkInboxResponse{}}, + {Status: 400, Description: "Invalid inbox 
request", Body: contract.ErrorPayload{}}, + {Status: 404, Description: "Network target not found", Body: contract.ErrorPayload{}}, + {Status: 503, Description: "Network runtime is not configured", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, { Method: "GET", Path: "/api/extensions", diff --git a/internal/api/spec/spec_test.go b/internal/api/spec/spec_test.go index b6c4f0a91..a9bfdf6dd 100644 --- a/internal/api/spec/spec_test.go +++ b/internal/api/spec/spec_test.go @@ -177,6 +177,29 @@ func TestDocumentTracksRequiredFieldsAndEnums(t *testing.T) { assertRequired(t, responseSchema, "status", "delivery_target") }, }, + { + name: "ShouldDescribeBridgeProvidersAndHealthTelemetry", + check: func(t *testing.T, doc *openapi3.T) { + t.Helper() + + providers := operationFor(t, doc, "/api/bridges/providers", "GET") + providersSchema := jsonResponseSchema(t, providers, 200) + assertRequired(t, providersSchema, "providers") + + providerItems := propertySchema(t, providersSchema, "providers") + if providerItems.Items == nil || providerItems.Items.Value == nil { + t.Fatal("expected providers to define an items schema") + } + providerSchema := providerItems.Items.Value + assertRequired(t, providerSchema, "platform", "extension_name", "display_name", "enabled", "state", "health") + assertNotRequired(t, providerSchema, "description", "health_message") + + getBridge := operationFor(t, doc, "/api/bridges/{id}", "GET") + getBridgeSchema := jsonResponseSchema(t, getBridge, 200) + healthSchema := propertySchema(t, getBridgeSchema, "health") + assertNotRequired(t, healthSchema, "last_success_at", "last_error", "last_error_at") + }, + }, } for _, tt := range tests { diff --git a/internal/api/testutil/apitest.go b/internal/api/testutil/apitest.go index 9ab1b0949..c3f51fde2 100644 --- a/internal/api/testutil/apitest.go +++ b/internal/api/testutil/apitest.go @@ -340,6 +340,11 @@ type StubNetworkService struct { 
InboxFn func(context.Context, string) ([]network.Envelope, error) } +type StubNetworkStore struct { + ListNetworkAuditFn func(context.Context, store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) + ListNetworkMessagesFn func(context.Context, store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) +} + func (s StubNetworkService) Send(ctx context.Context, req network.SendRequest) (string, error) { if s.SendFn != nil { return s.SendFn(ctx, req) @@ -375,6 +380,20 @@ func (s StubNetworkService) Inbox(ctx context.Context, sessionID string) ([]netw return nil, nil } +func (s StubNetworkStore) ListNetworkAudit(ctx context.Context, query store.NetworkAuditQuery) ([]store.NetworkAuditEntry, error) { + if s.ListNetworkAuditFn != nil { + return s.ListNetworkAuditFn(ctx, query) + } + return nil, nil +} + +func (s StubNetworkStore) ListNetworkMessages(ctx context.Context, query store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + if s.ListNetworkMessagesFn != nil { + return s.ListNetworkMessagesFn(ctx, query) + } + return nil, nil +} + func (s StubObserver) Health(ctx context.Context) (observe.Health, error) { if s.HealthFn != nil { return s.HealthFn(ctx) @@ -414,6 +433,7 @@ type StubBridgeService struct { CreateInstanceFn func(context.Context, bridgepkg.CreateInstanceRequest) (*bridgepkg.BridgeInstance, error) GetInstanceFn func(context.Context, string) (*bridgepkg.BridgeInstance, error) ListInstancesFn func(context.Context) ([]bridgepkg.BridgeInstance, error) + ListProvidersFn func(context.Context) ([]bridgepkg.BridgeProvider, error) UpdateInstanceFn func(context.Context, bridgepkg.UpdateInstanceRequest) (*bridgepkg.BridgeInstance, error) UpdateInstanceStateFn func(context.Context, bridgepkg.UpdateInstanceStateRequest) (*bridgepkg.BridgeInstance, error) BuildRoutingKeyFn func(context.Context, bridgepkg.RoutingKey) (bridgepkg.RoutingKey, error) @@ -450,6 +470,13 @@ func (s StubBridgeService) ListInstances(ctx context.Context) ([]bridgepkg.Bridg 
return nil, nil } +func (s StubBridgeService) ListProviders(ctx context.Context) ([]bridgepkg.BridgeProvider, error) { + if s.ListProvidersFn != nil { + return s.ListProvidersFn(ctx) + } + return nil, nil +} + func (s StubBridgeService) UpdateInstance(ctx context.Context, req bridgepkg.UpdateInstanceRequest) (*bridgepkg.BridgeInstance, error) { if s.UpdateInstanceFn != nil { return s.UpdateInstanceFn(ctx, req) @@ -785,6 +812,7 @@ func DiscardLogger() *slog.Logger { var _ core.SessionManager = (*StubSessionManager)(nil) var _ core.NetworkService = (*StubNetworkService)(nil) +var _ core.NetworkStore = (*StubNetworkStore)(nil) var _ core.Observer = (*StubObserver)(nil) var _ core.AutomationManager = (*StubAutomationManager)(nil) var _ core.WorkspaceService = (*StubWorkspaceService)(nil) diff --git a/internal/api/udsapi/bridges_test.go b/internal/api/udsapi/bridges_test.go index a8e546843..2abf1e39d 100644 --- a/internal/api/udsapi/bridges_test.go +++ b/internal/api/udsapi/bridges_test.go @@ -123,3 +123,48 @@ func TestListBridgeRoutesHandlerReturnsRequestedPayload(t *testing.T) { t.Fatalf("route = %#v", response.Routes[0]) } } + +func TestListBridgeProvidersHandlerReturnsRequestedPayload(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + }{ + {name: "Should return requested payload"}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + homePaths := newTestHomePaths(t) + bridges := stubBridgeService{ + ListProvidersFn: func(_ context.Context) ([]bridgepkg.BridgeProvider, error) { + return []bridgepkg.BridgeProvider{{ + Platform: "telegram", + ExtensionName: "telegram-reference", + DisplayName: "Telegram", + Description: "Reference Telegram bridge adapter", + Enabled: true, + State: "active", + Health: "healthy", + }}, nil + }, + } + + engine := newTestRouter(t, newTestHandlersWithBridges(t, stubSessionManager{}, stubObserver{}, bridges, stubWorkspaceService{}, homePaths)) + recorder := performRequest(t, engine, 
http.MethodGet, "/api/bridges/providers", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response contract.BridgeProvidersResponse + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Providers), 1; got != want { + t.Fatalf("len(providers) = %d, want %d", got, want) + } + if response.Providers[0].ExtensionName != "telegram-reference" { + t.Fatalf("provider = %#v", response.Providers[0]) + } + }) + } +} diff --git a/internal/api/udsapi/handlers_test.go b/internal/api/udsapi/handlers_test.go index 9680d93a7..163077e02 100644 --- a/internal/api/udsapi/handlers_test.go +++ b/internal/api/udsapi/handlers_test.go @@ -95,6 +95,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/bridges", "GET /api/bridges/:id", "GET /api/bridges/:id/routes", + "GET /api/bridges/providers", "GET /api/daemon/status", "GET /api/extensions", "GET /api/extensions/:name", @@ -105,7 +106,10 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/memory/:filename", "GET /api/network/inbox", "GET /api/network/peers", + "GET /api/network/peers/:peer_id", "GET /api/network/channels", + "GET /api/network/channels/:channel", + "GET /api/network/channels/:channel/messages", "GET /api/network/status", "GET /api/observe/events", "GET /api/observe/events/stream", @@ -137,6 +141,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "POST /api/extensions/:name/disable", "POST /api/extensions/:name/enable", "POST /api/memory/consolidate", + "POST /api/network/channels", "POST /api/network/send", "POST /api/sessions", "POST /api/sessions/:id/approve", diff --git a/internal/api/udsapi/routes.go b/internal/api/udsapi/routes.go index 095ee07c6..3dd592457 100644 --- a/internal/api/udsapi/routes.go +++ b/internal/api/udsapi/routes.go @@ -10,6 +10,7 @@ func RegisterRoutes(router gin.IRouter, handlers *Handlers) { { 
bridges.GET("", handlers.ListBridges) bridges.POST("", handlers.CreateBridge) + bridges.GET("/providers", handlers.ListBridgeProviders) bridges.GET("/:id", handlers.GetBridge) bridges.PATCH("/:id", handlers.UpdateBridge) bridges.POST("/:id/enable", handlers.EnableBridge) @@ -115,7 +116,11 @@ func RegisterRoutes(router gin.IRouter, handlers *Handlers) { { network.GET("/status", handlers.NetworkStatus) network.GET("/peers", handlers.NetworkPeers) + network.GET("/peers/:peer_id", handlers.NetworkPeer) network.GET("/channels", handlers.NetworkChannels) + network.POST("/channels", handlers.CreateNetworkChannel) + network.GET("/channels/:channel", handlers.NetworkChannel) + network.GET("/channels/:channel/messages", handlers.NetworkChannelMessages) network.POST("/send", handlers.NetworkSend) network.GET("/inbox", handlers.NetworkInbox) } diff --git a/internal/api/udsapi/server.go b/internal/api/udsapi/server.go index 7e56fb9ed..cfc671295 100644 --- a/internal/api/udsapi/server.go +++ b/internal/api/udsapi/server.go @@ -52,6 +52,7 @@ type Server struct { pollInterval time.Duration sessions core.SessionManager network core.NetworkService + networkStore core.NetworkStore observer core.Observer automation core.AutomationManager bridges core.BridgeService @@ -75,6 +76,7 @@ type Server struct { type handlerConfig struct { sessions core.SessionManager network core.NetworkService + networkStore core.NetworkStore observer core.Observer automation core.AutomationManager bridges core.BridgeService @@ -161,6 +163,13 @@ func WithNetworkService(service core.NetworkService) Option { } } +// WithNetworkStore injects the persisted network query store. +func WithNetworkStore(store core.NetworkStore) Option { + return func(server *Server) { + server.networkStore = store + } +} + // WithObserver injects the runtime observer. 
func WithObserver(observer core.Observer) Option { return func(server *Server) { @@ -297,6 +306,7 @@ func New(opts ...Option) (*Server, error) { server.handlers = newHandlers(handlerConfig{ sessions: server.sessions, network: server.network, + networkStore: server.networkStore, observer: server.observer, automation: server.automation, bridges: server.bridges, @@ -507,6 +517,7 @@ func newHandlers(cfg handlerConfig) *Handlers { IncludeSessionWorkspaceInSSE: true, Sessions: cfg.sessions, Network: cfg.network, + NetworkStore: cfg.networkStore, Observer: cfg.observer, Automation: cfg.automation, Bridges: cfg.bridges, diff --git a/internal/api/udsapi/udsapi_integration_test.go b/internal/api/udsapi/udsapi_integration_test.go index 683a85468..b5e3c53f5 100644 --- a/internal/api/udsapi/udsapi_integration_test.go +++ b/internal/api/udsapi/udsapi_integration_test.go @@ -587,6 +587,10 @@ func (s *integrationBridgeService) RestartInstance(ctx context.Context, id strin return instance, nil } +func (s *integrationBridgeService) ListProviders(context.Context) ([]bridgepkg.BridgeProvider, error) { + return nil, nil +} + func (t *integrationDreamTrigger) Trigger(context.Context, string) (bool, string, error) { t.calls++ return t.triggered, t.reason, nil diff --git a/internal/automation/schedule_test.go b/internal/automation/schedule_test.go index 50feca330..6bd954465 100644 --- a/internal/automation/schedule_test.go +++ b/internal/automation/schedule_test.go @@ -98,6 +98,7 @@ func TestSchedulerAtJobUnregistersAfterFiringOnce(t *testing.T) { waitForTimers(t, fakeClock, 1) fakeClock.Advance(1 * time.Minute) dispatcher.waitForDispatchCount(t, 1, 2*time.Second) + dispatcher.waitForCompletionCount(t, 1, 2*time.Second) if _, err := scheduler.State(job.ID); !errors.Is(err, ErrScheduledJobNotFound) { t.Fatalf("State() error = %v, want ErrScheduledJobNotFound", err) @@ -138,6 +139,7 @@ func TestSchedulerSingletonPreventsOverlap(t *testing.T) { dispatcher.assertDispatchCount(t, 1) 
dispatcher.releaseBlockedDispatch() + dispatcher.waitForCompletionCount(t, 1, 2*time.Second) waitForTimers(t, fakeClock, 1) fakeClock.Advance(1 * time.Second) dispatcher.waitForDispatchCount(t, 2, 2*time.Second) @@ -367,16 +369,20 @@ type stubScheduleDispatcher struct { blocked bool releaseCh chan struct{} dispatchedCh chan struct{} + completedCh chan struct{} dispatchResult error } func newStubScheduleDispatcher() *stubScheduleDispatcher { return &stubScheduleDispatcher{ dispatchedCh: make(chan struct{}, 32), + completedCh: make(chan struct{}, 32), } } func (d *stubScheduleDispatcher) Dispatch(ctx context.Context, req DispatchRequest) (*Run, error) { + defer notify(d.completedCh) + d.mu.Lock() d.calls = append(d.calls, req) releaseCh := d.releaseCh @@ -451,6 +457,21 @@ func (d *stubScheduleDispatcher) waitForDispatchCount(t *testing.T, want int, ti } } +func (d *stubScheduleDispatcher) waitForCompletionCount(t *testing.T, want int, timeout time.Duration) { + t.Helper() + + completed := 0 + deadline := time.After(timeout) + for completed < want { + select { + case <-deadline: + t.Fatalf("dispatch completion count did not reach %d within %s; got %d", want, timeout, completed) + case <-d.completedCh: + completed++ + } + } +} + func newTestScheduler(t *testing.T, dispatcher ScheduleDispatcher, opts ...SchedulerOption) *Scheduler { t.Helper() diff --git a/internal/bridges/delivery_broker.go b/internal/bridges/delivery_broker.go index 1802fb651..edec3c725 100644 --- a/internal/bridges/delivery_broker.go +++ b/internal/bridges/delivery_broker.go @@ -73,6 +73,7 @@ type instanceDeliveryMetrics struct { deliveryFailuresTotal int lastError string lastErrorAt time.Time + lastSuccessAt time.Time } // Broker projects session output into ordered delivery requests for one @@ -199,6 +200,7 @@ func (b *Broker) DeliveryMetrics() map[string]BridgeDeliveryMetrics { DeliveryFailuresTotal: metrics.deliveryFailuresTotal, LastError: metrics.lastError, LastErrorAt: metrics.lastErrorAt, + 
LastSuccessAt: metrics.lastSuccessAt, } } @@ -700,6 +702,7 @@ func (b *Broker) handleSendSuccess(route *routeWorker, deliveryID string, eventT } } delivery.updatedAt = b.now() + b.recordDeliverySuccessLocked(delivery.bridgeInstanceID, delivery.updatedAt) if delivery.final && !delivery.hasQueuedItems() { b.removeDeliveryLocked(route, delivery) @@ -983,6 +986,14 @@ func (b *Broker) recordDeliveryFailureLocked(bridgeInstanceID string, message st metrics.lastErrorAt = b.now() } +func (b *Broker) recordDeliverySuccessLocked(bridgeInstanceID string, deliveredAt time.Time) { + metrics := b.metricsLocked(bridgeInstanceID) + if metrics == nil { + return + } + metrics.lastSuccessAt = deliveredAt.UTC() +} + func (b *Broker) removeQueuedSlotLocked(route *routeWorker, deliveryID string, kind deliveryQueueKind) bool { if route == nil { return false diff --git a/internal/bridges/delivery_broker_test.go b/internal/bridges/delivery_broker_test.go index b23cda7a4..960d6a662 100644 --- a/internal/bridges/delivery_broker_test.go +++ b/internal/bridges/delivery_broker_test.go @@ -373,6 +373,7 @@ func TestBrokerSnapshotCapturesActiveDeliveryAfterFailure(t *testing.T) { func TestBrokerDeliveryMetricsReflectBacklogAndClearAfterAck(t *testing.T) { t.Parallel() + now := time.Date(2026, 4, 11, 12, 0, 0, 0, time.UTC) releaseStart := make(chan struct{}) transport := &fakeDeliveryTransport{ handler: func(ctx context.Context, _ string, req DeliveryRequest) (DeliveryAck, error) { @@ -386,7 +387,7 @@ func TestBrokerDeliveryMetricsReflectBacklogAndClearAfterAck(t *testing.T) { return DeliveryAck{DeliveryID: req.Event.DeliveryID, Seq: req.Event.Seq}, nil }, } - broker := NewBroker(transport) + broker := NewBroker(transport, WithDeliveryBrokerNow(func() time.Time { return now })) t.Cleanup(broker.Close) reg := mustRegisterTestDelivery(t, broker, PromptDeliveryRegistration{ @@ -422,6 +423,9 @@ func TestBrokerDeliveryMetricsReflectBacklogAndClearAfterAck(t *testing.T) { if got, want := 
metrics.DeliveryBacklog, 0; got != want { t.Fatalf("DeliveryMetrics().DeliveryBacklog after ack = %d, want %d", got, want) } + if got, want := metrics.LastSuccessAt, now; !got.Equal(want) { + t.Fatalf("DeliveryMetrics().LastSuccessAt = %s, want %s", got, want) + } } func TestBrokerDeliveryMetricsCaptureTerminalFailures(t *testing.T) { diff --git a/internal/bridges/delivery_metrics.go b/internal/bridges/delivery_metrics.go index 3ec024bb8..69a686789 100644 --- a/internal/bridges/delivery_metrics.go +++ b/internal/bridges/delivery_metrics.go @@ -12,4 +12,5 @@ type BridgeDeliveryMetrics struct { DeliveryFailuresTotal int `json:"delivery_failures_total"` LastError string `json:"last_error,omitempty"` LastErrorAt time.Time `json:"last_error_at,omitempty"` + LastSuccessAt time.Time `json:"last_success_at,omitempty"` } diff --git a/internal/bridges/types.go b/internal/bridges/types.go index d905b9cd9..fd891e125 100644 --- a/internal/bridges/types.go +++ b/internal/bridges/types.go @@ -128,6 +128,19 @@ func (p RoutingPolicy) Validate() error { return nil } +// BridgeProvider describes one installed bridge-capable extension that can be +// selected when creating a bridge instance. +type BridgeProvider struct { + Platform string `json:"platform"` + ExtensionName string `json:"extension_name"` + DisplayName string `json:"display_name"` + Description string `json:"description,omitempty"` + Enabled bool `json:"enabled"` + State string `json:"state"` + Health string `json:"health"` + HealthMessage string `json:"health_message,omitempty"` +} + // BridgeInstance is the authoritative persisted configuration for one bridge adapter instance. 
type BridgeInstance struct { ID string `json:"id"` diff --git a/internal/cli/cli_integration_test.go b/internal/cli/cli_integration_test.go index 0b1129aa6..e7f3cba18 100644 --- a/internal/cli/cli_integration_test.go +++ b/internal/cli/cli_integration_test.go @@ -1167,6 +1167,10 @@ func (s *integrationBridgeService) RestartInstance(ctx context.Context, id strin }) } +func (s *integrationBridgeService) ListProviders(context.Context) ([]bridgepkg.BridgeProvider, error) { + return []bridgepkg.BridgeProvider{}, nil +} + func (s *integrationExtensionService) List(ctx context.Context) ([]contract.ExtensionPayload, error) { infos, err := s.registry.List() if err != nil { diff --git a/internal/daemon/bridges.go b/internal/daemon/bridges.go index 048675197..1b7f51fdb 100644 --- a/internal/daemon/bridges.go +++ b/internal/daemon/bridges.go @@ -5,12 +5,14 @@ import ( "errors" "fmt" "log/slog" + "slices" "strings" "sync" "time" bridgepkg "github.com/pedronauck/agh/internal/bridges" extensionpkg "github.com/pedronauck/agh/internal/extension" + extensionprotocol "github.com/pedronauck/agh/internal/extension/protocol" "github.com/pedronauck/agh/internal/subprocess" ) @@ -38,6 +40,7 @@ type bridgeRuntime struct { *bridgepkg.Service store bridgeRuntimeStore + registry *extensionpkg.Registry secretResolver BridgeSecretResolver broker *bridgepkg.Broker logger *slog.Logger @@ -72,9 +75,15 @@ func newBridgeRuntime( now = func() time.Time { return time.Now().UTC() } } + var registry *extensionpkg.Registry + if dbSource, ok := store.(extensionDBSource); ok && dbSource.DB() != nil { + registry = extensionpkg.NewRegistry(dbSource.DB()) + } + return &bridgeRuntime{ Service: bridgepkg.NewRegistry(store, bridgepkg.WithNow(now)), store: store, + registry: registry, secretResolver: secretResolver, broker: bridgepkg.NewBroker(nil, bridgepkg.WithDeliveryBrokerNow(now)), logger: logger, @@ -134,6 +143,79 @@ func (r *bridgeRuntime) DeliveryMetrics() map[string]bridgepkg.BridgeDeliveryMet return 
r.broker.DeliveryMetrics() } +func (r *bridgeRuntime) ListProviders(ctx context.Context) ([]bridgepkg.BridgeProvider, error) { + if r == nil { + return nil, errors.New("daemon: bridge runtime is required") + } + if ctx == nil { + return nil, errors.New("daemon: list bridge providers context is required") + } + if err := ctx.Err(); err != nil { + return nil, err + } + if r.registry == nil { + return nil, nil + } + + infos, err := r.registry.List() + if err != nil { + return nil, fmt.Errorf("daemon: list bridge providers: %w", err) + } + + r.mu.RLock() + extensions := r.extensions + r.mu.RUnlock() + + providers := make([]bridgepkg.BridgeProvider, 0, len(infos)) + for _, info := range infos { + if !slices.Contains(info.Capabilities.Provides, extensionprotocol.CapabilityProvideBridgeAdapter) { + continue + } + + ext, err := loadExtensionSnapshot(r.registry, extensions, r.logger, info.Name) + if err != nil { + r.logger.Warn("daemon: skip invalid bridge provider extension", "extension_name", info.Name, "error", err) + continue + } + if ext == nil || ext.Manifest == nil { + r.logger.Warn("daemon: skip bridge provider with missing manifest", "extension_name", info.Name) + continue + } + + platform := strings.TrimSpace(ext.Manifest.Bridge.Platform) + displayName := strings.TrimSpace(ext.Manifest.Bridge.DisplayName) + if platform == "" { + r.logger.Warn("daemon: skip bridge provider with missing platform", "extension_name", info.Name) + continue + } + if displayName == "" { + r.logger.Warn("daemon: skip bridge provider with missing display name", "extension_name", info.Name) + continue + } + + description := strings.TrimSpace(ext.Manifest.Description) + status := extensionpkg.DescribeExtension(ext, extensions != nil, r.now()) + providers = append(providers, bridgepkg.BridgeProvider{ + Platform: platform, + ExtensionName: info.Name, + DisplayName: displayName, + Description: description, + Enabled: info.Enabled, + State: status.State, + Health: status.Health, + HealthMessage: 
status.HealthMessage, + }) + } + + slices.SortFunc(providers, func(left, right bridgepkg.BridgeProvider) int { + if byName := strings.Compare(left.DisplayName, right.DisplayName); byName != 0 { + return byName + } + return strings.Compare(left.ExtensionName, right.ExtensionName) + }) + return providers, nil +} + func (r *bridgeRuntime) Close() { if r == nil || r.broker == nil { return diff --git a/internal/daemon/bridges_test.go b/internal/daemon/bridges_test.go index 421c20ea0..e7b698029 100644 --- a/internal/daemon/bridges_test.go +++ b/internal/daemon/bridges_test.go @@ -3,6 +3,10 @@ package daemon import ( "context" "errors" + "fmt" + "os" + "path/filepath" + "strings" "sync" "testing" "time" @@ -76,6 +80,9 @@ func TestComposeBridgeRuntime(t *testing.T) { if runtime.store != db { t.Fatalf("composeBridgeRuntime(globaldb) store = %#v, want global db", runtime.store) } + if runtime.registry == nil { + t.Fatal("composeBridgeRuntime(globaldb) registry = nil, want extension registry") + } }) } @@ -275,6 +282,120 @@ func TestBridgeRuntimeCreateInstance(t *testing.T) { }) } +func TestBridgeRuntimeListProviders(t *testing.T) { + t.Run("ShouldProjectInstalledBridgeProvidersFromExtensionRegistry", func(t *testing.T) { + t.Parallel() + + db := openDaemonTestGlobalDB(t) + now := time.Date(2026, 4, 11, 12, 25, 0, 0, time.UTC) + runtime := newBridgeRuntime(db, discardLogger(), func() time.Time { return now }, nil) + if runtime == nil { + t.Fatal("newBridgeRuntime() = nil, want non-nil") + } + if runtime.registry == nil { + t.Fatal("runtime.registry = nil, want extension registry") + } + + bridgeInfo := mustInstallDaemonExtension(t, runtime.registry, daemonExtensionFixture{ + name: "telegram-reference", + description: "Reference Telegram bridge adapter", + capabilities: []string{"bridge.adapter"}, + bridgePlatform: "telegram", + bridgeDisplayName: "Telegram", + enabled: true, + }) + mustInstallDaemonExtension(t, runtime.registry, daemonExtensionFixture{ + name: "memory-only", 
+ description: "Memory backend", + capabilities: []string{"memory.backend"}, + enabled: true, + }) + + runtime.setExtensionRuntime(&fakeExtensionRuntime{ + getExt: &extensionpkg.Extension{ + Info: *bridgeInfo, + Status: extensionpkg.ExtensionStatus{ + Name: bridgeInfo.Name, + Version: bridgeInfo.Version, + Source: bridgeInfo.Source, + Enabled: bridgeInfo.Enabled, + Registered: true, + Active: true, + Healthy: true, + HealthMessage: "connected", + LastStartedAt: now.Add(-time.Minute), + }, + }, + }) + + providers, err := runtime.ListProviders(testutil.Context(t)) + if err != nil { + t.Fatalf("ListProviders() error = %v", err) + } + if got, want := len(providers), 1; got != want { + t.Fatalf("len(providers) = %d, want %d", got, want) + } + if got, want := providers[0].Platform, "telegram"; got != want { + t.Fatalf("provider platform = %q, want %q", got, want) + } + if got, want := providers[0].DisplayName, "Telegram"; got != want { + t.Fatalf("provider display name = %q, want %q", got, want) + } + if got, want := providers[0].State, "active"; got != want { + t.Fatalf("provider state = %q, want %q", got, want) + } + if got, want := providers[0].Health, "healthy"; got != want { + t.Fatalf("provider health = %q, want %q", got, want) + } + if got, want := providers[0].HealthMessage, "connected"; got != want { + t.Fatalf("provider health message = %q, want %q", got, want) + } + }) + + t.Run("ShouldSkipBridgeProvidersWithUnreadableManifestSnapshots", func(t *testing.T) { + t.Parallel() + + db := openDaemonTestGlobalDB(t) + runtime := newBridgeRuntime(db, discardLogger(), func() time.Time { + return time.Date(2026, 4, 11, 12, 35, 0, 0, time.UTC) + }, nil) + if runtime == nil || runtime.registry == nil { + t.Fatal("newBridgeRuntime() missing registry") + } + + goodInfo := mustInstallDaemonExtension(t, runtime.registry, daemonExtensionFixture{ + name: "telegram-reference", + description: "Reference Telegram bridge adapter", + capabilities: []string{"bridge.adapter"}, + 
bridgePlatform: "telegram", + bridgeDisplayName: "Telegram", + enabled: true, + }) + badInfo := mustInstallDaemonExtension(t, runtime.registry, daemonExtensionFixture{ + name: "slack-broken", + description: "Broken Slack bridge adapter", + capabilities: []string{"bridge.adapter"}, + bridgePlatform: "slack", + bridgeDisplayName: "Slack", + enabled: true, + }) + if err := os.Remove(badInfo.ManifestPath); err != nil { + t.Fatalf("os.Remove(%s) error = %v", badInfo.ManifestPath, err) + } + + providers, err := runtime.ListProviders(testutil.Context(t)) + if err != nil { + t.Fatalf("ListProviders() error = %v", err) + } + if got, want := len(providers), 1; got != want { + t.Fatalf("len(providers) = %d, want %d", got, want) + } + if got, want := providers[0].ExtensionName, goodInfo.Name; got != want { + t.Fatalf("provider extension_name = %q, want %q", got, want) + } + }) +} + func TestBridgeRuntimeResolveBridgeRuntime(t *testing.T) { t.Run("ShouldResolveBoundSecrets", func(t *testing.T) { t.Parallel() @@ -759,6 +880,77 @@ func newBlockingReloadExtensionRuntime(reloadErr error) *blockingReloadExtension } } +type daemonExtensionFixture struct { + name string + description string + capabilities []string + bridgePlatform string + bridgeDisplayName string + enabled bool +} + +func mustInstallDaemonExtension( + t *testing.T, + registry *extensionpkg.Registry, + fixture daemonExtensionFixture, +) *extensionpkg.ExtensionInfo { + t.Helper() + + dir := t.TempDir() + manifestPath := filepath.Join(dir, "extension.toml") + if err := os.WriteFile(manifestPath, []byte(daemonExtensionManifest(fixture)), 0o644); err != nil { + t.Fatalf("os.WriteFile(%s) error = %v", manifestPath, err) + } + + manifest, err := extensionpkg.LoadManifest(dir) + if err != nil { + t.Fatalf("LoadManifest(%s) error = %v", dir, err) + } + checksum, err := extensionpkg.ComputeDirectoryChecksum(dir) + if err != nil { + t.Fatalf("ComputeDirectoryChecksum(%s) error = %v", dir, err) + } + if err := 
registry.Install(manifest, dir, checksum); err != nil { + t.Fatalf("Install(%s) error = %v", fixture.name, err) + } + if !fixture.enabled { + if err := registry.Disable(fixture.name); err != nil { + t.Fatalf("Disable(%s) error = %v", fixture.name, err) + } + } + + info, err := registry.Get(fixture.name) + if err != nil { + t.Fatalf("Get(%s) error = %v", fixture.name, err) + } + return info +} + +func daemonExtensionManifest(fixture daemonExtensionFixture) string { + var builder strings.Builder + + fmt.Fprintf(&builder, "[extension]\nname = %q\nversion = \"0.1.0\"\ndescription = %q\nmin_agh_version = \"0.5.0\"\n\n", fixture.name, fixture.description) + if len(fixture.capabilities) > 0 { + fmt.Fprintf(&builder, "[capabilities]\nprovides = [%s]\n\n", quotedStringList(fixture.capabilities)) + } + if fixture.bridgePlatform != "" || fixture.bridgeDisplayName != "" { + fmt.Fprintf(&builder, "[bridge]\nplatform = %q\ndisplay_name = %q\n", fixture.bridgePlatform, fixture.bridgeDisplayName) + } + return builder.String() +} + +func quotedStringList(values []string) string { + if len(values) == 0 { + return "" + } + + quoted := make([]string, 0, len(values)) + for _, value := range values { + quoted = append(quoted, fmt.Sprintf("%q", value)) + } + return strings.Join(quoted, ", ") +} + func (r *blockingReloadExtensionRuntime) Start(context.Context) error { return nil } diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 36abcf4ef..d510525a0 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -56,6 +56,7 @@ type Observer interface { type Registry interface { observe.Registry store.NetworkAuditStore + store.NetworkMessageStore workspacepkg.WorkspaceStore } @@ -463,6 +464,7 @@ func (d *Daemon) applyDefaults() error { httpapi.WithStartedAt(deps.StartedAt), httpapi.WithSessionManager(deps.Sessions), httpapi.WithNetworkService(deps.Network), + httpapi.WithNetworkStore(deps.Registry), httpapi.WithObserver(deps.Observer), 
httpapi.WithAutomation(deps.Automation), httpapi.WithBridgeService(deps.Bridges), @@ -482,6 +484,7 @@ func (d *Daemon) applyDefaults() error { udsapi.WithStartedAt(deps.StartedAt), udsapi.WithSessionManager(deps.Sessions), udsapi.WithNetworkService(deps.Network), + udsapi.WithNetworkStore(deps.Registry), udsapi.WithObserver(deps.Observer), udsapi.WithAutomation(deps.Automation), udsapi.WithBridgeService(deps.Bridges), diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go index 5e5fac8ec..a98bae71d 100644 --- a/internal/daemon/daemon_test.go +++ b/internal/daemon/daemon_test.go @@ -3102,6 +3102,14 @@ func (r *recordingRegistry) ListNetworkAudit(context.Context, store.NetworkAudit return nil, nil } +func (r *recordingRegistry) WriteNetworkMessage(context.Context, store.NetworkMessageEntry) error { + return nil +} + +func (r *recordingRegistry) ListNetworkMessages(context.Context, store.NetworkMessageQuery) ([]store.NetworkMessageEntry, error) { + return nil, nil +} + func (r *recordingRegistry) Close(context.Context) error { if r.onClose != nil { r.onClose() diff --git a/internal/daemon/extensions.go b/internal/daemon/extensions.go index 8083fae6e..6353aba88 100644 --- a/internal/daemon/extensions.go +++ b/internal/daemon/extensions.go @@ -142,15 +142,28 @@ func (s *daemonExtensionService) reload(ctx context.Context) error { } func (s *daemonExtensionService) lookup(name string) (*extensionpkg.Extension, error) { + return loadExtensionSnapshot(s.registry, s.runtime, s.logger, name) +} + +func loadExtensionSnapshot( + registry *extensionpkg.Registry, + runtime extensionRuntime, + logger *slog.Logger, + name string, +) (*extensionpkg.Extension, error) { + if registry == nil { + return nil, errors.New("daemon: extension registry is required") + } + trimmed := strings.TrimSpace(name) if trimmed == "" { return nil, errors.New("extension: extension name is required") } - if s.runtime != nil { - ext, err := s.runtime.Get(trimmed) + if runtime != nil { 
+ ext, err := runtime.Get(trimmed) if err == nil { - s.populateManifest(ext) + populateExtensionManifest(logger, ext) return ext, nil } if !errors.Is(err, extensionpkg.ErrExtensionNotFound) { @@ -158,7 +171,7 @@ func (s *daemonExtensionService) lookup(name string) (*extensionpkg.Extension, e } } - info, err := s.registry.Get(trimmed) + info, err := registry.Get(trimmed) if err != nil { return nil, err } @@ -172,18 +185,20 @@ func (s *daemonExtensionService) lookup(name string) (*extensionpkg.Extension, e Enabled: info.Enabled, }, } - s.populateManifest(ext) + populateExtensionManifest(logger, ext) return ext, nil } -func (s *daemonExtensionService) populateManifest(ext *extensionpkg.Extension) { +func populateExtensionManifest(logger *slog.Logger, ext *extensionpkg.Extension) { if ext == nil || ext.Manifest != nil || strings.TrimSpace(ext.Info.ManifestPath) == "" { return } manifest, err := extensionpkg.LoadManifest(filepath.Dir(ext.Info.ManifestPath)) if err != nil { - s.logger.Debug("daemon: load extension manifest for status failed", "path", ext.Info.ManifestPath, "error", err) + if logger != nil { + logger.Debug("daemon: load extension manifest for status failed", "path", ext.Info.ManifestPath, "error", err) + } return } ext.Manifest = manifest diff --git a/internal/extension/manager_test.go b/internal/extension/manager_test.go index 83f517f91..f8b4abf51 100644 --- a/internal/extension/manager_test.go +++ b/internal/extension/manager_test.go @@ -1250,18 +1250,20 @@ type managerFixture struct { } type managerManifestOptions struct { - command string - args []string - withEnv map[string]string - withSkills bool - withAgents bool - withHooks bool - withMCP bool - minVersion string - capabilities []string - actions []string - security []string - shutdown time.Duration + command string + args []string + withEnv map[string]string + withSkills bool + withAgents bool + withHooks bool + withMCP bool + minVersion string + capabilities []string + actions []string + 
security []string + bridgePlatform string + bridgeDisplayName string + shutdown time.Duration } type fakeLauncher struct { @@ -1781,6 +1783,16 @@ func managerTestManifest(name string, opts managerManifestOptions) string { if len(security) == 0 { security = []string{"session.read"} } + bridgePlatform := opts.bridgePlatform + bridgeDisplayName := opts.bridgeDisplayName + if slices.Contains(capabilities, extensionprotocol.CapabilityProvideBridgeAdapter) { + if bridgePlatform == "" { + bridgePlatform = "telegram" + } + if bridgeDisplayName == "" { + bridgeDisplayName = "Telegram" + } + } var builder strings.Builder fmt.Fprintf(&builder, `[extension] @@ -1823,7 +1835,15 @@ args = ["--context", "prod"] [capabilities] provides = ` + tomlStringArray(capabilities) + ` -[actions] +`) + if bridgePlatform != "" || bridgeDisplayName != "" { + fmt.Fprintf(&builder, `[bridge] +platform = %q +display_name = %q + +`, bridgePlatform, bridgeDisplayName) + } + builder.WriteString(`[actions] requires = ` + tomlStringArray(actions) + ` [subprocess] diff --git a/internal/extension/manifest.go b/internal/extension/manifest.go index 34eed9520..6b36bbfaf 100644 --- a/internal/extension/manifest.go +++ b/internal/extension/manifest.go @@ -14,6 +14,7 @@ import ( "github.com/BurntSushi/toml" + extensionprotocol "github.com/pedronauck/agh/internal/extension/protocol" "github.com/pedronauck/agh/internal/version" ) @@ -44,6 +45,7 @@ type Manifest struct { Actions ActionsConfig `toml:"actions" json:"actions"` Subprocess SubprocessConfig `toml:"subprocess" json:"subprocess"` Security SecurityConfig `toml:"security" json:"security"` + Bridge BridgeConfig `toml:"bridge" json:"bridge"` } // ResourcesConfig declares static assets bundled with an extension. @@ -78,6 +80,12 @@ type SecurityConfig struct { Capabilities []string `toml:"capabilities,omitempty" json:"capabilities,omitempty"` } +// BridgeConfig declares provider metadata for bridge-capable extensions. 
+type BridgeConfig struct { + Platform string `toml:"platform,omitempty" json:"platform,omitempty"` + DisplayName string `toml:"display_name,omitempty" json:"display_name,omitempty"` +} + // HookConfig mirrors the hook declaration shape accepted from extension manifests. type HookConfig struct { Name string `toml:"name" json:"name"` @@ -162,6 +170,7 @@ type manifestDocument struct { Actions ActionsConfig `toml:"actions" json:"actions"` Subprocess SubprocessConfig `toml:"subprocess" json:"subprocess"` Security SecurityConfig `toml:"security" json:"security"` + Bridge BridgeConfig `toml:"bridge" json:"bridge"` } type manifestCore struct { @@ -230,6 +239,14 @@ func (m *Manifest) Validate() error { if err := validateDottedIdentifiers("security.capabilities", m.Security.Capabilities, true); err != nil { return err } + if providesCapability(m.Capabilities.Provides, extensionprotocol.CapabilityProvideBridgeAdapter) { + if err := requireField("bridge.platform", m.Bridge.Platform); err != nil { + return err + } + if err := requireField("bridge.display_name", m.Bridge.DisplayName); err != nil { + return err + } + } return nil } @@ -414,6 +431,7 @@ func (d manifestDocument) toManifest() (Manifest, error) { Actions: normalizeActionsConfig(d.Actions), Subprocess: normalizeSubprocessConfig(d.Subprocess), Security: normalizeSecurityConfig(d.Security), + Bridge: normalizeBridgeConfig(d.Bridge), } return manifest, nil } @@ -473,6 +491,13 @@ func normalizeSecurityConfig(cfg SecurityConfig) SecurityConfig { } } +func normalizeBridgeConfig(cfg BridgeConfig) BridgeConfig { + return BridgeConfig{ + Platform: strings.TrimSpace(cfg.Platform), + DisplayName: strings.TrimSpace(cfg.DisplayName), + } +} + func normalizeHooks(src []HookConfig) []HookConfig { if len(src) == 0 { return nil @@ -729,6 +754,15 @@ func validIdentifierPart(part string) bool { return true } +func providesCapability(values []string, want string) bool { + for _, value := range values { + if strings.TrimSpace(value) == 
strings.TrimSpace(want) { + return true + } + } + return false +} + type semanticVersion struct { core [3]int prerelease []prereleaseIdentifier diff --git a/internal/extension/manifest_test.go b/internal/extension/manifest_test.go index 8483800e7..e8e20b6c4 100644 --- a/internal/extension/manifest_test.go +++ b/internal/extension/manifest_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + extensionprotocol "github.com/pedronauck/agh/internal/extension/protocol" "github.com/pedronauck/agh/internal/version" ) @@ -463,6 +464,61 @@ func TestManifestValidate_RejectsInvalidActionName(t *testing.T) { } } +func TestManifestValidate_RequiresBridgeMetadataForBridgeAdapters(t *testing.T) { + withDaemonVersion(t, "0.6.0") + + t.Run("Should reject bridge adapters without platform metadata", func(t *testing.T) { + manifest := expectedManifest() + manifest.Capabilities.Provides = []string{extensionprotocol.CapabilityProvideBridgeAdapter} + + err := manifest.Validate() + if err == nil { + t.Fatal("Validate() error = nil, want ErrManifestInvalid") + } + if !errors.Is(err, ErrManifestInvalid) { + t.Fatalf("Validate() error = %v, want ErrManifestInvalid", err) + } + + var validationErr *ManifestValidationError + if !errors.As(err, &validationErr) { + t.Fatalf("Validate() error = %T, want *ManifestValidationError", err) + } + if validationErr.Field != "bridge.platform" { + t.Fatalf("validation field = %q, want %q", validationErr.Field, "bridge.platform") + } + }) + + t.Run("Should reject bridge adapters without display name metadata", func(t *testing.T) { + manifest := expectedManifest() + manifest.Capabilities.Provides = []string{extensionprotocol.CapabilityProvideBridgeAdapter} + manifest.Bridge.Platform = "telegram" + + err := manifest.Validate() + if err == nil { + t.Fatal("Validate() error = nil, want ErrManifestInvalid") + } + + var validationErr *ManifestValidationError + if !errors.As(err, &validationErr) { + t.Fatalf("Validate() error = %T, want *ManifestValidationError", 
err) + } + if validationErr.Field != "bridge.display_name" { + t.Fatalf("validation field = %q, want %q", validationErr.Field, "bridge.display_name") + } + }) + + t.Run("Should accept bridge adapters with complete bridge metadata", func(t *testing.T) { + manifest := expectedManifest() + manifest.Capabilities.Provides = []string{extensionprotocol.CapabilityProvideBridgeAdapter} + manifest.Bridge.Platform = "telegram" + manifest.Bridge.DisplayName = "Telegram" + + if err := manifest.Validate(); err != nil { + t.Fatalf("Validate() with bridge metadata error = %v", err) + } + }) +} + func TestManifestHelpers_ErrorFormattingAndDurationMethods(t *testing.T) { notFound := &ManifestNotFoundError{ Dir: "/tmp/ext", diff --git a/internal/network/audit.go b/internal/network/audit.go index 9f68162c6..840633853 100644 --- a/internal/network/audit.go +++ b/internal/network/audit.go @@ -21,6 +21,8 @@ const ( AuditDirectionReceived = "received" // AuditDirectionRejected records a rejected envelope. AuditDirectionRejected = "rejected" + // AuditDirectionDelivered records a completed local delivery. + AuditDirectionDelivered = "delivered" ) // AuditStore is the persistence surface consumed by the network audit writer. @@ -28,19 +30,26 @@ type AuditStore interface { WriteNetworkAudit(ctx context.Context, entry store.NetworkAuditEntry) error } +// MessageStore is the persistence surface consumed by the network timeline writer. +type MessageStore interface { + WriteNetworkMessage(ctx context.Context, entry store.NetworkMessageEntry) error +} + // AuditWriter records network activity into the configured sinks. 
type AuditWriter interface { RecordSent(ctx context.Context, sessionID string, envelope Envelope) error RecordReceived(ctx context.Context, sessionID string, envelope Envelope) error RecordRejected(ctx context.Context, sessionID string, envelope Envelope, reason string) error + RecordDelivered(ctx context.Context, sessionID string, envelope Envelope) error } // FileAuditWriter writes normalized network audit records to a JSONL file and // optionally mirrors them into a persistent store. type FileAuditWriter struct { - path string - store AuditStore - now func() time.Time + path string + store AuditStore + messageStore MessageStore + now func() time.Time mu sync.Mutex } @@ -58,9 +67,21 @@ func NewAuditWriter(path string, auditStore AuditStore) (*FileAuditWriter, error now: func() time.Time { return time.Now().UTC() }, + messageStore: messageStoreFromAuditStore(auditStore), }, nil } +func messageStoreFromAuditStore(auditStore AuditStore) MessageStore { + if auditStore == nil { + return nil + } + messageStore, ok := auditStore.(MessageStore) + if !ok { + return nil + } + return messageStore +} + var _ AuditWriter = (*FileAuditWriter)(nil) // RecordSent stores a sent network audit record. @@ -78,6 +99,11 @@ func (w *FileAuditWriter) RecordRejected(ctx context.Context, sessionID string, return w.record(ctx, sessionID, AuditDirectionRejected, envelope, reason) } +// RecordDelivered stores a delivered network audit record. 
+func (w *FileAuditWriter) RecordDelivered(ctx context.Context, sessionID string, envelope Envelope) error { + return w.record(ctx, sessionID, AuditDirectionDelivered, envelope, "") +} + func (w *FileAuditWriter) record(ctx context.Context, sessionID string, direction string, envelope Envelope, reason string) error { if ctx == nil { return errors.New("network: audit context is required") @@ -95,8 +121,22 @@ func (w *FileAuditWriter) record(ctx context.Context, sessionID string, directio if w.path != "" { recordErr = errors.Join(recordErr, w.appendFile(entry)) } + var auditWriteErr error if w.store != nil { - recordErr = errors.Join(recordErr, w.store.WriteNetworkAudit(ctx, entry)) + auditWriteErr = w.store.WriteNetworkAudit(ctx, entry) + recordErr = errors.Join(recordErr, auditWriteErr) + } + if w.messageStore == nil || auditWriteErr != nil { + return recordErr + } + + messageEntry, ok, messageErr := normalizeTimelineMessageEntry(sessionID, direction, envelope, entry.Timestamp) + if messageErr != nil { + recordErr = errors.Join(recordErr, messageErr) + return recordErr + } + if ok { + recordErr = errors.Join(recordErr, w.messageStore.WriteNetworkMessage(ctx, messageEntry)) } return recordErr @@ -137,6 +177,44 @@ func NormalizeAuditEntry(sessionID string, direction string, envelope Envelope, return entry, nil } +func normalizeTimelineMessageEntry(sessionID string, direction string, envelope Envelope, at time.Time) (store.NetworkMessageEntry, bool, error) { + if envelope.Kind != KindSay { + return store.NetworkMessageEntry{}, false, nil + } + switch strings.TrimSpace(direction) { + case AuditDirectionSent, AuditDirectionReceived: + default: + return store.NetworkMessageEntry{}, false, nil + } + + body, err := envelope.DecodeBody() + if err != nil { + return store.NetworkMessageEntry{}, false, fmt.Errorf("network: decode timeline envelope body: %w", err) + } + sayBody, ok := body.(SayBody) + if !ok { + return store.NetworkMessageEntry{}, false, fmt.Errorf("network: 
unexpected timeline body type for %q", envelope.ID) + } + if at.IsZero() { + at = time.Now().UTC() + } + + entry := store.NetworkMessageEntry{ + MessageID: strings.TrimSpace(envelope.ID), + SessionID: strings.TrimSpace(sessionID), + Channel: strings.TrimSpace(envelope.Channel), + PeerFrom: strings.TrimSpace(envelope.From), + Kind: strings.TrimSpace(string(envelope.Kind)), + Intent: strings.TrimSpace(sayBody.Intent), + Text: sayBody.Text, + Timestamp: at.UTC(), + } + if err := entry.Validate(); err != nil { + return store.NetworkMessageEntry{}, false, err + } + return entry, true, nil +} + func (w *FileAuditWriter) appendFile(entry store.NetworkAuditEntry) error { w.mu.Lock() defer w.mu.Unlock() diff --git a/internal/network/audit_test.go b/internal/network/audit_test.go index 56dd55260..d9083b93b 100644 --- a/internal/network/audit_test.go +++ b/internal/network/audit_test.go @@ -3,6 +3,7 @@ package network import ( "context" "encoding/json" + "errors" "os" "path/filepath" "reflect" @@ -13,7 +14,8 @@ import ( ) type recordingAuditStore struct { - entries []store.NetworkAuditEntry + entries []store.NetworkAuditEntry + messages []store.NetworkMessageEntry } func (s *recordingAuditStore) WriteNetworkAudit(_ context.Context, entry store.NetworkAuditEntry) error { @@ -21,6 +23,31 @@ func (s *recordingAuditStore) WriteNetworkAudit(_ context.Context, entry store.N return nil } +func (s *recordingAuditStore) WriteNetworkMessage(_ context.Context, entry store.NetworkMessageEntry) error { + s.messages = append(s.messages, entry) + return nil +} + +type failingAuditStore struct { + recordingAuditStore + auditErr error + auditCalls int + messageCalls int +} + +func (s *failingAuditStore) WriteNetworkAudit(_ context.Context, entry store.NetworkAuditEntry) error { + s.auditCalls++ + if s.auditErr != nil { + return s.auditErr + } + return s.recordingAuditStore.WriteNetworkAudit(context.Background(), entry) +} + +func (s *failingAuditStore) WriteNetworkMessage(_ context.Context, 
entry store.NetworkMessageEntry) error { + s.messageCalls++ + return s.recordingAuditStore.WriteNetworkMessage(context.Background(), entry) +} + func TestNewAuditWriterRequiresSink(t *testing.T) { t.Parallel() @@ -111,6 +138,149 @@ func TestAuditWriterRecordSentAndRejected(t *testing.T) { } } +func TestAuditWriterRecordsDeliveredDirection(t *testing.T) { + t.Parallel() + + t.Run("Should record delivered direction in the audit sink", func(t *testing.T) { + storeSink := &recordingAuditStore{} + writer, err := NewAuditWriter("", storeSink) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + + if err := writer.RecordDelivered(context.Background(), "sess-audit", testAuditEnvelope(t)); err != nil { + t.Fatalf("RecordDelivered() error = %v", err) + } + if got, want := len(storeSink.entries), 1; got != want { + t.Fatalf("len(store entries) = %d, want %d", got, want) + } + if got, want := storeSink.entries[0].Direction, AuditDirectionDelivered; got != want { + t.Fatalf("entries[0].Direction = %q, want %q", got, want) + } + }) +} + +func TestAuditWriterPersistsTimelineMessagesForSayEnvelopesOnly(t *testing.T) { + t.Parallel() + + t.Run("Should persist sent say envelopes to the timeline store", func(t *testing.T) { + storeSink := &recordingAuditStore{} + writer, err := NewAuditWriter("", storeSink) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + recordedAt := time.Date(2026, 4, 10, 12, 0, 0, 0, time.UTC) + writer.now = func() time.Time { return recordedAt } + + if err := writer.RecordSent(context.Background(), "sess-audit", testSayAuditEnvelope(t)); err != nil { + t.Fatalf("RecordSent(say) error = %v", err) + } + + if got, want := len(storeSink.messages), 1; got != want { + t.Fatalf("len(store messages) = %d, want %d", got, want) + } + if got, want := storeSink.messages[0].MessageID, "msg_say_01"; got != want { + t.Fatalf("messages[0].MessageID = %q, want %q", got, want) + } + if got, want := storeSink.messages[0].Intent, "announce"; 
got != want { + t.Fatalf("messages[0].Intent = %q, want %q", got, want) + } + if got, want := storeSink.messages[0].Text, " hello builders \n"; got != want { + t.Fatalf("messages[0].Text = %q, want %q", got, want) + } + }) + + t.Run("Should persist received say envelopes to the timeline store", func(t *testing.T) { + storeSink := &recordingAuditStore{} + writer, err := NewAuditWriter("", storeSink) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + recordedAt := time.Date(2026, 4, 10, 12, 0, 0, 0, time.UTC) + writer.now = func() time.Time { return recordedAt } + + if err := writer.RecordReceived(context.Background(), "sess-remote", testSayAuditEnvelope(t)); err != nil { + t.Fatalf("RecordReceived(say) error = %v", err) + } + + if got, want := len(storeSink.messages), 1; got != want { + t.Fatalf("len(store messages) = %d, want %d", got, want) + } + if got, want := storeSink.messages[0].SessionID, "sess-remote"; got != want { + t.Fatalf("messages[0].SessionID = %q, want %q", got, want) + } + if got, want := storeSink.messages[0].MessageID, "msg_say_01"; got != want { + t.Fatalf("messages[0].MessageID = %q, want %q", got, want) + } + }) + + t.Run("Should ignore non-say envelopes when writing timeline messages", func(t *testing.T) { + storeSink := &recordingAuditStore{} + writer, err := NewAuditWriter("", storeSink) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + + if err := writer.RecordSent(context.Background(), "sess-audit", testAuditEnvelope(t)); err != nil { + t.Fatalf("RecordSent(direct) error = %v", err) + } + if err := writer.RecordReceived(context.Background(), "sess-audit", testAuditEnvelope(t)); err != nil { + t.Fatalf("RecordReceived(direct) error = %v", err) + } + + if got := len(storeSink.messages); got != 0 { + t.Fatalf("len(store messages) = %d, want 0", got) + } + }) +} + +func TestAuditWriterSkipsTimelineWriteWhenAuditStoreFails(t *testing.T) { + t.Parallel() + + t.Run("Should not persist timeline rows after 
audit write failures", func(t *testing.T) { + storeErr := errors.New("audit store unavailable") + storeSink := &failingAuditStore{auditErr: storeErr} + writer, err := NewAuditWriter("", storeSink) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + + err = writer.RecordSent(context.Background(), "sess-audit", testSayAuditEnvelope(t)) + if !errors.Is(err, storeErr) { + t.Fatalf("RecordSent() error = %v, want wrapped store error", err) + } + if got, want := storeSink.auditCalls, 1; got != want { + t.Fatalf("audit calls = %d, want %d", got, want) + } + if got := storeSink.messageCalls; got != 0 { + t.Fatalf("message calls = %d, want 0", got) + } + if got := len(storeSink.messages); got != 0 { + t.Fatalf("len(store messages) = %d, want 0", got) + } + }) +} + +func TestAuditWriterAllowsFileOnlySinksWithoutTimelineNormalization(t *testing.T) { + t.Parallel() + + t.Run("Should skip timeline normalization when no message store is configured", func(t *testing.T) { + auditPath := filepath.Join(t.TempDir(), "logs", "network.audit") + writer, err := NewAuditWriter(auditPath, nil) + if err != nil { + t.Fatalf("NewAuditWriter() error = %v", err) + } + + if err := writer.RecordSent(context.Background(), "sess-audit", testInvalidSayAuditEnvelope(t)); err != nil { + t.Fatalf("RecordSent(file-only invalid say) error = %v", err) + } + + if _, err := os.Stat(auditPath); err != nil { + t.Fatalf("Stat(%q) error = %v", auditPath, err) + } + }) +} + func testAuditEnvelope(t *testing.T) Envelope { t.Helper() @@ -126,3 +296,31 @@ func testAuditEnvelope(t *testing.T) Envelope { Body: mustRawJSON(t, map[string]any{"text": "Please inspect auth.go"}), } } + +func testSayAuditEnvelope(t *testing.T) Envelope { + t.Helper() + + return Envelope{ + Protocol: ProtocolV0, + ID: "msg_say_01", + Kind: KindSay, + Channel: "builders", + From: "coder.sess-audit", + TS: time.Date(2026, 4, 10, 12, 0, 0, 0, time.UTC).Unix(), + Body: mustRawJSON(t, SayBody{Text: " hello builders \n", Intent: 
"announce"}), + } +} + +func testInvalidSayAuditEnvelope(t *testing.T) Envelope { + t.Helper() + + return Envelope{ + Protocol: ProtocolV0, + ID: "msg_say_invalid_01", + Kind: KindSay, + Channel: "builders", + From: "coder.sess-audit", + TS: time.Date(2026, 4, 10, 12, 0, 0, 0, time.UTC).Unix(), + Body: mustRawJSON(t, []string{"not", "an", "object"}), + } +} diff --git a/internal/network/manager.go b/internal/network/manager.go index 31d431a42..0d61f9800 100644 --- a/internal/network/manager.go +++ b/internal/network/manager.go @@ -970,7 +970,15 @@ func (m *Manager) recordAuditRejected(ctx context.Context, sessionID string, env } func (m *Manager) recordDelivered(sessionID string, envelope Envelope, _ string, _ time.Duration) { - if m == nil || m.stats == nil { + if m == nil { + return + } + if m.auditor != nil { + if err := m.auditor.RecordDelivered(m.lifecycleCtx, sessionID, envelope); err != nil { + m.logger.Warn("network.audit.record_delivered_failed", "session_id", sessionID, "envelope_id", envelope.ID, "error", err) + } + } + if m.stats == nil { return } m.stats.recordDelivered(envelope) diff --git a/internal/network/manager_test.go b/internal/network/manager_test.go index 4e743c48d..eea797355 100644 --- a/internal/network/manager_test.go +++ b/internal/network/manager_test.go @@ -950,10 +950,11 @@ func discardManagerLogger() *slog.Logger { } type recordingAuditWriter struct { - mu sync.Mutex - sent []auditCall - received []auditCall - rejected []auditCall + mu sync.Mutex + sent []auditCall + received []auditCall + rejected []auditCall + delivered []auditCall } var _ AuditWriter = (*recordingAuditWriter)(nil) @@ -985,6 +986,13 @@ func (w *recordingAuditWriter) RecordRejected(_ context.Context, sessionID strin return nil } +func (w *recordingAuditWriter) RecordDelivered(_ context.Context, sessionID string, envelope Envelope) error { + w.mu.Lock() + defer w.mu.Unlock() + w.delivered = append(w.delivered, auditCall{sessionID: sessionID, envelope: envelope}) + 
return nil +} + func (w *recordingAuditWriter) countSent(kind Kind) int { w.mu.Lock() defer w.mu.Unlock() @@ -1043,4 +1051,5 @@ func (w *recordingAuditWriter) reset() { w.sent = nil w.received = nil w.rejected = nil + w.delivered = nil } diff --git a/internal/observe/bridges.go b/internal/observe/bridges.go index 6daf12e28..b88ada029 100644 --- a/internal/observe/bridges.go +++ b/internal/observe/bridges.go @@ -52,6 +52,7 @@ type BridgeInstanceHealth struct { DeliveryDroppedByReason map[string]int `json:"delivery_dropped_by_reason,omitempty"` DeliveryFailuresTotal int `json:"delivery_failures_total"` AuthFailuresTotal int `json:"auth_failures_total"` + LastSuccessAt time.Time `json:"last_success_at,omitempty"` LastError string `json:"last_error,omitempty"` LastErrorAt time.Time `json:"last_error_at,omitempty"` } @@ -193,6 +194,7 @@ func (o *Observer) collectBridgeHealth(ctx context.Context) ([]BridgeInstanceHea item.DeliveryDroppedTotal = metrics.DeliveryDroppedTotal item.DeliveryDroppedByReason = cloneDroppedReasons(metrics.DeliveryDroppedByReason) item.DeliveryFailuresTotal = metrics.DeliveryFailuresTotal + item.LastSuccessAt = metrics.LastSuccessAt item.LastError = strings.TrimSpace(metrics.LastError) item.LastErrorAt = metrics.LastErrorAt } diff --git a/internal/observe/bridges_test.go b/internal/observe/bridges_test.go index eec5ce303..0357930c9 100644 --- a/internal/observe/bridges_test.go +++ b/internal/observe/bridges_test.go @@ -128,6 +128,11 @@ func TestHealthTracksDeliveryBacklogWithoutChangingActiveSessions(t *testing.T) health, err := h.observer.Health(testutil.Context(t)) return err == nil && health.Bridges.DeliveryBacklog == 0 }) + + observed := observeBridgeHealthMap(t, h) + if got := observed[instance.ID].LastSuccessAt; !got.Equal(h.now) { + t.Fatalf("QueryBridgeHealth(%s).LastSuccessAt = %s, want %s", instance.ID, got, h.now) + } } func TestQueryBridgeHealthSurfacesAuthAndTerminalDeliveryFailuresPerInstance(t *testing.T) { diff --git 
a/internal/store/globaldb/global_db.go b/internal/store/globaldb/global_db.go index 96c2b6cd2..eb2708c87 100644 --- a/internal/store/globaldb/global_db.go +++ b/internal/store/globaldb/global_db.go @@ -89,6 +89,18 @@ var globalSchemaStatements = []string{ );`, `CREATE INDEX IF NOT EXISTS idx_net_audit_ts ON network_audit_log(timestamp);`, `CREATE INDEX IF NOT EXISTS idx_net_audit_session ON network_audit_log(session_id);`, + `CREATE TABLE IF NOT EXISTS network_message_log ( + message_id TEXT PRIMARY KEY, + session_id TEXT, + channel TEXT NOT NULL, + peer_from TEXT NOT NULL, + kind TEXT NOT NULL, + intent TEXT, + text TEXT NOT NULL, + timestamp TEXT NOT NULL + );`, + `CREATE INDEX IF NOT EXISTS idx_net_msg_channel_ts ON network_message_log(channel, timestamp);`, + `CREATE INDEX IF NOT EXISTS idx_net_msg_peer_ts ON network_message_log(peer_from, timestamp);`, `CREATE TABLE IF NOT EXISTS extensions ( name TEXT PRIMARY KEY, version TEXT NOT NULL, diff --git a/internal/store/globaldb/global_db_network_messages.go b/internal/store/globaldb/global_db_network_messages.go new file mode 100644 index 000000000..36a4a08cb --- /dev/null +++ b/internal/store/globaldb/global_db_network_messages.go @@ -0,0 +1,133 @@ +package globaldb + +import ( + "context" + "database/sql" + "errors" + "fmt" + "strings" + + "github.com/pedronauck/agh/internal/store" +) + +// WriteNetworkMessage stores one persisted network timeline message, ignoring duplicate message ids. 
+func (g *GlobalDB) WriteNetworkMessage(ctx context.Context, entry store.NetworkMessageEntry) error { + if err := g.checkReady(ctx, "write network message"); err != nil { + return err + } + if err := entry.Validate(); err != nil { + return fmt.Errorf("store: validate network message entry: %w", err) + } + if entry.Timestamp.IsZero() { + entry.Timestamp = g.now() + } + + if _, err := g.db.ExecContext( + ctx, + `INSERT INTO network_message_log ( + message_id, session_id, channel, peer_from, kind, intent, text, timestamp + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(message_id) DO NOTHING`, + entry.MessageID, + store.NullableString(entry.SessionID), + entry.Channel, + entry.PeerFrom, + entry.Kind, + store.NullableString(entry.Intent), + entry.Text, + store.FormatTimestamp(entry.Timestamp), + ); err != nil { + return fmt.Errorf("store: insert network message entry: %w", err) + } + + return nil +} + +// ListNetworkMessages returns persisted network timeline rows filtered by the supplied options. 
+func (g *GlobalDB) ListNetworkMessages( + ctx context.Context, + query store.NetworkMessageQuery, +) (entries []store.NetworkMessageEntry, err error) { + if err := g.checkReady(ctx, "list network messages"); err != nil { + return nil, err + } + if err := query.Validate(); err != nil { + return nil, fmt.Errorf("store: validate network message query: %w", err) + } + + sqlQuery := `SELECT message_id, session_id, channel, peer_from, kind, intent, text, timestamp FROM network_message_log` + where, args := store.BuildClauses( + store.StringClause("session_id", query.SessionID), + store.StringClause("channel", query.Channel), + store.StringClause("peer_from", query.PeerFrom), + store.StringClause("message_id", query.MessageID), + store.TimeClause("timestamp", ">=", query.Since), + ) + sqlQuery = store.AppendWhere(sqlQuery, where) + sqlQuery += " ORDER BY timestamp ASC, message_id ASC" + sqlQuery, args = store.AppendLimit(sqlQuery, args, query.Limit) + + rows, err := g.db.QueryContext(ctx, sqlQuery, args...) 
+ if err != nil { + return nil, fmt.Errorf("store: query network messages: %w", err) + } + defer func() { + if closeErr := rows.Close(); closeErr != nil { + closeErr = fmt.Errorf("store: close network messages rows: %w", closeErr) + if err != nil { + err = errors.Join(err, closeErr) + return + } + err = closeErr + } + }() + + entries = make([]store.NetworkMessageEntry, 0) + for rows.Next() { + entry, scanErr := scanNetworkMessage(rows) + if scanErr != nil { + return nil, scanErr + } + entries = append(entries, entry) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("store: iterate network messages: %w", err) + } + + return entries, nil +} + +func scanNetworkMessage(scanner rowScanner) (store.NetworkMessageEntry, error) { + var ( + entry store.NetworkMessageEntry + sessionID sql.NullString + intent sql.NullString + timestampRaw string + ) + if err := scanner.Scan( + &entry.MessageID, + &sessionID, + &entry.Channel, + &entry.PeerFrom, + &entry.Kind, + &intent, + &entry.Text, + &timestampRaw, + ); err != nil { + return store.NetworkMessageEntry{}, fmt.Errorf("store: scan network message: %w", err) + } + + if value := store.NullString(sessionID); value != nil { + entry.SessionID = *value + } + if value := store.NullString(intent); value != nil { + entry.Intent = strings.TrimSpace(*value) + } + + timestamp, err := store.ParseTimestamp(timestampRaw) + if err != nil { + return store.NetworkMessageEntry{}, fmt.Errorf("store: parse network message timestamp: %w", err) + } + entry.Timestamp = timestamp + return entry, nil +} diff --git a/internal/store/globaldb/global_db_network_messages_test.go b/internal/store/globaldb/global_db_network_messages_test.go new file mode 100644 index 000000000..e0d66d297 --- /dev/null +++ b/internal/store/globaldb/global_db_network_messages_test.go @@ -0,0 +1,199 @@ +package globaldb + +import ( + "errors" + "strings" + "testing" + "time" + + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/testutil"
+) + +func TestOpenGlobalDBCreatesNetworkMessageLogSchema(t *testing.T) { + t.Parallel() + + globalDB := openTestGlobalDB(t) + + assertTablesPresent(t, globalDB.db, "network_message_log") + assertTableColumns(t, globalDB.db, "network_message_log", []string{ + "message_id", + "session_id", + "channel", + "peer_from", + "kind", + "intent", + "text", + "timestamp", + }) + assertTableHasNoForeignKeys(t, globalDB.db, "network_message_log") +} + +func TestGlobalDBWriteAndListNetworkMessages(t *testing.T) { + t.Parallel() + + globalDB := openTestGlobalDB(t) + recordedAt := time.Date(2026, 4, 11, 12, 0, 0, 0, time.UTC) + globalDB.now = func() time.Time { return recordedAt } + + if err := globalDB.WriteNetworkMessage(testutil.Context(t), store.NetworkMessageEntry{ + MessageID: "msg_say_01", + SessionID: "sess-audit", + Channel: "builders", + PeerFrom: "coder.sess-audit", + Kind: "say", + Intent: "announce", + Text: "hello builders", + }); err != nil { + t.Fatalf("WriteNetworkMessage(first) error = %v", err) + } + if err := globalDB.WriteNetworkMessage(testutil.Context(t), store.NetworkMessageEntry{ + MessageID: "msg_say_01", + SessionID: "", + Channel: "builders", + PeerFrom: "coder.sess-audit", + Kind: "say", + Intent: "announce", + Text: "hello builders", + Timestamp: recordedAt.Add(time.Minute), + }); err != nil { + t.Fatalf("WriteNetworkMessage(duplicate) error = %v", err) + } + if err := globalDB.WriteNetworkMessage(testutil.Context(t), store.NetworkMessageEntry{ + MessageID: "msg_say_02", + Channel: "builders", + PeerFrom: "reviewer.sess-remote", + Kind: "say", + Text: "review in progress", + Timestamp: recordedAt.Add(time.Minute), + }); err != nil { + t.Fatalf("WriteNetworkMessage(second) error = %v", err) + } + + entries, err := globalDB.ListNetworkMessages(testutil.Context(t), store.NetworkMessageQuery{ + Channel: "builders", + Limit: 10, + }) + if err != nil { + t.Fatalf("ListNetworkMessages() error = %v", err) + } + if got, want := len(entries), 2; got != want { 
+ t.Fatalf("len(entries) = %d, want %d", got, want) + } + if got, want := entries[0].MessageID, "msg_say_01"; got != want { + t.Fatalf("entries[0].MessageID = %q, want %q", got, want) + } + if got, want := entries[0].Intent, "announce"; got != want { + t.Fatalf("entries[0].Intent = %q, want %q", got, want) + } + if got, want := entries[0].Timestamp, recordedAt; !got.Equal(want) { + t.Fatalf("entries[0].Timestamp = %s, want %s", got, want) + } + if got, want := entries[1].PeerFrom, "reviewer.sess-remote"; got != want { + t.Fatalf("entries[1].PeerFrom = %q, want %q", got, want) + } +} + +func TestGlobalDBNetworkMessageGuardClauses(t *testing.T) { + t.Parallel() + + var nilDB *GlobalDB + globalDB := openTestGlobalDB(t) + if err := globalDB.Close(testutil.Context(t)); err != nil { + t.Fatalf("Close() error = %v", err) + } + + tests := []struct { + name string + run func() error + want error + }{ + { + name: "Should reject writes on a nil receiver", + run: func() error { + return nilDB.WriteNetworkMessage(testutil.Context(t), store.NetworkMessageEntry{}) + }, + }, + { + name: "Should reject reads on a nil receiver", + run: func() error { + _, err := nilDB.ListNetworkMessages(testutil.Context(t), store.NetworkMessageQuery{}) + return err + }, + }, + { + name: "Should reject writes with a nil context", + run: func() error { + freshDB := openTestGlobalDB(t) + defer func() { + _ = freshDB.Close(testutil.Context(t)) + }() + return freshDB.WriteNetworkMessage(nilGlobalContext(), store.NetworkMessageEntry{}) + }, + }, + { + name: "Should reject reads with a nil context", + run: func() error { + freshDB := openTestGlobalDB(t) + defer func() { + _ = freshDB.Close(testutil.Context(t)) + }() + _, err := freshDB.ListNetworkMessages(nilGlobalContext(), store.NetworkMessageQuery{}) + return err + }, + }, + { + name: "Should reject writes after the store is closed", + run: func() error { + return globalDB.WriteNetworkMessage(testutil.Context(t), store.NetworkMessageEntry{}) + }, + 
want: store.ErrClosed, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + err := tt.run() + if tt.want != nil { + if !errors.Is(err, tt.want) { + t.Fatalf("error = %v, want %v", err, tt.want) + } + return + } + if err == nil { + t.Fatal("error = nil, want non-nil") + } + }) + } +} + +func TestGlobalDBListNetworkMessagesWrapsTimestampParseFailures(t *testing.T) { + t.Parallel() + + globalDB := openTestGlobalDB(t) + if _, err := globalDB.db.ExecContext( + testutil.Context(t), + `INSERT INTO network_message_log ( + message_id, session_id, channel, peer_from, kind, intent, text, timestamp + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + "msg_bad_timestamp", + nil, + "builders", + "coder.sess-audit", + "say", + nil, + "hello", + "not-a-timestamp", + ); err != nil { + t.Fatalf("ExecContext(insert invalid network message) error = %v", err) + } + + _, err := globalDB.ListNetworkMessages(testutil.Context(t), store.NetworkMessageQuery{Channel: "builders"}) + if err == nil { + t.Fatal("ListNetworkMessages(invalid timestamp) error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "parse network message timestamp") { + t.Fatalf("ListNetworkMessages(invalid timestamp) error = %v, want wrapped timestamp parse context", err) + } +} diff --git a/internal/store/store.go b/internal/store/store.go index 9c1ead05a..8b0fc6598 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -72,6 +72,12 @@ type NetworkAuditStore interface { ListNetworkAudit(ctx context.Context, query NetworkAuditQuery) ([]NetworkAuditEntry, error) } +// NetworkMessageStore manages persisted network timeline messages. +type NetworkMessageStore interface { + WriteNetworkMessage(ctx context.Context, entry NetworkMessageEntry) error + ListNetworkMessages(ctx context.Context, query NetworkMessageQuery) ([]NetworkMessageEntry, error) +} + // SessionRegistry composes the global persistence surfaces used by runtime consumers. 
type SessionRegistry interface { SessionCatalog @@ -79,6 +85,7 @@ type SessionRegistry interface { TokenStatsStore PermissionLogStore NetworkAuditStore + NetworkMessageStore Close(ctx context.Context) error } diff --git a/internal/store/types.go b/internal/store/types.go index 5995c318d..37fa3ea86 100644 --- a/internal/store/types.go +++ b/internal/store/types.go @@ -366,9 +366,16 @@ func (e NetworkAuditEntry) Validate() error { } direction := strings.TrimSpace(e.Direction) switch direction { - case "sent", "received", "rejected": + case "sent", "received", "rejected", "delivered": default: - return fmt.Errorf("store: network audit direction must be one of %q, %q, %q: %q", "sent", "received", "rejected", e.Direction) + return fmt.Errorf( + "store: network audit direction must be one of %q, %q, %q, %q: %q", + "sent", + "received", + "rejected", + "delivered", + e.Direction, + ) } if direction != e.Direction { return fmt.Errorf("store: network audit direction must not contain surrounding whitespace: %q", e.Direction) @@ -410,6 +417,53 @@ func (q NetworkAuditQuery) Validate() error { return requirePositiveLimit(q.Limit, "network audit limit") } +// NetworkMessageEntry is one persisted network timeline message. +type NetworkMessageEntry struct { + MessageID string + SessionID string + Channel string + PeerFrom string + Kind string + Intent string + Text string + Timestamp time.Time +} + +// Validate ensures the persisted network message is complete and internally consistent. 
+func (e NetworkMessageEntry) Validate() error { + if err := requireField(e.MessageID, "network message id"); err != nil { + return err + } + if err := requireField(e.Channel, "network message channel"); err != nil { + return err + } + if err := requireField(e.PeerFrom, "network message peer_from"); err != nil { + return err + } + if err := requireField(e.Kind, "network message kind"); err != nil { + return err + } + if err := requireField(e.Text, "network message text"); err != nil { + return err + } + return nil +} + +// NetworkMessageQuery filters persisted network timeline lookups. +type NetworkMessageQuery struct { + SessionID string + Channel string + PeerFrom string + MessageID string + Since time.Time + Limit int +} + +// Validate ensures the query uses sane bounds. +func (q NetworkMessageQuery) Validate() error { + return requirePositiveLimit(q.Limit, "network message limit") +} + // ReconcileResult reports which sessions were indexed or marked orphaned. type ReconcileResult struct { Indexed []string diff --git a/openapi/agh.json b/openapi/agh.json index 44825cee0..94c043c9c 100644 --- a/openapi/agh.json +++ b/openapi/agh.json @@ -3251,6 +3251,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -3597,6 +3602,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -3704,6 +3714,104 @@ "x-agh-transports": ["http", "uds"] } }, + "/api/bridges/providers": { + "get": { + "operationId": "listBridgeProviders", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "providers": { + "items": { + "properties": { + "description": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "extension_name": { + "type": "string" + 
}, + "health": { + "type": "string" + }, + "health_message": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "state": { + "type": "string" + } + }, + "required": [ + "display_name", + "enabled", + "extension_name", + "health", + "platform", + "state" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["providers"], + "type": "object" + } + } + }, + "description": "OK" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Bridge service is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "List installed bridge-capable providers", + "tags": ["bridges"], + "x-agh-transports": ["http", "uds"] + } + }, "/api/bridges/{id}": { "get": { "operationId": "getBridge", @@ -3833,6 +3941,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -4089,6 +4202,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -4325,6 +4443,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -4561,6 +4684,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -4797,6 +4925,11 @@ "nullable": true, "type": "string" }, + "last_success_at": { + "format": "date-time", + "nullable": 
true, + "type": "string" + }, "route_count": { "type": "integer" }, @@ -7259,102 +7392,56 @@ "x-agh-transports": ["http", "uds"] } }, - "/api/observe/events": { + "/api/network/channels": { "get": { - "operationId": "listObserveEvents", - "parameters": [ - { - "description": "Session id", - "in": "query", - "name": "session_id", - "schema": { - "type": "string" - } - }, - { - "description": "Agent name", - "in": "query", - "name": "agent_name", - "schema": { - "type": "string" - } - }, - { - "description": "Event type", - "in": "query", - "name": "type", - "schema": { - "type": "string" - } - }, - { - "description": "Only events emitted since this timestamp", - "in": "query", - "name": "since", - "schema": { - "format": "date-time", - "type": "string" - } - }, - { - "description": "Maximum number of records to return", - "in": "query", - "name": "limit", - "schema": { - "format": "int32", - "type": "integer" - } - } - ], + "operationId": "listNetworkChannels", "responses": { "200": { "content": { "application/json": { "schema": { "properties": { - "events": { + "channels": { "items": { "properties": { - "agent_name": { + "channel": { "type": "string" }, - "id": { + "last_message_at": { + "format": "date-time", + "nullable": true, "type": "string" }, - "session_id": { - "type": "string" + "local_peer_count": { + "type": "integer" }, - "summary": { - "type": "string" + "message_count": { + "type": "integer" }, - "timestamp": { - "format": "date-time", - "type": "string" + "peer_count": { + "type": "integer" }, - "type": { - "type": "string" + "remote_peer_count": { + "type": "integer" + }, + "session_count": { + "type": "integer" } }, - "required": [ - "agent_name", - "id", - "session_id", - "timestamp", - "type" - ], + "required": ["channel", "peer_count"], "type": "object" }, "type": "array" } }, - "required": ["events"], + "required": ["channels"], "type": "object" } } }, "description": "OK" }, - "400": { + "500": { "content": { "application/json": { "schema": { 
@@ -7368,9 +7455,9 @@ } } }, - "description": "Invalid filter" + "description": "Internal server error" }, - "500": { + "503": { "content": { "application/json": { "schema": { @@ -7384,50 +7471,1839 @@ } } }, - "description": "Internal server error" + "description": "Network runtime is not configured" }, "default": { "description": "" } }, - "summary": "List observability events", - "tags": ["observe"], + "summary": "List materialized network channels", + "tags": ["network"], "x-agh-transports": ["http", "uds"] - } - }, - "/api/observe/health": { - "get": { - "operationId": "getObserveHealth", + }, + "post": { + "operationId": "createNetworkChannel", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": { + "agent_names": { + "items": { + "type": "string" + }, + "type": "array" + }, + "channel": { + "type": "string" + }, + "workspace_id": { + "type": "string" + } + }, + "required": ["agent_names", "channel", "workspace_id"], + "type": "object" + } + } + }, + "description": "JSON request body", + "required": true + }, "responses": { - "200": { + "201": { "content": { "application/json": { "schema": { "properties": { - "automation": { + "channel": { "properties": { - "enabled": { - "type": "boolean" - }, - "jobs": { - "properties": { - "enabled": { - "type": "integer" - }, - "total": { - "type": "integer" - } - }, - "required": ["enabled", "total"], - "type": "object" + "channel": { + "type": "string" }, - "next_fire": { + "last_message_at": { "format": "date-time", "nullable": true, "type": "string" }, - "scheduler_running": { - "type": "boolean" + "local_peer_count": { + "type": "integer" + }, + "message_count": { + "type": "integer" + }, + "peer_count": { + "type": "integer" + }, + "peers": { + "items": { + "properties": { + "channel": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "expires_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "joined_at": { + "format": "date-time", 
+ "nullable": true, + "type": "string" + }, + "last_seen": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "local": { + "type": "boolean" + }, + "peer_card": { + "properties": { + "artifacts_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "capabilities": { + "items": { + "type": "string" + }, + "type": "array" + }, + "display_name": { + "nullable": true, + "type": "string" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "profiles_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "trust_modes_supported": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "artifacts_supported", + "capabilities", + "peer_id", + "profiles_supported", + "trust_modes_supported" + ], + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "session_id": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "channel", + "local", + "peer_card", + "peer_id" + ], + "type": "object" + }, + "type": "array" + }, + "remote_peer_count": { + "type": "integer" + }, + "session_count": { + "type": "integer" + }, + "sessions": { + "items": { + "properties": { + "acp_caps": { + "nullable": true, + "properties": { + "supported_models": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supported_modes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_load_session": { + "type": "boolean" + } + }, + "required": [ + "supports_load_session" + ], + "type": "object" + }, + "acp_session_id": { + "type": "string" + }, + "agent_name": { + "type": "string" + }, + "channel": { + "type": "string" + }, + "created_at": { + "format": "date-time", + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "state": { + "enum": [ + "starting", + "active", + "stopping", + "stopped" + ], + "type": "string" + }, + "stop_detail": { + "type": "string" + }, + 
"stop_reason": { + "enum": [ + "completed", + "user_canceled", + "max_iterations", + "loop_detected", + "timeout", + "budget_exceeded", + "error", + "agent_crashed", + "hook_stopped", + "shutdown" + ], + "type": "string" + }, + "updated_at": { + "format": "date-time", + "type": "string" + }, + "workspace_id": { + "type": "string" + }, + "workspace_path": { + "type": "string" + } + }, + "required": [ + "agent_name", + "created_at", + "id", + "state", + "updated_at" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["channel", "peer_count"], + "type": "object" + } + }, + "required": ["channel"], + "type": "object" + } + } + }, + "description": "Created" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid network channel request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Workspace not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "Create a network channel by spawning agent sessions", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/channels/{channel}": { + "get": { + "operationId": "getNetworkChannel", + "parameters": [ + { + "description": "Network channel", + "in": "path", + "name": "channel", + "required": true, + 
"schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "channel": { + "properties": { + "channel": { + "type": "string" + }, + "last_message_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "local_peer_count": { + "type": "integer" + }, + "message_count": { + "type": "integer" + }, + "peer_count": { + "type": "integer" + }, + "peers": { + "items": { + "properties": { + "channel": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "expires_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "joined_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "last_seen": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "local": { + "type": "boolean" + }, + "peer_card": { + "properties": { + "artifacts_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "capabilities": { + "items": { + "type": "string" + }, + "type": "array" + }, + "display_name": { + "nullable": true, + "type": "string" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "profiles_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "trust_modes_supported": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "artifacts_supported", + "capabilities", + "peer_id", + "profiles_supported", + "trust_modes_supported" + ], + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "session_id": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "channel", + "local", + "peer_card", + "peer_id" + ], + "type": "object" + }, + "type": "array" + }, + "remote_peer_count": { + "type": "integer" + }, + "session_count": { + "type": "integer" + }, + "sessions": { + "items": { + "properties": { + "acp_caps": { + "nullable": true, + "properties": { + 
"supported_models": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supported_modes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_load_session": { + "type": "boolean" + } + }, + "required": [ + "supports_load_session" + ], + "type": "object" + }, + "acp_session_id": { + "type": "string" + }, + "agent_name": { + "type": "string" + }, + "channel": { + "type": "string" + }, + "created_at": { + "format": "date-time", + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "state": { + "enum": [ + "starting", + "active", + "stopping", + "stopped" + ], + "type": "string" + }, + "stop_detail": { + "type": "string" + }, + "stop_reason": { + "enum": [ + "completed", + "user_canceled", + "max_iterations", + "loop_detected", + "timeout", + "budget_exceeded", + "error", + "agent_crashed", + "hook_stopped", + "shutdown" + ], + "type": "string" + }, + "updated_at": { + "format": "date-time", + "type": "string" + }, + "workspace_id": { + "type": "string" + }, + "workspace_path": { + "type": "string" + } + }, + "required": [ + "agent_name", + "created_at", + "id", + "state", + "updated_at" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["channel", "peer_count"], + "type": "object" + } + }, + "required": ["channel"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid network channel" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network channel not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + 
"type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "Get one network channel detail", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/channels/{channel}/messages": { + "get": { + "operationId": "listNetworkChannelMessages", + "parameters": [ + { + "description": "Network channel", + "in": "path", + "name": "channel", + "required": true, + "schema": { + "type": "string" + } + }, + { + "description": "Maximum number of timeline messages to return", + "in": "query", + "name": "limit", + "schema": { + "format": "int32", + "type": "integer" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "messages": { + "items": { + "properties": { + "channel": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "intent": { + "type": "string" + }, + "local": { + "type": "boolean" + }, + "message_id": { + "type": "string" + }, + "peer_id": { + "type": "string" + }, + "session_id": { + "type": "string" + }, + "text": { + "type": "string" + }, + "timestamp": { + "format": "date-time", + "type": "string" + } + }, + "required": [ + "channel", + "message_id", + "peer_id", + "text", + "timestamp" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["messages"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid network channel" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + 
"type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network channel not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "List the read-only timeline for one network channel", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/inbox": { + "get": { + "operationId": "listNetworkInbox", + "parameters": [ + { + "description": "Target local session id", + "in": "query", + "name": "session_id", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "messages": { + "items": { + "properties": { + "body": {}, + "causation_id": { + "nullable": true, + "type": "string" + }, + "channel": { + "type": "string" + }, + "expires_at": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "from": { + "type": "string" + }, + "id": { + "type": "string" + }, + "interaction_id": { + "nullable": true, + "type": "string" + }, + "kind": { + "type": "string" + }, + "proof": { + "additionalProperties": {}, + "type": "object" + }, + "protocol": { + "type": "string" + }, + "reply_to": { + "nullable": true, + "type": "string" + }, + "to": { + "nullable": true, + "type": "string" + }, + "trace_id": { + "nullable": true, + "type": "string" + }, + "ts": { + "format": "int64", + "type": "integer" + } + }, + "required": [ + "body", + "channel", + "from", 
+ "id", + "kind", + "protocol", + "ts" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["messages"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid inbox request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network target not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "List queued network inbox messages for one local session", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/peers": { + "get": { + "operationId": "listNetworkPeers", + "parameters": [ + { + "description": "Filter peers by channel", + "in": "query", + "name": "channel", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "peers": { + "items": { + "properties": { + "channel": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "expires_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "joined_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "last_seen": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + 
"local": { + "type": "boolean" + }, + "peer_card": { + "properties": { + "artifacts_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "capabilities": { + "items": { + "type": "string" + }, + "type": "array" + }, + "display_name": { + "nullable": true, + "type": "string" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "profiles_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "trust_modes_supported": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "artifacts_supported", + "capabilities", + "peer_id", + "profiles_supported", + "trust_modes_supported" + ], + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "session_id": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "channel", + "local", + "peer_card", + "peer_id" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["peers"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid network filter" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "List visible network peers", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/peers/{peer_id}": { + "get": { + "operationId": "getNetworkPeer", + "parameters": [ + { 
+ "description": "Network peer id", + "in": "path", + "name": "peer_id", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "peer": { + "properties": { + "channel": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "expires_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "joined_at": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "last_seen": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "local": { + "type": "boolean" + }, + "metrics": { + "properties": { + "delivered": { + "format": "int64", + "type": "integer" + }, + "received": { + "format": "int64", + "type": "integer" + }, + "rejected": { + "format": "int64", + "type": "integer" + }, + "sent": { + "format": "int64", + "type": "integer" + } + }, + "type": "object" + }, + "peer_card": { + "properties": { + "artifacts_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "capabilities": { + "items": { + "type": "string" + }, + "type": "array" + }, + "display_name": { + "nullable": true, + "type": "string" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "profiles_supported": { + "items": { + "type": "string" + }, + "type": "array" + }, + "trust_modes_supported": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "artifacts_supported", + "capabilities", + "peer_id", + "profiles_supported", + "trust_modes_supported" + ], + "type": "object" + }, + "peer_id": { + "type": "string" + }, + "session_id": { + "nullable": true, + "type": "string" + } + }, + "required": ["metrics", "peer_card", "peer_id"], + "type": "object" + } + }, + "required": ["peer"], + "type": "object" + } + } + }, + "description": "OK" + }, + "404": { + "content": { + "application/json": { + "schema": { + 
"properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network peer not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "Get one visible network peer detail", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/send": { + "post": { + "operationId": "sendNetworkMessage", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": { + "body": {}, + "causation_id": { + "type": "string" + }, + "channel": { + "type": "string" + }, + "expires_at": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + "id": { + "type": "string" + }, + "interaction_id": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "reply_to": { + "type": "string" + }, + "session_id": { + "type": "string" + }, + "to": { + "type": "string" + }, + "trace_id": { + "type": "string" + } + }, + "required": ["body", "channel", "kind", "session_id"], + "type": "object" + } + } + }, + "description": "JSON request body", + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "message": { + "properties": { + "causation_id": { + "type": "string" + }, + "channel": { + "type": "string" + }, + "expires_at": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "ext": { + "additionalProperties": {}, + "type": "object" + }, + 
"id": { + "type": "string" + }, + "interaction_id": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "reply_to": { + "type": "string" + }, + "session_id": { + "type": "string" + }, + "to": { + "type": "string" + }, + "trace_id": { + "type": "string" + } + }, + "required": ["channel", "id", "kind", "session_id"], + "type": "object" + } + }, + "required": ["message"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid network send request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network target not found" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "503": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Network runtime is not configured" + }, + "default": { + "description": "" + } + }, + "summary": "Send one network message", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/network/status": { + "get": { + "operationId": "getNetworkStatus", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "network": { + "properties": { + "channels": { + "type": "integer" + }, + "delivery_workers": { + "type": "integer" + }, + "enabled": { + "type": "boolean" + }, + "handoff_tagged_events": { + "format": "int64", + "type": "integer" + }, + "kind_metrics": { + "items": { + "properties": { + 
"delivered": { + "format": "int64", + "type": "integer" + }, + "kind": { + "type": "string" + }, + "received": { + "format": "int64", + "type": "integer" + }, + "rejected": { + "format": "int64", + "type": "integer" + }, + "sent": { + "format": "int64", + "type": "integer" + } + }, + "required": ["kind"], + "type": "object" + }, + "type": "array" + }, + "last_disconnect": { + "type": "string" + }, + "listener_host": { + "type": "string" + }, + "listener_port": { + "type": "integer" + }, + "local_peers": { + "type": "integer" + }, + "messages_delivered": { + "format": "int64", + "type": "integer" + }, + "messages_received": { + "format": "int64", + "type": "integer" + }, + "messages_rejected": { + "format": "int64", + "type": "integer" + }, + "messages_sent": { + "format": "int64", + "type": "integer" + }, + "queued_messages": { + "type": "integer" + }, + "queued_sessions": { + "type": "integer" + }, + "remote_peers": { + "type": "integer" + }, + "status": { + "type": "string" + }, + "workflow_tagged_events": { + "format": "int64", + "type": "integer" + } + }, + "required": ["enabled", "status"], + "type": "object" + } + }, + "required": ["network"], + "type": "object" + } + } + }, + "description": "OK" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "default": { + "description": "" + } + }, + "summary": "Get the network runtime status snapshot", + "tags": ["network"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/observe/events": { + "get": { + "operationId": "listObserveEvents", + "parameters": [ + { + "description": "Session id", + "in": "query", + "name": "session_id", + "schema": { + "type": "string" + } + }, + { + "description": "Agent name", + "in": "query", + "name": "agent_name", + "schema": { + "type": "string" + } + }, + { + "description": "Event type", + "in": 
"query", + "name": "type", + "schema": { + "type": "string" + } + }, + { + "description": "Only events emitted since this timestamp", + "in": "query", + "name": "since", + "schema": { + "format": "date-time", + "type": "string" + } + }, + { + "description": "Maximum number of records to return", + "in": "query", + "name": "limit", + "schema": { + "format": "int32", + "type": "integer" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "events": { + "items": { + "properties": { + "agent_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "session_id": { + "type": "string" + }, + "summary": { + "type": "string" + }, + "timestamp": { + "format": "date-time", + "type": "string" + }, + "type": { + "type": "string" + } + }, + "required": [ + "agent_name", + "id", + "session_id", + "timestamp", + "type" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": ["events"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid filter" + }, + "500": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "default": { + "description": "" + } + }, + "summary": "List observability events", + "tags": ["observe"], + "x-agh-transports": ["http", "uds"] + } + }, + "/api/observe/health": { + "get": { + "operationId": "getObserveHealth", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "automation": { + "properties": { + "enabled": { + "type": "boolean" + }, + "jobs": { + "properties": { + "enabled": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + 
"required": ["enabled", "total"], + "type": "object" + }, + "next_fire": { + "format": "date-time", + "nullable": true, + "type": "string" + }, + "scheduler_running": { + "type": "boolean" }, "triggers": { "properties": { @@ -10914,6 +12790,9 @@ { "name": "daemon" }, + { + "name": "network" + }, { "name": "extensions" }, diff --git a/sdk/examples/telegram-reference/extension.toml b/sdk/examples/telegram-reference/extension.toml index 169c88cb5..9b8527025 100644 --- a/sdk/examples/telegram-reference/extension.toml +++ b/sdk/examples/telegram-reference/extension.toml @@ -7,6 +7,10 @@ min_agh_version = "0.5.0" [capabilities] provides = ["bridge.adapter"] +[bridge] +platform = "telegram" +display_name = "Telegram" + [actions] requires = [ "bridges/messages/ingest", diff --git a/skills-lock.json b/skills-lock.json index c921cd26b..23bd7f8c9 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -61,6 +61,11 @@ "sourceType": "github", "computedHash": "8bc7a75020275e4ad1813cb4b616dc9e0c7624a6e71178f98341e69c39bd65f4" }, + "kb": { + "source": "compozy/kb", + "sourceType": "github", + "computedHash": "1950daed243399488ea990aa67b636ccb307997ebe071f484496869539594dcd" + }, "kodebase": { "source": "pedronauck/kodebase-go", "sourceType": "github", @@ -101,6 +106,11 @@ "sourceType": "github", "computedHash": "8aa98ed5bea287bf04295aec57b4fca25cf724069aaf58c8a66ac68bf791360d" }, + "systematic-qa": { + "source": "pedronauck/skills", + "sourceType": "github", + "computedHash": "cbc36a65128986475cb78c7a025b57abcde1383653af2616a40f59d546435295" + }, "tmux": { "source": "steipete/clawdis", "sourceType": "github", diff --git a/web/src/components/app-sidebar.test.tsx b/web/src/components/app-sidebar.test.tsx index 24243df39..46ea18dad 100644 --- a/web/src/components/app-sidebar.test.tsx +++ b/web/src/components/app-sidebar.test.tsx @@ -15,12 +15,14 @@ vi.mock("lucide-react", () => ({ Bot: () => bot, ChevronRight: () => chevron, Loader2: () => loader, + Network: () => network, 
PanelLeftClose: () => panel-close, PanelLeftOpen: () => panel-open, Plus: () => plus, Search: () => search, Settings: () => settings, Terminal: () => terminal, + Waypoints: () => waypoints, Wrench: () => wrench, })); @@ -253,6 +255,16 @@ describe("AppSidebar", () => { expect(screen.getByTestId("nav-knowledge")).toHaveAttribute("href", "/knowledge"); }); + it("renders Bridges nav item linking to /bridges", () => { + render(); + expect(screen.getByTestId("nav-bridges")).toHaveAttribute("href", "/bridges"); + }); + + it("renders Network nav item linking to /network", () => { + render(); + expect(screen.getByTestId("nav-network")).toHaveAttribute("href", "/network"); + }); + it("renders Automation nav item linking to /automation", () => { render(); expect(screen.getByTestId("nav-automation")).toHaveAttribute("href", "/automation"); @@ -279,6 +291,22 @@ describe("AppSidebar", () => { expect(indicator.className).toContain("bg-[color:var(--color-accent)]"); }); + it("shows active indicator on active Bridges nav", () => { + matchedRoute["/bridges"] = true; + render(); + const indicator = screen.getByTestId("nav-active-bridges"); + expect(indicator.className).toContain("w-[3px]"); + expect(indicator.className).toContain("bg-[color:var(--color-accent)]"); + }); + + it("shows active indicator on active Network nav", () => { + matchedRoute["/network"] = true; + render(); + const indicator = screen.getByTestId("nav-active-network"); + expect(indicator.className).toContain("w-[3px]"); + expect(indicator.className).toContain("bg-[color:var(--color-accent)]"); + }); + it("shows active indicator on active Skills nav", () => { matchedRoute["/skills"] = true; render(); @@ -290,6 +318,8 @@ describe("AppSidebar", () => { it("does not show active indicator when nav is not active", () => { render(); expect(screen.queryByTestId("nav-active-automation")).not.toBeInTheDocument(); + expect(screen.queryByTestId("nav-active-bridges")).not.toBeInTheDocument(); + 
expect(screen.queryByTestId("nav-active-network")).not.toBeInTheDocument(); expect(screen.queryByTestId("nav-active-knowledge")).not.toBeInTheDocument(); expect(screen.queryByTestId("nav-active-skills")).not.toBeInTheDocument(); }); diff --git a/web/src/components/app-sidebar.tsx b/web/src/components/app-sidebar.tsx index 448fb36c7..810ecabd7 100644 --- a/web/src/components/app-sidebar.tsx +++ b/web/src/components/app-sidebar.tsx @@ -4,12 +4,14 @@ import { Bot, ChevronRight, Loader2, + Network, PanelLeftClose, PanelLeftOpen, Plus, Search, Settings, Terminal, + Waypoints, Wrench, } from "lucide-react"; import { useMemo, type ReactNode } from "react"; @@ -371,6 +373,8 @@ function SidebarPanel({
} label="Automation" /> + } label="Bridges" /> + } label="Network" /> } label="Knowledge" /> } label="Skills" />
diff --git a/web/src/generated/agh-openapi.d.ts b/web/src/generated/agh-openapi.d.ts index e25751f23..df8f0f45a 100644 --- a/web/src/generated/agh-openapi.d.ts +++ b/web/src/generated/agh-openapi.d.ts @@ -215,6 +215,23 @@ export interface paths { patch?: never; trace?: never; }; + "/api/bridges/providers": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List installed bridge-capable providers */ + get: operations["listBridgeProviders"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/bridges/{id}": { parameters: { query?: never; @@ -508,6 +525,143 @@ export interface paths { patch?: never; trace?: never; }; + "/api/network/channels": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List materialized network channels */ + get: operations["listNetworkChannels"]; + put?: never; + /** Create a network channel by spawning agent sessions */ + post: operations["createNetworkChannel"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/channels/{channel}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get one network channel detail */ + get: operations["getNetworkChannel"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/channels/{channel}/messages": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List the read-only timeline for one network channel */ + get: operations["listNetworkChannelMessages"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/inbox": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List queued 
network inbox messages for one local session */ + get: operations["listNetworkInbox"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/peers": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List visible network peers */ + get: operations["listNetworkPeers"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/peers/{peer_id}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get one visible network peer detail */ + get: operations["getNetworkPeer"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/send": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Send one network message */ + post: operations["sendNetworkMessage"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/network/status": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get the network runtime status snapshot */ + get: operations["getNetworkStatus"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/observe/events": { parameters: { query?: never; @@ -2551,6 +2705,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -2682,6 +2838,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + 
/** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -2741,6 +2899,65 @@ export interface operations { }; }; }; + listBridgeProviders: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + providers: { + description?: string; + display_name: string; + enabled: boolean; + extension_name: string; + health: string; + health_message?: string; + platform: string; + state: string; + }[]; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Bridge service is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; getBridge: { parameters: { query?: never; @@ -2794,6 +3011,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -2908,6 +3127,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -3020,6 +3241,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time 
*/ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -3132,6 +3355,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -3244,6 +3469,8 @@ export interface operations { last_error?: string; /** Format: date-time */ last_error_at?: string | null; + /** Format: date-time */ + last_success_at?: string | null; route_count: number; /** @enum {string} */ status: "auth_required" | "degraded" | "disabled" | "error" | "ready" | "starting"; @@ -4653,6 +4880,905 @@ export interface operations { }; }; }; + listNetworkChannels: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + channels: { + channel: string; + /** Format: date-time */ + last_message_at?: string | null; + local_peer_count?: number; + message_count?: number; + peer_count: number; + remote_peer_count?: number; + session_count?: number; + }[]; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + createNetworkChannel: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** @description JSON request body */ + requestBody: { + 
content: { + "application/json": { + agent_names: string[]; + channel: string; + workspace_id: string; + }; + }; + }; + responses: { + /** @description Created */ + 201: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + channel: { + channel: string; + /** Format: date-time */ + last_message_at?: string | null; + local_peer_count?: number; + message_count?: number; + peer_count: number; + peers?: { + channel: string; + display_name?: string; + /** Format: date-time */ + expires_at?: string | null; + /** Format: date-time */ + joined_at?: string | null; + /** Format: date-time */ + last_seen?: string | null; + local: boolean; + peer_card: { + artifacts_supported: string[]; + capabilities: string[]; + display_name?: string | null; + ext?: { + [key: string]: unknown; + }; + peer_id: string; + profiles_supported: string[]; + trust_modes_supported: string[]; + }; + peer_id: string; + session_id?: string | null; + }[]; + remote_peer_count?: number; + session_count?: number; + sessions?: { + acp_caps?: { + supported_models?: string[]; + supported_modes?: string[]; + supports_load_session: boolean; + } | null; + acp_session_id?: string; + agent_name: string; + channel?: string; + /** Format: date-time */ + created_at: string; + id: string; + name?: string; + /** @enum {string} */ + state: "starting" | "active" | "stopping" | "stopped"; + stop_detail?: string; + /** @enum {string} */ + stop_reason?: + | "completed" + | "user_canceled" + | "max_iterations" + | "loop_detected" + | "timeout" + | "budget_exceeded" + | "error" + | "agent_crashed" + | "hook_stopped" + | "shutdown"; + /** Format: date-time */ + updated_at: string; + workspace_id?: string; + workspace_path?: string; + }[]; + }; + }; + }; + }; + /** @description Invalid network channel request */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Workspace not found */ + 404: { + headers: { + 
[name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + getNetworkChannel: { + parameters: { + query?: never; + header?: never; + path: { + /** @description Network channel */ + channel: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + channel: { + channel: string; + /** Format: date-time */ + last_message_at?: string | null; + local_peer_count?: number; + message_count?: number; + peer_count: number; + peers?: { + channel: string; + display_name?: string; + /** Format: date-time */ + expires_at?: string | null; + /** Format: date-time */ + joined_at?: string | null; + /** Format: date-time */ + last_seen?: string | null; + local: boolean; + peer_card: { + artifacts_supported: string[]; + capabilities: string[]; + display_name?: string | null; + ext?: { + [key: string]: unknown; + }; + peer_id: string; + profiles_supported: string[]; + trust_modes_supported: string[]; + }; + peer_id: string; + session_id?: string | null; + }[]; + remote_peer_count?: number; + session_count?: number; + sessions?: { + acp_caps?: { + supported_models?: string[]; + supported_modes?: string[]; + supports_load_session: boolean; + } | null; + acp_session_id?: string; + agent_name: string; + channel?: string; + /** Format: date-time */ + created_at: string; + id: string; + name?: string; + /** @enum {string} */ + state: "starting" | "active" | "stopping" | "stopped"; + 
stop_detail?: string; + /** @enum {string} */ + stop_reason?: + | "completed" + | "user_canceled" + | "max_iterations" + | "loop_detected" + | "timeout" + | "budget_exceeded" + | "error" + | "agent_crashed" + | "hook_stopped" + | "shutdown"; + /** Format: date-time */ + updated_at: string; + workspace_id?: string; + workspace_path?: string; + }[]; + }; + }; + }; + }; + /** @description Invalid network channel */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network channel not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + listNetworkChannelMessages: { + parameters: { + query?: { + /** @description Maximum number of timeline messages to return */ + limit?: number; + }; + header?: never; + path: { + /** @description Network channel */ + channel: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + messages: { + channel: string; + display_name?: string; + intent?: string; + local?: boolean; + message_id: string; + peer_id: string; + session_id?: string; + text: string; + /** Format: date-time */ + timestamp: string; + }[]; + }; + }; + }; + /** @description Invalid network channel */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description 
Network channel not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + listNetworkInbox: { + parameters: { + query: { + /** @description Target local session id */ + session_id: string; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + messages: { + body: unknown; + causation_id?: string | null; + channel: string; + /** Format: int64 */ + expires_at?: number | null; + ext?: { + [key: string]: unknown; + }; + from: string; + id: string; + interaction_id?: string | null; + kind: string; + proof?: { + [key: string]: unknown; + }; + protocol: string; + reply_to?: string | null; + to?: string | null; + trace_id?: string | null; + /** Format: int64 */ + ts: number; + }[]; + }; + }; + }; + /** @description Invalid inbox request */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network target not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + 
content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + listNetworkPeers: { + parameters: { + query?: { + /** @description Filter peers by channel */ + channel?: string; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + peers: { + channel: string; + display_name?: string; + /** Format: date-time */ + expires_at?: string | null; + /** Format: date-time */ + joined_at?: string | null; + /** Format: date-time */ + last_seen?: string | null; + local: boolean; + peer_card: { + artifacts_supported: string[]; + capabilities: string[]; + display_name?: string | null; + ext?: { + [key: string]: unknown; + }; + peer_id: string; + profiles_supported: string[]; + trust_modes_supported: string[]; + }; + peer_id: string; + session_id?: string | null; + }[]; + }; + }; + }; + /** @description Invalid network filter */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + getNetworkPeer: { + parameters: { + query?: never; + header?: never; + path: { + /** @description Network peer id */ + peer_id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + peer: { + channel?: string; + 
display_name?: string; + /** Format: date-time */ + expires_at?: string | null; + /** Format: date-time */ + joined_at?: string | null; + /** Format: date-time */ + last_seen?: string | null; + local?: boolean; + metrics: { + /** Format: int64 */ + delivered?: number; + /** Format: int64 */ + received?: number; + /** Format: int64 */ + rejected?: number; + /** Format: int64 */ + sent?: number; + }; + peer_card: { + artifacts_supported: string[]; + capabilities: string[]; + display_name?: string | null; + ext?: { + [key: string]: unknown; + }; + peer_id: string; + profiles_supported: string[]; + trust_modes_supported: string[]; + }; + peer_id: string; + session_id?: string | null; + }; + }; + }; + }; + /** @description Network peer not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + sendNetworkMessage: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** @description JSON request body */ + requestBody: { + content: { + "application/json": { + body: unknown; + causation_id?: string; + channel: string; + /** Format: int64 */ + expires_at?: number | null; + ext?: { + [key: string]: unknown; + }; + id?: string; + interaction_id?: string; + kind: string; + reply_to?: string; + session_id: string; + to?: string; + trace_id?: string; + }; + }; + }; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + message: { + causation_id?: string; 
+ channel: string; + /** Format: int64 */ + expires_at?: number | null; + ext?: { + [key: string]: unknown; + }; + id: string; + interaction_id?: string; + kind: string; + reply_to?: string; + session_id: string; + to?: string; + trace_id?: string; + }; + }; + }; + }; + /** @description Invalid network send request */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network target not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Network runtime is not configured */ + 503: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + getNetworkStatus: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + network: { + channels?: number; + delivery_workers?: number; + enabled: boolean; + /** Format: int64 */ + handoff_tagged_events?: number; + kind_metrics?: { + /** Format: int64 */ + delivered?: number; + kind: string; + /** Format: int64 */ + received?: number; + /** Format: int64 */ + rejected?: number; + /** Format: int64 */ + sent?: number; + }[]; + last_disconnect?: string; + listener_host?: string; + listener_port?: number; + local_peers?: number; + /** Format: int64 */ + messages_delivered?: number; + /** Format: int64 */ + messages_received?: number; + /** Format: int64 */ + messages_rejected?: number; + /** Format: int64 */ + messages_sent?: number; + 
queued_messages?: number; + queued_sessions?: number; + remote_peers?: number; + status: string; + /** Format: int64 */ + workflow_tagged_events?: number; + }; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; listObserveEvents: { parameters: { query?: { diff --git a/web/src/routeTree.gen.ts b/web/src/routeTree.gen.ts index c35bb710b..b5d6d4e62 100644 --- a/web/src/routeTree.gen.ts +++ b/web/src/routeTree.gen.ts @@ -13,7 +13,9 @@ import { Route as DesignSystemRouteImport } from './routes/design-system' import { Route as AppRouteImport } from './routes/_app' import { Route as AppIndexRouteImport } from './routes/_app/index' import { Route as AppSkillsRouteImport } from './routes/_app/skills' +import { Route as AppNetworkRouteImport } from './routes/_app/network' import { Route as AppKnowledgeRouteImport } from './routes/_app/knowledge' +import { Route as AppBridgesRouteImport } from './routes/_app/bridges' import { Route as AppAutomationRouteImport } from './routes/_app/automation' import { Route as AppSessionIdRouteImport } from './routes/_app/session.$id' @@ -36,11 +38,21 @@ const AppSkillsRoute = AppSkillsRouteImport.update({ path: '/skills', getParentRoute: () => AppRoute, } as any) +const AppNetworkRoute = AppNetworkRouteImport.update({ + id: '/network', + path: '/network', + getParentRoute: () => AppRoute, +} as any) const AppKnowledgeRoute = AppKnowledgeRouteImport.update({ id: '/knowledge', path: '/knowledge', getParentRoute: () => AppRoute, } as any) +const AppBridgesRoute = AppBridgesRouteImport.update({ + id: '/bridges', + path: '/bridges', + getParentRoute: () => AppRoute, +} as any) const AppAutomationRoute = AppAutomationRouteImport.update({ id: '/automation', path: '/automation', @@ -56,14 +68,18 @@ export interface 
FileRoutesByFullPath { '/': typeof AppIndexRoute '/design-system': typeof DesignSystemRoute '/automation': typeof AppAutomationRoute + '/bridges': typeof AppBridgesRoute '/knowledge': typeof AppKnowledgeRoute + '/network': typeof AppNetworkRoute '/skills': typeof AppSkillsRoute '/session/$id': typeof AppSessionIdRoute } export interface FileRoutesByTo { '/design-system': typeof DesignSystemRoute '/automation': typeof AppAutomationRoute + '/bridges': typeof AppBridgesRoute '/knowledge': typeof AppKnowledgeRoute + '/network': typeof AppNetworkRoute '/skills': typeof AppSkillsRoute '/': typeof AppIndexRoute '/session/$id': typeof AppSessionIdRoute @@ -73,7 +89,9 @@ export interface FileRoutesById { '/_app': typeof AppRouteWithChildren '/design-system': typeof DesignSystemRoute '/_app/automation': typeof AppAutomationRoute + '/_app/bridges': typeof AppBridgesRoute '/_app/knowledge': typeof AppKnowledgeRoute + '/_app/network': typeof AppNetworkRoute '/_app/skills': typeof AppSkillsRoute '/_app/': typeof AppIndexRoute '/_app/session/$id': typeof AppSessionIdRoute @@ -84,14 +102,18 @@ export interface FileRouteTypes { | '/' | '/design-system' | '/automation' + | '/bridges' | '/knowledge' + | '/network' | '/skills' | '/session/$id' fileRoutesByTo: FileRoutesByTo to: | '/design-system' | '/automation' + | '/bridges' | '/knowledge' + | '/network' | '/skills' | '/' | '/session/$id' @@ -100,7 +122,9 @@ export interface FileRouteTypes { | '/_app' | '/design-system' | '/_app/automation' + | '/_app/bridges' | '/_app/knowledge' + | '/_app/network' | '/_app/skills' | '/_app/' | '/_app/session/$id' @@ -141,6 +165,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof AppSkillsRouteImport parentRoute: typeof AppRoute } + '/_app/network': { + id: '/_app/network' + path: '/network' + fullPath: '/network' + preLoaderRoute: typeof AppNetworkRouteImport + parentRoute: typeof AppRoute + } '/_app/knowledge': { id: '/_app/knowledge' path: '/knowledge' @@ -148,6 +179,13 @@ 
declare module '@tanstack/react-router' { preLoaderRoute: typeof AppKnowledgeRouteImport parentRoute: typeof AppRoute } + '/_app/bridges': { + id: '/_app/bridges' + path: '/bridges' + fullPath: '/bridges' + preLoaderRoute: typeof AppBridgesRouteImport + parentRoute: typeof AppRoute + } '/_app/automation': { id: '/_app/automation' path: '/automation' @@ -167,7 +205,9 @@ declare module '@tanstack/react-router' { interface AppRouteChildren { AppAutomationRoute: typeof AppAutomationRoute + AppBridgesRoute: typeof AppBridgesRoute AppKnowledgeRoute: typeof AppKnowledgeRoute + AppNetworkRoute: typeof AppNetworkRoute AppSkillsRoute: typeof AppSkillsRoute AppIndexRoute: typeof AppIndexRoute AppSessionIdRoute: typeof AppSessionIdRoute @@ -175,7 +215,9 @@ interface AppRouteChildren { const AppRouteChildren: AppRouteChildren = { AppAutomationRoute: AppAutomationRoute, + AppBridgesRoute: AppBridgesRoute, AppKnowledgeRoute: AppKnowledgeRoute, + AppNetworkRoute: AppNetworkRoute, AppSkillsRoute: AppSkillsRoute, AppIndexRoute: AppIndexRoute, AppSessionIdRoute: AppSessionIdRoute, diff --git a/web/src/routes/_app/-automation.integration.test.tsx b/web/src/routes/_app/-automation.integration.test.tsx index cae797826..b64f0466d 100644 --- a/web/src/routes/_app/-automation.integration.test.tsx +++ b/web/src/routes/_app/-automation.integration.test.tsx @@ -1,9 +1,16 @@ -import { render, screen, within } from "@testing-library/react"; +import { fireEvent, render, screen, waitFor, within } from "@testing-library/react"; import userEvent from "@testing-library/user-event"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { AutomationJob, AutomationRun, AutomationTrigger } from "@/systems/automation"; +const { toast } = vi.hoisted(() => ({ + toast: { + error: vi.fn(), + success: vi.fn(), + }, +})); + let mockJobs: AutomationJob[] = []; let mockJobsLoading = false; let mockJobsError: Error | null = null; @@ -50,32 +57,36 @@ vi.mock("@tanstack/react-router", () => ({ 
}), })); +vi.mock("sonner", () => ({ + toast, +})); + vi.mock("@/systems/workspace", () => ({ useActiveWorkspace: () => ({ workspaces: [ { - id: "ws_test", - root_dir: "/workspace", add_dirs: [], - name: "test-workspace", created_at: "2026-04-03T12:00:00Z", + id: "ws_test", + name: "test-workspace", + root_dir: "/workspace", updated_at: "2026-04-03T12:00:00Z", }, ], hasWorkspaces: true, activeWorkspace: { - id: "ws_test", - root_dir: "/workspace", add_dirs: [], - name: "test-workspace", created_at: "2026-04-03T12:00:00Z", + id: "ws_test", + name: "test-workspace", + root_dir: "/workspace", updated_at: "2026-04-03T12:00:00Z", }, activeWorkspaceId: "ws_test", - setActiveWorkspaceId: vi.fn(), clearActiveWorkspaceSelection: vi.fn(), - isLoading: false, isError: false, + isLoading: false, + setActiveWorkspaceId: vi.fn(), }), })); @@ -85,61 +96,61 @@ vi.mock("@/systems/automation", async () => { ...actual, useAutomationJobs: () => ({ data: mockJobs, - isLoading: mockJobsLoading, error: mockJobsError, + isLoading: mockJobsLoading, }), useAutomationTriggers: () => ({ data: mockTriggers, - isLoading: mockTriggersLoading, error: mockTriggersError, + isLoading: mockTriggersLoading, }), useAutomationJob: () => ({ data: mockJobDetail, - isLoading: mockJobDetailLoading, error: mockJobDetailError, + isLoading: mockJobDetailLoading, }), useAutomationTrigger: () => ({ data: mockTriggerDetail, - isLoading: mockTriggerDetailLoading, error: mockTriggerDetailError, + isLoading: mockTriggerDetailLoading, }), useAutomationJobRuns: () => ({ data: mockJobRuns, - isLoading: mockJobRunsLoading, error: mockJobRunsError, + isLoading: mockJobRunsLoading, }), useAutomationTriggerRuns: () => ({ data: mockTriggerRuns, - isLoading: mockTriggerRunsLoading, error: mockTriggerRunsError, + isLoading: mockTriggerRunsLoading, }), useCreateAutomationJob: () => ({ - mutateAsync: mockCreateJobMutateAsync, isPending: mockCreateJobPending, + mutateAsync: mockCreateJobMutateAsync, }), useUpdateAutomationJob: 
() => ({ - mutateAsync: mockUpdateJobMutateAsync, isPending: mockUpdateJobPending, + mutateAsync: mockUpdateJobMutateAsync, }), useDeleteAutomationJob: () => ({ - mutateAsync: mockDeleteJobMutateAsync, isPending: mockDeleteJobPending, + mutateAsync: mockDeleteJobMutateAsync, }), useTriggerAutomationJob: () => ({ - mutateAsync: mockTriggerJobMutateAsync, isPending: mockTriggerJobPending, + mutateAsync: mockTriggerJobMutateAsync, }), useCreateAutomationTrigger: () => ({ - mutateAsync: mockCreateTriggerMutateAsync, isPending: mockCreateTriggerPending, + mutateAsync: mockCreateTriggerMutateAsync, }), useUpdateAutomationTrigger: () => ({ - mutateAsync: mockUpdateTriggerMutateAsync, isPending: mockUpdateTriggerPending, + mutateAsync: mockUpdateTriggerMutateAsync, }), useDeleteAutomationTrigger: () => ({ - mutateAsync: mockDeleteTriggerMutateAsync, isPending: mockDeleteTriggerPending, + mutateAsync: mockDeleteTriggerMutateAsync, }), }; }); @@ -148,64 +159,64 @@ import { Route } from "./automation"; function makeJob(overrides: Partial = {}): AutomationJob { return { + agent_name: "reviewer", + created_at: "2026-04-11T09:00:00Z", + enabled: true, + fire_limit: { max: 12, window: "1h" }, id: "job_daily_review", name: "daily-review", - agent_name: "reviewer", + next_run: "2026-04-12T09:00:00Z", prompt: "Review recent changes.", + retry: { strategy: "none", max_retries: 3, base_delay: "2s" }, + schedule: { mode: "cron", expr: "0 9 * * *" }, scope: "workspace", - workspace_id: "ws_test", source: "dynamic", - enabled: true, - schedule: { mode: "cron", expr: "0 9 * * *" }, - retry: { strategy: "none", max_retries: 3, base_delay: "2s" }, - fire_limit: { max: 12, window: "1h" }, - next_run: "2026-04-12T09:00:00Z", - created_at: "2026-04-11T09:00:00Z", updated_at: "2026-04-11T09:05:00Z", + workspace_id: "ws_test", ...overrides, }; } function makeTrigger(overrides: Partial = {}): AutomationTrigger { return { - id: "trg_push_review", - name: "push-review", agent_name: "reviewer", - 
prompt: "Review push event {{ .Data.branch }}.", + created_at: "2026-04-11T08:00:00Z", + enabled: true, + endpoint_slug: "push-review", event: "ext.github.push", filter: { "data.branch": "main" }, + fire_limit: { max: 12, window: "1h" }, + id: "trg_push_review", + name: "push-review", + prompt: "Review push event {{ .Data.branch }}.", + retry: { strategy: "backoff", max_retries: 4, base_delay: "5s" }, scope: "workspace", - workspace_id: "ws_test", source: "dynamic", - enabled: true, - retry: { strategy: "backoff", max_retries: 4, base_delay: "5s" }, - fire_limit: { max: 12, window: "1h" }, - endpoint_slug: "push-review", - webhook_id: "wbh_push_review", - created_at: "2026-04-11T08:00:00Z", updated_at: "2026-04-11T08:10:00Z", + webhook_id: "wbh_push_review", + workspace_id: "ws_test", ...overrides, }; } function makeRun(overrides: Partial = {}): AutomationRun { return { - id: "run_001", - status: "completed", attempt: 1, + ended_at: "2026-04-11T10:05:00Z", + id: "run_001", job_id: "job_daily_review", session_id: "sess_001", started_at: "2026-04-11T10:00:00Z", - ended_at: "2026-04-11T10:05:00Z", + status: "completed", ...overrides, }; } -// eslint-disable-next-line @typescript-eslint/no-explicit-any -const AutomationPage = (Route as any).component as () => React.ReactNode; +const AutomationPage = (Route as unknown as { component: () => React.ReactNode }).component; describe("Automation route integration", () => { beforeEach(() => { + vi.useRealTimers(); mockJobs = [makeJob()]; mockJobsLoading = false; mockJobsError = null; @@ -222,7 +233,7 @@ describe("Automation route integration", () => { mockJobRunsLoading = false; mockJobRunsError = null; mockTriggerRuns = [ - makeRun({ id: "run_trigger", trigger_id: "trg_push_review", job_id: undefined }), + makeRun({ id: "run_trigger", job_id: undefined, trigger_id: "trg_push_review" }), ]; mockTriggerRunsLoading = false; mockTriggerRunsError = null; @@ -242,16 +253,21 @@ describe("Automation route integration", () => { 
mockCreateTriggerMutateAsync.mockReset(); mockUpdateTriggerMutateAsync.mockReset(); mockDeleteTriggerMutateAsync.mockReset(); + toast.success.mockReset(); + toast.error.mockReset(); mockCreateJobMutateAsync.mockResolvedValue( makeJob({ id: "job_created", name: "nightly-docs" }) ); + mockCreateTriggerMutateAsync.mockResolvedValue( + makeTrigger({ id: "trg_created", name: "qa-trigger-browser", event: "ext.test.qa" }) + ); mockTriggerJobMutateAsync.mockResolvedValue( makeRun({ + ended_at: undefined, id: "run_queued", - status: "running", started_at: "2026-04-11T11:00:00Z", - ended_at: undefined, + status: "running", }) ); }); @@ -271,21 +287,21 @@ describe("Automation route integration", () => { expect(screen.getByTestId("automation-error")).toHaveTextContent("boom"); }); - it("renders the jobs list, detail pane, and run history from mocked API-backed hooks", () => { + it("renders the jobs list, schedule detail, and run history from mocked hooks", () => { render(); const detailPanel = screen.getByTestId("automation-detail-panel"); expect(screen.getByText("Automation")).toBeInTheDocument(); expect(screen.getByTestId("automation-list-panel")).toBeInTheDocument(); - expect(detailPanel).toBeInTheDocument(); expect(screen.getByTestId("automation-item-job_daily_review")).toBeInTheDocument(); expect(within(detailPanel).getByText("daily-review")).toBeInTheDocument(); expect(within(detailPanel).getByText("Review recent changes.")).toBeInTheDocument(); + expect(within(detailPanel).getByText("0 9 * * *")).toBeInTheDocument(); expect(screen.getByTestId("automation-run-run_001")).toBeInTheDocument(); }); - it("switches to trigger management and shows trigger detail content", async () => { + it("switches to trigger management and shows trigger activation content", async () => { const user = userEvent.setup(); render(); @@ -295,31 +311,139 @@ describe("Automation route integration", () => { expect(screen.getByTestId("automation-item-trg_push_review")).toBeInTheDocument(); 
expect(within(detailPanel).getByRole("heading", { name: "push-review" })).toBeInTheDocument(); - expect(within(detailPanel).getByText("ext.github.push", { selector: "p" })).toBeInTheDocument(); + expect(within(detailPanel).getAllByText("ext.github.push")).toHaveLength(2); + expect(within(detailPanel).getByText("Dispatches to")).toBeInTheDocument(); }); - it("submits a workspace-scoped job create payload using the active workspace id", async () => { + it("opens a create job modal and submits a workspace-scoped payload", async () => { const user = userEvent.setup(); render(); await user.click(screen.getByTestId("create-automation-btn")); - await user.type(screen.getByTestId("job-name-input"), "nightly-docs"); - await user.type(screen.getByTestId("job-agent-input"), "writer"); - await user.type( - screen.getByTestId("job-prompt-input"), - "Summarize docs changes and publish a digest." - ); + + expect(screen.getByTestId("automation-job-form")).toBeInTheDocument(); + + fireEvent.change(screen.getByTestId("job-name-input"), { + target: { value: "nightly-docs" }, + }); + fireEvent.change(screen.getByTestId("job-agent-input"), { + target: { value: "writer" }, + }); + fireEvent.change(screen.getByTestId("job-prompt-input"), { + target: { value: "Summarize docs changes and publish a digest." 
}, + }); await user.click(screen.getByTestId("submit-job-form")); - expect(mockCreateJobMutateAsync).toHaveBeenCalledWith( - expect.objectContaining({ - scope: "workspace", - workspace_id: "ws_test", - name: "nightly-docs", - agent_name: "writer", - }) + await waitFor(() => { + expect(mockCreateJobMutateAsync).toHaveBeenCalledWith( + expect.objectContaining({ + agent_name: "writer", + name: "nightly-docs", + retry: { strategy: "none", max_retries: 0, base_delay: "" }, + scope: "workspace", + workspace_id: "ws_test", + }) + ); + expect(toast.success).toHaveBeenCalledWith("Created job nightly-docs."); + }); + }); + + it("opens a create trigger modal and submits a valid retry-none payload", async () => { + const user = userEvent.setup(); + render(); + + await user.click(screen.getByTestId("automation-kind-triggers")); + await user.click(screen.getByTestId("create-automation-btn")); + + fireEvent.change(screen.getByTestId("trigger-name-input"), { + target: { value: "qa-trigger-browser" }, + }); + fireEvent.change(screen.getByTestId("trigger-agent-input"), { + target: { value: "reviewer" }, + }); + fireEvent.change(screen.getByTestId("trigger-event-input"), { + target: { value: "ext.test.qa" }, + }); + fireEvent.change(screen.getByTestId("trigger-prompt-input"), { + target: { value: "Review {{ .EventName }}." 
}, + }); + + await user.click(screen.getByTestId("submit-trigger-form")); + + await waitFor(() => { + expect(mockCreateTriggerMutateAsync).toHaveBeenCalledWith( + expect.objectContaining({ + agent_name: "reviewer", + event: "ext.test.qa", + name: "qa-trigger-browser", + retry: { strategy: "none", max_retries: 0, base_delay: "" }, + scope: "workspace", + workspace_id: "ws_test", + }) + ); + expect(toast.success).toHaveBeenCalledWith("Created trigger qa-trigger-browser."); + }); + }); + + it("uses the original job id when the visible selection changes during edit", async () => { + const user = userEvent.setup(); + mockUpdateJobMutateAsync.mockResolvedValue( + makeJob({ id: "job_daily_review", name: "daily-review-updated" }) ); - expect(await screen.findByText("Created job nightly-docs.")).toBeInTheDocument(); + + const { rerender } = render(); + + await user.click(screen.getByTestId("edit-automation-btn")); + fireEvent.change(screen.getByTestId("job-name-input"), { + target: { value: "daily-review-updated" }, + }); + + mockJobs = [ + makeJob({ + id: "job_release_notes", + name: "release-notes", + prompt: "Review the release notes.", + }), + ]; + rerender(); + + await user.click(screen.getByTestId("submit-job-form")); + + await waitFor(() => { + expect(mockUpdateJobMutateAsync).toHaveBeenCalledWith({ + data: expect.objectContaining({ name: "daily-review-updated" }), + id: "job_daily_review", + }); + }); + }); + + it("renders the no-runs state when the selected job has not executed yet", () => { + mockJobRuns = []; + + render(); + + expect(screen.getByText("No runs recorded yet")).toBeInTheDocument(); + expect( + screen.getByText("Runs will appear here after the first scheduled or manual execution.") + ).toBeInTheDocument(); + }); + + it("renders jobs and triggers empty states when no automation exists", async () => { + const user = userEvent.setup(); + mockJobs = []; + mockJobDetail = undefined; + mockJobRuns = []; + mockTriggers = []; + mockTriggerDetail = undefined; 
+ mockTriggerRuns = []; + + render(); + + expect(screen.getByText("No jobs configured")).toBeInTheDocument(); + + await user.click(screen.getByTestId("automation-kind-triggers")); + + expect(screen.getByText("No triggers configured")).toBeInTheDocument(); }); it("queues a manual run and prepends it to run history", async () => { @@ -328,8 +452,10 @@ describe("Automation route integration", () => { await user.click(screen.getByTestId("trigger-job-btn")); - expect(mockTriggerJobMutateAsync).toHaveBeenCalledWith({ id: "job_daily_review" }); - expect(await screen.findByText("Queued run run_queued.")).toBeInTheDocument(); - expect(screen.getByTestId("automation-run-run_queued")).toBeInTheDocument(); + await waitFor(() => { + expect(mockTriggerJobMutateAsync).toHaveBeenCalledWith({ id: "job_daily_review" }); + expect(toast.success).toHaveBeenCalledWith("Queued run run_queued."); + expect(screen.getByTestId("automation-run-run_queued")).toBeInTheDocument(); + }); }); }); diff --git a/web/src/routes/_app/-bridges.test.tsx b/web/src/routes/_app/-bridges.test.tsx new file mode 100644 index 000000000..8d8666e7a --- /dev/null +++ b/web/src/routes/_app/-bridges.test.tsx @@ -0,0 +1,389 @@ +import { fireEvent, render, screen, waitFor, within } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import type { + BridgeDetailResponse, + BridgeProvider, + BridgeRoute, + BridgesListResponse, + CreateBridgeResponse, + TestBridgeDeliveryResponse, +} from "@/systems/bridges"; + +const { toast } = vi.hoisted(() => ({ + toast: { + error: vi.fn(), + success: vi.fn(), + }, +})); + +let mockBridgesData: BridgesListResponse | undefined; +let mockBridgesLoading = false; +let mockBridgesError: Error | null = null; + +let mockProvidersData: BridgeProvider[] | undefined; +let mockProvidersLoading = false; +let mockProvidersError: Error | null = null; + +let mockBridgeDetail: BridgeDetailResponse | 
undefined; +let mockBridgeDetailLoading = false; +let mockBridgeDetailError: Error | null = null; + +let mockBridgeRoutes: BridgeRoute[] | undefined; +let mockBridgeRoutesLoading = false; +let mockBridgeRoutesError: Error | null = null; + +const mockCreateBridgeMutateAsync = vi.fn(); +const mockTestDeliveryMutateAsync = vi.fn(); +let mockCreateBridgePending = false; +let mockTestDeliveryPending = false; + +let mockActiveWorkspaceId: string | null = "ws_test"; +let mockActiveWorkspaceName = "test-workspace"; + +vi.mock("@tanstack/react-router", () => ({ + createFileRoute: () => (opts: { component: () => React.ReactNode }) => ({ + component: opts.component, + }), +})); + +vi.mock("sonner", () => ({ + toast, +})); + +vi.mock("@/systems/workspace", () => ({ + WorkspacePageShell: ({ + children, + controls, + meta, + title, + }: { + children: React.ReactNode; + controls?: React.ReactNode; + meta?: React.ReactNode; + title: string; + }) => ( +
+
+

{title}

+ {controls ?
{controls}
: null} + {meta ?
{meta}
: null} +
+ {children} +
+ ), + useActiveWorkspace: () => ({ + workspaces: mockActiveWorkspaceId + ? [ + { + add_dirs: [], + created_at: "2026-04-03T12:00:00Z", + id: mockActiveWorkspaceId, + name: mockActiveWorkspaceName, + root_dir: "/workspace", + updated_at: "2026-04-03T12:00:00Z", + }, + ] + : [], + hasWorkspaces: Boolean(mockActiveWorkspaceId), + activeWorkspace: mockActiveWorkspaceId + ? { + add_dirs: [], + created_at: "2026-04-03T12:00:00Z", + id: mockActiveWorkspaceId, + name: mockActiveWorkspaceName, + root_dir: "/workspace", + updated_at: "2026-04-03T12:00:00Z", + } + : undefined, + activeWorkspaceId: mockActiveWorkspaceId, + clearActiveWorkspaceSelection: vi.fn(), + isError: false, + isLoading: false, + setActiveWorkspaceId: vi.fn(), + }), +})); + +vi.mock("@/systems/bridges", async () => { + const actual = await vi.importActual("@/systems/bridges"); + + return { + ...actual, + useBridges: () => ({ + data: mockBridgesData, + error: mockBridgesError, + isLoading: mockBridgesLoading, + }), + useBridgeProviders: () => ({ + data: mockProvidersData, + error: mockProvidersError, + isLoading: mockProvidersLoading, + }), + useBridge: () => ({ + data: mockBridgeDetail, + error: mockBridgeDetailError, + isLoading: mockBridgeDetailLoading, + }), + useBridgeRoutes: () => ({ + data: mockBridgeRoutes, + error: mockBridgeRoutesError, + isLoading: mockBridgeRoutesLoading, + }), + useCreateBridge: () => ({ + isPending: mockCreateBridgePending, + mutateAsync: mockCreateBridgeMutateAsync, + }), + useTestBridgeDelivery: () => ({ + isPending: mockTestDeliveryPending, + mutateAsync: mockTestDeliveryMutateAsync, + }), + }; +}); + +import { Route } from "./bridges"; + +function makeBridge(overrides: Partial = {}) { + return { + created_at: "2026-04-13T12:00:00Z", + display_name: "Support", + enabled: true, + extension_name: "ext-telegram", + id: "brg_support", + platform: "telegram", + routing_policy: { include_group: true, include_peer: true, include_thread: true }, + scope: "workspace" as const, + 
status: "ready" as const, + updated_at: "2026-04-13T12:30:00Z", + workspace_id: "ws_test", + ...overrides, + }; +} + +function makeHealth( + overrides: Partial[string]> = {} +) { + return { + auth_failures_total: 0, + bridge_instance_id: "brg_support", + delivery_backlog: 1, + delivery_dropped_total: 0, + delivery_failures_total: 0, + last_success_at: "2026-04-13T12:20:00Z", + route_count: 1, + status: "ready" as const, + ...overrides, + }; +} + +function makeProvider(overrides: Partial = {}): BridgeProvider { + return { + display_name: "Telegram", + enabled: true, + extension_name: "ext-telegram", + health: "healthy", + platform: "telegram", + state: "active", + ...overrides, + }; +} + +function makeRoute(overrides: Partial = {}): BridgeRoute { + return { + agent_name: "support-agent", + bridge_instance_id: "brg_support", + created_at: "2026-04-13T12:00:00Z", + last_activity_at: "2026-04-13T12:15:00Z", + peer_id: "peer_123", + routing_key_hash: "abc123", + scope: "workspace", + session_id: "sess_123", + updated_at: "2026-04-13T12:15:00Z", + workspace_id: "ws_test", + ...overrides, + }; +} + +const BridgesPage = (Route as unknown as { component: () => React.ReactNode }).component; + +describe("BridgesPage", () => { + beforeEach(() => { + vi.useRealTimers(); + mockBridgesData = { + bridge_health: { + brg_support: makeHealth(), + }, + bridges: [makeBridge()], + }; + mockBridgesLoading = false; + mockBridgesError = null; + mockProvidersData = [makeProvider()]; + mockProvidersLoading = false; + mockProvidersError = null; + mockBridgeDetail = { + bridge: makeBridge(), + health: makeHealth(), + }; + mockBridgeDetailLoading = false; + mockBridgeDetailError = null; + mockBridgeRoutes = [makeRoute()]; + mockBridgeRoutesLoading = false; + mockBridgeRoutesError = null; + mockCreateBridgePending = false; + mockTestDeliveryPending = false; + mockActiveWorkspaceId = "ws_test"; + mockActiveWorkspaceName = "test-workspace"; + + mockCreateBridgeMutateAsync.mockReset(); + 
mockTestDeliveryMutateAsync.mockReset(); + toast.success.mockReset(); + toast.error.mockReset(); + + mockCreateBridgeMutateAsync.mockResolvedValue({ + bridge: makeBridge({ id: "brg_created", status: "starting" }), + health: makeHealth({ bridge_instance_id: "brg_created", status: "starting" }), + } satisfies CreateBridgeResponse); + mockTestDeliveryMutateAsync.mockResolvedValue({ + delivery_target: { + bridge_instance_id: "brg_support", + mode: "reply", + peer_id: "peer_123", + }, + message: "Ping", + status: "resolved", + } satisfies TestBridgeDeliveryResponse); + }); + + it("renders loading and error states from the list queries", () => { + mockBridgesLoading = true; + mockProvidersLoading = true; + mockBridgesData = undefined; + mockProvidersData = undefined; + const { rerender } = render(); + + expect(screen.getByTestId("bridges-loading")).toBeInTheDocument(); + + mockBridgesLoading = false; + mockProvidersLoading = false; + mockBridgesData = undefined; + mockBridgesError = new Error("boom"); + rerender(); + + expect(screen.getByTestId("bridges-error")).toHaveTextContent("boom"); + }); + + it("renders the empty state with provider cards when no bridge exists yet", () => { + mockBridgesData = { + bridge_health: {}, + bridges: [], + }; + + render(); + + expect(screen.getByTestId("bridges-empty-state")).toBeInTheDocument(); + expect(screen.getByText("No bridges configured")).toBeInTheDocument(); + expect(screen.getByText("Telegram")).toBeInTheDocument(); + }); + + it("renders the selected bridge detail and route list", () => { + render(); + + const detailPanel = screen.getByTestId("bridge-detail-panel"); + + expect(screen.getByText("Bridges")).toBeInTheDocument(); + expect(screen.getByTestId("bridge-list-panel")).toBeInTheDocument(); + expect(screen.getByTestId("bridge-item-brg_support")).toBeInTheDocument(); + expect(within(detailPanel).getByText("Support")).toBeInTheDocument(); + expect(within(detailPanel).getByText("support-agent")).toBeInTheDocument(); + 
expect(screen.getByTestId("bridge-route-sess_123")).toBeInTheDocument(); + }); + + it("renders the no routes detail variant when the selected bridge has no routes", () => { + mockBridgeRoutes = []; + + render(); + + expect(screen.getByTestId("bridge-routes-empty")).toHaveTextContent("No routes"); + }); + + it("opens the create bridge dialog and submits a workspace-scoped payload", async () => { + mockBridgesData = { + bridge_health: {}, + bridges: [], + }; + + render(); + + fireEvent.click(screen.getByTestId("bridge-empty-create-btn")); + + expect(screen.getByTestId("bridge-create-dialog")).toBeInTheDocument(); + + fireEvent.click(screen.getByTestId("submit-bridge-create")); + + await waitFor(() => { + expect(mockCreateBridgeMutateAsync).toHaveBeenCalledWith({ + delivery_defaults: undefined, + display_name: "Telegram", + enabled: true, + extension_name: "ext-telegram", + platform: "telegram", + routing_policy: { include_group: true, include_peer: true, include_thread: true }, + scope: "workspace", + status: "starting", + workspace_id: "ws_test", + }); + expect(toast.success).toHaveBeenCalledWith("Created bridge Support."); + }); + }); + + it("blocks workspace-scoped bridge creation when the active workspace disappears", async () => { + const user = userEvent.setup(); + mockBridgesData = { + bridge_health: {}, + bridges: [], + }; + + const { rerender } = render(); + + await user.click(screen.getByTestId("bridge-empty-create-btn")); + + mockActiveWorkspaceId = null; + mockActiveWorkspaceName = ""; + rerender(); + + fireEvent.submit(screen.getByTestId("bridge-create-dialog")); + + expect(mockCreateBridgeMutateAsync).not.toHaveBeenCalled(); + expect(toast.error).toHaveBeenCalledWith( + "Select an active workspace before creating a workspace-scoped bridge." 
+ ); + }); + + it("opens test delivery and shows the resolved target result", async () => { + const user = userEvent.setup(); + render(); + + await user.click(screen.getByTestId("open-test-delivery-btn")); + + expect(screen.getByTestId("bridge-test-delivery-dialog")).toBeInTheDocument(); + + await user.clear(screen.getByTestId("test-delivery-message")); + await user.type(screen.getByTestId("test-delivery-message"), "Ping"); + await user.click(screen.getByTestId("submit-test-delivery")); + + await waitFor(() => { + expect(mockTestDeliveryMutateAsync).toHaveBeenCalledWith({ + data: { + message: "Ping", + target: { + bridge_instance_id: "brg_support", + }, + }, + id: "brg_support", + }); + }); + + expect(screen.getByTestId("bridge-test-delivery-result")).toHaveTextContent("peer:peer_123"); + expect(toast.success).toHaveBeenCalledWith("Resolved delivery target for Support."); + }); +}); diff --git a/web/src/routes/_app/-network.test.tsx b/web/src/routes/_app/-network.test.tsx new file mode 100644 index 000000000..10cf1922b --- /dev/null +++ b/web/src/routes/_app/-network.test.tsx @@ -0,0 +1,573 @@ +import { fireEvent, render, screen, waitFor, within } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import type { ReactNode } from "react"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import type { + CreateNetworkChannelResponse, + NetworkChannel, + NetworkChannelMessage, + NetworkChannelsResponse, + NetworkPeerDetail, + NetworkPeerSummary, + NetworkStatus, +} from "@/systems/network"; + +const { toast } = vi.hoisted(() => ({ + toast: { + error: vi.fn(), + success: vi.fn(), + }, +})); + +let mockActiveWorkspaceId: string | null = "ws_main"; +let mockActiveWorkspaceName = "Polybot"; + +let mockWorkspaceAgents = [ + { name: "polybot-main", prompt: "coordinate", provider: "anthropic" }, + { name: "coder-agent-01", prompt: "code", provider: "openai" }, +]; +let mockAgents = [ + ...mockWorkspaceAgents, + { name: 
"researcher-01", prompt: "research", provider: "openai" }, +]; + +let mockNetworkStatus: NetworkStatus | undefined; +let mockNetworkChannels: NetworkChannelsResponse | undefined; +let mockNetworkChannelsLoading = false; +let mockNetworkChannelsError: Error | null = null; +let mockChannelDetail: NetworkChannel | undefined; +let mockChannelDetailLoading = false; +let mockChannelDetailError: Error | null = null; +let mockChannelMessages: NetworkChannelMessage[] | undefined; +let mockChannelMessagesLoading = false; +let mockChannelMessagesError: Error | null = null; +let mockNetworkPeers: NetworkPeerSummary[] | undefined; +let mockNetworkPeersLoading = false; +let mockNetworkPeersError: Error | null = null; +let mockPeerDetail: NetworkPeerDetail | undefined; +let mockPeerDetailLoading = false; +let mockPeerDetailError: Error | null = null; +let lastNetworkChannelsOptions: { enabled?: boolean } | undefined; +let lastNetworkPeersArgs: + | [channel: string | undefined, options: { enabled?: boolean } | undefined] + | undefined; + +const mockCreateNetworkChannelMutateAsync = vi.fn(); +let mockCreateNetworkChannelPending = false; + +vi.mock("@tanstack/react-router", () => ({ + Link: ({ + children, + to, + params, + ...props + }: { + children: ReactNode; + params?: { id?: string }; + to: string; + [key: string]: unknown; + }) => { + const href = params?.id ? to.replace("$id", params.id) : to; + return ( + + {children} + + ); + }, + createFileRoute: () => (opts: { component: () => ReactNode }) => ({ + component: opts.component, + }), +})); + +vi.mock("sonner", () => ({ + toast, +})); + +vi.mock("@/systems/workspace", () => ({ + WorkspacePageShell: ({ + children, + controls, + meta, + title, + }: { + children: ReactNode; + controls?: ReactNode; + meta?: ReactNode; + title: string; + }) => ( +
+
+

{title}

+ {controls ?
{controls}
: null} + {meta ?
{meta}
: null} +
+ {children} +
+ ), + useActiveWorkspace: () => ({ + workspaces: mockActiveWorkspaceId + ? [ + { + add_dirs: [], + created_at: "2026-04-13T12:00:00Z", + id: mockActiveWorkspaceId, + name: mockActiveWorkspaceName, + root_dir: "/workspace/polybot", + updated_at: "2026-04-13T12:00:00Z", + }, + ] + : [], + hasWorkspaces: Boolean(mockActiveWorkspaceId), + activeWorkspace: mockActiveWorkspaceId + ? { + add_dirs: [], + created_at: "2026-04-13T12:00:00Z", + id: mockActiveWorkspaceId, + name: mockActiveWorkspaceName, + root_dir: "/workspace/polybot", + updated_at: "2026-04-13T12:00:00Z", + } + : undefined, + activeWorkspaceId: mockActiveWorkspaceId, + clearActiveWorkspaceSelection: vi.fn(), + isError: false, + isLoading: false, + setActiveWorkspaceId: vi.fn(), + }), + useWorkspace: () => ({ + data: mockActiveWorkspaceId + ? { + agents: mockWorkspaceAgents, + sessions: [], + skills: [], + workspace: { + add_dirs: [], + created_at: "2026-04-13T12:00:00Z", + id: mockActiveWorkspaceId, + name: mockActiveWorkspaceName, + root_dir: "/workspace/polybot", + updated_at: "2026-04-13T12:00:00Z", + }, + } + : undefined, + error: null, + isLoading: false, + }), +})); + +vi.mock("@/systems/agent", () => ({ + AgentIcon: ({ provider }: { provider: string }) => ( + + ), + useAgents: () => ({ + data: mockAgents, + error: null, + isLoading: false, + }), +})); + +vi.mock("@/systems/network", async () => { + const actual = await vi.importActual("@/systems/network"); + + return { + ...actual, + useNetworkStatus: () => ({ + data: mockNetworkStatus, + error: null, + isLoading: false, + }), + useNetworkChannels: (options?: { enabled?: boolean }) => { + lastNetworkChannelsOptions = options; + return { + data: mockNetworkChannels, + error: mockNetworkChannelsError, + isLoading: mockNetworkChannelsLoading, + }; + }, + useNetworkChannel: () => ({ + data: mockChannelDetail, + error: mockChannelDetailError, + isLoading: mockChannelDetailLoading, + }), + useNetworkChannelMessages: () => ({ + data: mockChannelMessages, + 
error: mockChannelMessagesError, + isLoading: mockChannelMessagesLoading, + }), + useNetworkPeers: (channel?: string, options?: { enabled?: boolean }) => { + lastNetworkPeersArgs = [channel, options]; + return { + data: mockNetworkPeers, + error: mockNetworkPeersError, + isLoading: mockNetworkPeersLoading, + }; + }, + useNetworkPeer: () => ({ + data: mockPeerDetail, + error: mockPeerDetailError, + isLoading: mockPeerDetailLoading, + }), + useCreateNetworkChannel: () => ({ + isPending: mockCreateNetworkChannelPending, + mutateAsync: mockCreateNetworkChannelMutateAsync, + }), + }; +}); + +import { Route } from "./network"; + +function makeChannelSummary( + overrides: Partial = {} +): NetworkChannelsResponse["channels"][number] { + return { + channel: "general", + last_message_at: "2026-04-13T10:45:00Z", + local_peer_count: 1, + message_count: 6, + peer_count: 3, + remote_peer_count: 2, + session_count: 1, + ...overrides, + }; +} + +function makeChannelMessage(overrides: Partial = {}): NetworkChannelMessage { + return { + channel: "general", + display_name: "polybot-main", + intent: "announce", + local: true, + message_id: "msg_1", + peer_id: "peer_local", + session_id: "sess_local", + text: "Dispatching task deploy-api-v2.3 to coder-agent-01", + timestamp: "2026-04-13T10:42:00Z", + ...overrides, + }; +} + +function makeChannel(overrides: Partial = {}): NetworkChannel { + return { + channel: "general", + last_message_at: "2026-04-13T10:45:00Z", + local_peer_count: 1, + message_count: 6, + peer_count: 3, + peers: [ + { + channel: "general", + display_name: "polybot-main", + joined_at: "2026-04-13T10:00:00Z", + last_seen: "2026-04-13T10:45:00Z", + local: true, + peer_card: { + artifacts_supported: [], + capabilities: [], + peer_id: "peer_local", + profiles_supported: [], + trust_modes_supported: [], + }, + peer_id: "peer_local", + session_id: "sess_local", + }, + ], + remote_peer_count: 2, + session_count: 1, + sessions: [ + { + agent_name: "polybot-main", + created_at: 
"2026-04-13T10:00:00Z", + id: "sess_local", + state: "active", + updated_at: "2026-04-13T10:45:00Z", + workspace_id: "ws_main", + }, + ], + ...overrides, + }; +} + +function makePeerSummary(overrides: Partial = {}): NetworkPeerSummary { + return { + channel: "general", + display_name: "polybot-main", + joined_at: "2026-04-13T10:00:00Z", + last_seen: "2026-04-13T10:45:00Z", + local: true, + peer_card: { + artifacts_supported: [], + capabilities: [], + peer_id: "peer_local", + profiles_supported: [], + trust_modes_supported: [], + }, + peer_id: "peer_local", + session_id: "sess_local", + ...overrides, + }; +} + +function makePeerDetail(overrides: Partial = {}): NetworkPeerDetail { + return { + channel: "general", + display_name: "polybot-main", + joined_at: "2026-04-13T10:00:00Z", + last_seen: "2026-04-13T10:47:00Z", + local: true, + metrics: { + delivered: 12, + received: 12, + rejected: 0, + sent: 14, + }, + peer_card: { + artifacts_supported: [], + capabilities: [], + peer_id: "peer_local", + profiles_supported: [], + trust_modes_supported: [], + }, + peer_id: "peer_local", + session_id: "sess_local", + ...overrides, + }; +} + +const NetworkPage = (Route as unknown as { component: () => ReactNode }).component; + +describe("NetworkPage", () => { + beforeEach(() => { + vi.useRealTimers(); + mockActiveWorkspaceId = "ws_main"; + mockActiveWorkspaceName = "Polybot"; + mockWorkspaceAgents = [ + { name: "polybot-main", prompt: "coordinate", provider: "anthropic" }, + { name: "coder-agent-01", prompt: "code", provider: "openai" }, + ]; + mockAgents = [ + ...mockWorkspaceAgents, + { name: "researcher-01", prompt: "research", provider: "openai" }, + ]; + + mockNetworkStatus = { + channels: 2, + delivery_workers: 4, + enabled: true, + local_peers: 1, + messages_sent: 42, + queued_messages: 3, + remote_peers: 2, + status: "active", + }; + mockNetworkChannels = { + channels: [ + makeChannelSummary(), + makeChannelSummary({ + channel: "deployments", + last_message_at: 
"2026-04-13T10:40:00Z", + peer_count: 2, + }), + ], + }; + mockNetworkChannelsLoading = false; + mockNetworkChannelsError = null; + mockChannelDetail = makeChannel(); + mockChannelDetailLoading = false; + mockChannelDetailError = null; + mockChannelMessages = [ + makeChannelMessage(), + makeChannelMessage({ + display_name: "coder-agent-01", + local: false, + message_id: "msg_2", + peer_id: "peer_coder", + session_id: undefined, + text: "Acknowledged. Starting deployment pipeline...", + timestamp: "2026-04-13T10:43:00Z", + }), + ]; + mockChannelMessagesLoading = false; + mockChannelMessagesError = null; + mockNetworkPeers = [ + makePeerSummary(), + makePeerSummary({ + display_name: "coder-agent-01", + local: false, + peer_id: "peer_coder", + session_id: undefined, + }), + ]; + mockNetworkPeersLoading = false; + mockNetworkPeersError = null; + mockPeerDetail = makePeerDetail(); + mockPeerDetailLoading = false; + mockPeerDetailError = null; + lastNetworkChannelsOptions = undefined; + lastNetworkPeersArgs = undefined; + + mockCreateNetworkChannelPending = false; + mockCreateNetworkChannelMutateAsync.mockReset(); + mockCreateNetworkChannelMutateAsync.mockResolvedValue({ + channel: makeChannel({ + channel: "deployments", + last_message_at: null, + message_count: 0, + }), + } satisfies CreateNetworkChannelResponse); + toast.error.mockReset(); + toast.success.mockReset(); + }); + + it("renders loading and error states from the active list query", () => { + mockNetworkChannelsLoading = true; + mockNetworkChannels = undefined; + const { rerender } = render(); + + expect(screen.getByTestId("workspace-page-shell")).toBeInTheDocument(); + expect(screen.getByTestId("network-channels-list-loading")).toBeInTheDocument(); + expect(screen.getByTestId("network-channel-loading")).toBeInTheDocument(); + + mockNetworkChannelsLoading = false; + mockNetworkChannels = undefined; + mockNetworkChannelsError = new Error("network down"); + rerender(); + + 
expect(screen.getByTestId("network-channels-list-error")).toHaveTextContent("network down"); + expect(screen.getByTestId("network-channel-error")).toHaveTextContent("network down"); + }); + + it("renders peer loading and error states inside the panel instead of replacing the page", async () => { + const user = userEvent.setup(); + mockNetworkPeersLoading = true; + mockNetworkPeers = undefined; + const { rerender } = render(); + + await user.click(screen.getByTestId("network-tab-peers")); + + expect(screen.getByTestId("workspace-page-shell")).toBeInTheDocument(); + expect(screen.getByTestId("network-peers-list-loading")).toBeInTheDocument(); + expect(screen.getByTestId("network-peer-loading")).toBeInTheDocument(); + + mockNetworkPeersLoading = false; + mockNetworkPeers = undefined; + mockNetworkPeersError = new Error("peer discovery failed"); + rerender(); + + expect(screen.getByTestId("network-peers-list-error")).toHaveTextContent( + "peer discovery failed" + ); + expect(screen.getByTestId("network-peer-error")).toHaveTextContent("peer discovery failed"); + }); + + it("stops querying runtime collections and renders a disabled state when the network is off", async () => { + const user = userEvent.setup(); + mockNetworkStatus = { + channels: 0, + delivery_workers: 0, + enabled: false, + local_peers: 0, + messages_sent: 0, + queued_messages: 0, + remote_peers: 0, + status: "disabled", + }; + mockNetworkChannels = undefined; + mockNetworkChannelsError = new Error("Service Unavailable"); + mockNetworkPeers = undefined; + mockNetworkPeersError = new Error("Service Unavailable"); + + render(); + + expect(screen.getByTestId("network-disabled-state")).toHaveTextContent("Network disabled"); + expect(screen.queryByTestId("open-network-create-dialog")).not.toBeInTheDocument(); + expect(lastNetworkChannelsOptions).toEqual({ enabled: false }); + expect(lastNetworkPeersArgs).toEqual([undefined, { enabled: false }]); + + await user.click(screen.getByTestId("network-tab-peers")); + 
+ expect(screen.getByTestId("network-disabled-state")).toHaveTextContent("Network disabled"); + expect(screen.queryByTestId("network-channels-list-error")).not.toBeInTheDocument(); + expect(screen.queryByTestId("network-peers-list-error")).not.toBeInTheDocument(); + }); + + it("renders the channels view with metrics and the read-only timeline", () => { + render(); + + expect(screen.getByText("Network")).toBeInTheDocument(); + expect(screen.getByTestId("network-tab-channels")).toHaveAttribute("aria-pressed", "true"); + expect(screen.getByText("Total Peers")).toBeInTheDocument(); + expect(screen.getByTestId("network-channel-item-general")).toBeInTheDocument(); + expect(screen.getByTestId("network-channel-detail-panel")).toBeInTheDocument(); + expect( + screen.getByText("This channel is read-only. Use the CLI to send messages.") + ).toBeInTheDocument(); + expect(screen.getByTestId("network-channel-message-msg_1")).toHaveTextContent( + "Dispatching task deploy-api-v2.3 to coder-agent-01" + ); + expect(screen.getByText("View Session")).toHaveAttribute("href", "/session/sess_local"); + }); + + it("switches to peers and renders identity and metrics for the selected peer", async () => { + const user = userEvent.setup(); + render(); + + await user.click(screen.getByTestId("network-tab-peers")); + + expect(screen.getByTestId("network-tab-peers")).toHaveAttribute("aria-pressed", "true"); + expect(screen.getByTestId("network-peers-list-panel")).toBeInTheDocument(); + const detailPanel = screen.getByTestId("network-peer-detail-panel"); + expect(detailPanel).toBeInTheDocument(); + expect(within(detailPanel).getByText("Peer Identity")).toBeInTheDocument(); + expect(within(detailPanel).getByText("Message Statistics")).toBeInTheDocument(); + expect(within(detailPanel).getAllByText("polybot-main").length).toBeGreaterThan(0); + expect(within(detailPanel).getByText("general")).toBeInTheDocument(); + }); + + it("opens the create dialog and submits the selected agents", async () => { + 
render(); + + fireEvent.click(screen.getByTestId("open-network-create-dialog")); + expect(screen.getByTestId("network-create-channel-dialog")).toBeInTheDocument(); + expect(screen.queryByTestId("network-agent-option-researcher-01")).not.toBeInTheDocument(); + + const channelNameInput = screen.getByTestId("network-channel-name-input"); + const firstAgent = screen.getByTestId("network-agent-option-polybot-main"); + const secondAgent = screen.getByTestId("network-agent-option-coder-agent-01"); + + fireEvent.change(channelNameInput, { target: { value: "deployments" } }); + fireEvent.click(firstAgent); + fireEvent.click(secondAgent); + + expect(firstAgent).toHaveAttribute("aria-pressed", "true"); + expect(secondAgent).toHaveAttribute("aria-pressed", "true"); + + fireEvent.click(screen.getByTestId("network-create-channel-submit")); + + await waitFor(() => + expect(mockCreateNetworkChannelMutateAsync).toHaveBeenCalledWith({ + agent_names: ["polybot-main", "coder-agent-01"], + channel: "deployments", + workspace_id: "ws_main", + }) + ); + expect(mockCreateNetworkChannelMutateAsync).toHaveBeenCalledOnce(); + expect(toast.success).toHaveBeenCalledWith("Created channel deployments."); + }); + + it("renders truthful empty states for channels and peers", async () => { + const user = userEvent.setup(); + mockNetworkChannels = { channels: [] }; + mockNetworkPeers = []; + + render(); + + expect(screen.getByTestId("network-channels-list-empty")).toBeInTheDocument(); + expect(screen.getByTestId("network-channels-empty-state")).toHaveTextContent("No channels yet"); + + await user.click(screen.getByTestId("network-tab-peers")); + + expect(screen.getByTestId("network-peers-list-empty")).toBeInTheDocument(); + expect(screen.getByTestId("network-peers-empty-state")).toHaveTextContent("No peers connected"); + }); +}); diff --git a/web/src/routes/_app/automation.tsx b/web/src/routes/_app/automation.tsx index 90c5296df..27fce4e52 100644 --- a/web/src/routes/_app/automation.tsx +++ 
b/web/src/routes/_app/automation.tsx @@ -1,15 +1,23 @@ -import { AlertCircle, Bot, Loader2 } from "lucide-react"; -import { startTransition, useMemo, useState } from "react"; +import { AlertCircle, Loader2, Plus, Zap } from "lucide-react"; +import { startTransition, useDeferredValue, useMemo, useState } from "react"; import { createFileRoute } from "@tanstack/react-router"; +import { toast } from "sonner"; import { PillButton } from "@/components/design-system"; +import { Button } from "@/components/ui/button"; import { AutomationDetailPanel, + AutomationEditorDialog, AutomationListPanel, automationJobToDraft, automationTriggerToDraft, createAutomationJobDraft, createAutomationTriggerDraft, + filterAutomationJobs, + filterAutomationTriggers, + normalizeAutomationRetry, + sortAutomationJobs, + sortAutomationTriggers, useAutomationJob, useAutomationJobs, useAutomationJobRuns, @@ -26,14 +34,13 @@ import { } from "@/systems/automation"; import type { AutomationJob, - AutomationTrigger, AutomationRun, AutomationScopeFilter, + AutomationTrigger, CreateAutomationJobRequest, CreateAutomationTriggerRequest, } from "@/systems/automation"; import { useActiveWorkspace } from "@/systems/workspace"; -import { WorkspacePageShell } from "@/systems/workspace/components/workspace-page-shell"; export const Route = createFileRoute("/_app/automation")({ component: AutomationPage, @@ -45,13 +52,63 @@ type AutomationEditorState = | { draft: CreateAutomationJobRequest; kind: "jobs"; - mode: "create" | "edit"; + mode: "create"; } | { draft: CreateAutomationTriggerRequest; kind: "triggers"; - mode: "create" | "edit"; + mode: "create"; + } + | { + draft: CreateAutomationJobRequest; + id: string; + kind: "jobs"; + mode: "edit"; + } + | { + draft: CreateAutomationTriggerRequest; + id: string; + kind: "triggers"; + mode: "edit"; + }; + +function buildEmptyState({ + activeTab, + hasQuery, + onCreate, +}: { + activeTab: AutomationTab; + hasQuery: boolean; + onCreate: () => void; +}) { + if 
(hasQuery) { + return { + description: "Try a different search term or adjust the current scope filter.", + icon: "search" as const, + title: activeTab === "jobs" ? "No jobs found" : "No triggers found", + }; + } + + if (activeTab === "jobs") { + return { + actionLabel: "Create Job", + description: + "Scheduled jobs dispatch prompts to agents on a time-based cadence. Create your first job to start automating.", + icon: "jobs" as const, + onAction: onCreate, + title: "No jobs configured", }; + } + + return { + actionLabel: "Create Trigger", + description: + "Event-driven triggers react to daemon events, webhooks, and extension signals. Create your first trigger to enable reactive automation.", + icon: "triggers" as const, + onAction: onCreate, + title: "No triggers configured", + }; +} function AutomationPage() { const { activeWorkspace, activeWorkspaceId } = useActiveWorkspace(); @@ -62,17 +119,17 @@ function AutomationPage() { const [selectedTriggerId, setSelectedTriggerId] = useState(null); const [searchQuery, setSearchQuery] = useState(""); const [editor, setEditor] = useState(null); - const [actionMessage, setActionMessage] = useState(null); - const [actionError, setActionError] = useState(null); const [queuedRun, setQueuedRun] = useState<{ jobId: string; run: AutomationRun } | null>(null); + const deferredSearchQuery = useDeferredValue(searchQuery); const scopedWorkspaceId = scopeFilter === "workspace" ? (activeWorkspaceId ?? undefined) : undefined; + const listFilters = useMemo( () => ({ + limit: 50, scope: scopeFilter === "all" ? undefined : scopeFilter, workspace_id: scopedWorkspaceId, - limit: 50, }), [scopeFilter, scopedWorkspaceId] ); @@ -82,56 +139,72 @@ function AutomationPage() { const jobs = jobsQuery.data ?? []; const triggers = triggersQuery.data ?? []; - const currentList = activeTab === "jobs" ? 
jobs : triggers; + + const visibleJobs = useMemo( + () => sortAutomationJobs(filterAutomationJobs(jobs, deferredSearchQuery)), + [deferredSearchQuery, jobs] + ); + const visibleTriggers = useMemo( + () => sortAutomationTriggers(filterAutomationTriggers(triggers, deferredSearchQuery)), + [deferredSearchQuery, triggers] + ); + + const currentList = activeTab === "jobs" ? visibleJobs : visibleTriggers; + const currentTotalCount = activeTab === "jobs" ? jobs.length : triggers.length; const currentListLoading = activeTab === "jobs" ? jobsQuery.isLoading : triggersQuery.isLoading; const currentListError = activeTab === "jobs" ? jobsQuery.error : triggersQuery.error; const effectiveSelectedJobId = useMemo(() => { - if (selectedJobId && jobs.some(job => job.id === selectedJobId)) { + if (selectedJobId && visibleJobs.some(job => job.id === selectedJobId)) { return selectedJobId; } - return jobs[0]?.id ?? null; - }, [jobs, selectedJobId]); + + return visibleJobs[0]?.id ?? null; + }, [selectedJobId, visibleJobs]); const effectiveSelectedTriggerId = useMemo(() => { - if (selectedTriggerId && triggers.some(trigger => trigger.id === selectedTriggerId)) { + if (selectedTriggerId && visibleTriggers.some(trigger => trigger.id === selectedTriggerId)) { return selectedTriggerId; } - return triggers[0]?.id ?? null; - }, [selectedTriggerId, triggers]); + + return visibleTriggers[0]?.id ?? null; + }, [selectedTriggerId, visibleTriggers]); const jobDetailQuery = useAutomationJob(effectiveSelectedJobId ?? "", { - enabled: activeTab === "jobs" && editor === null && !!effectiveSelectedJobId, + enabled: activeTab === "jobs" && !!effectiveSelectedJobId, }); const triggerDetailQuery = useAutomationTrigger(effectiveSelectedTriggerId ?? "", { - enabled: activeTab === "triggers" && editor === null && !!effectiveSelectedTriggerId, + enabled: activeTab === "triggers" && !!effectiveSelectedTriggerId, }); const jobRunsQuery = useAutomationJobRuns( effectiveSelectedJobId ?? 
"", { limit: 10 }, - { enabled: activeTab === "jobs" && editor === null && !!effectiveSelectedJobId } + { enabled: activeTab === "jobs" && !!effectiveSelectedJobId } ); const triggerRunsQuery = useAutomationTriggerRuns( effectiveSelectedTriggerId ?? "", { limit: 10 }, - { enabled: activeTab === "triggers" && editor === null && !!effectiveSelectedTriggerId } + { enabled: activeTab === "triggers" && !!effectiveSelectedTriggerId } ); const createJobMutation = useCreateAutomationJob(); const updateJobMutation = useUpdateAutomationJob(); const deleteJobMutation = useDeleteAutomationJob(); const triggerJobMutation = useTriggerAutomationJob(); - const createTriggerMutation = useCreateAutomationTrigger(); const updateTriggerMutation = useUpdateAutomationTrigger(); const deleteTriggerMutation = useDeleteAutomationTrigger(); const selectedItem = activeTab === "jobs" - ? (jobDetailQuery.data ?? jobs.find(job => job.id === effectiveSelectedJobId)) + ? (jobDetailQuery.data ?? + visibleJobs.find(job => job.id === effectiveSelectedJobId) ?? + jobs.find(job => job.id === effectiveSelectedJobId)) : (triggerDetailQuery.data ?? + visibleTriggers.find(trigger => trigger.id === effectiveSelectedTriggerId) ?? triggers.find(trigger => trigger.id === effectiveSelectedTriggerId)); + const selectedJob = activeTab === "jobs" ? (selectedItem as AutomationJob | undefined) : undefined; const selectedTrigger = @@ -161,8 +234,8 @@ function AutomationPage() { startTransition(() => { setActiveTab(nextTab); setEditor(null); - setActionMessage(null); - setActionError(null); + setSearchQuery(""); + setQueuedRun(null); }); }; @@ -172,29 +245,22 @@ function AutomationPage() { setEditor(null); setSelectedJobId(null); setSelectedTriggerId(null); - setActionMessage(null); - setActionError(null); + setQueuedRun(null); }); }; - const clearFeedback = () => { - setActionMessage(null); - setActionError(null); - }; - const handleCreate = () => { - clearFeedback(); setEditor( activeTab === "jobs" ? 
{ + draft: createAutomationJobDraft(activeWorkspaceId), kind: "jobs", mode: "create", - draft: createAutomationJobDraft(activeWorkspaceId), } : { + draft: createAutomationTriggerDraft(activeWorkspaceId), kind: "triggers", mode: "create", - draft: createAutomationTriggerDraft(activeWorkspaceId), } ); }; @@ -204,19 +270,20 @@ function AutomationPage() { return; } - clearFeedback(); setEditor( activeTab === "jobs" && selectedJob ? { + draft: automationJobToDraft(selectedJob), + id: selectedJob.id, kind: "jobs", mode: "edit", - draft: automationJobToDraft(selectedJob), } : selectedTrigger ? { + draft: automationTriggerToDraft(selectedTrigger), + id: selectedTrigger.id, kind: "triggers", mode: "edit", - draft: automationTriggerToDraft(selectedTrigger), } : null ); @@ -227,24 +294,26 @@ function AutomationPage() { return; } - clearFeedback(); - try { + const payload = { + ...editor.draft, + retry: normalizeAutomationRetry(editor.draft.retry ?? undefined), + }; const job = editor.mode === "create" - ? await createJobMutation.mutateAsync(editor.draft) + ? await createJobMutation.mutateAsync(payload) : await updateJobMutation.mutateAsync({ - id: effectiveSelectedJobId ?? "", - data: editor.draft, + data: payload, + id: editor.id, }); setSelectedJobId(job.id); setEditor(null); - setActionMessage( + toast.success( editor.mode === "create" ? `Created job ${job.name}.` : `Updated job ${job.name}.` ); } catch (error) { - setActionError(error instanceof Error ? error.message : "Failed to save automation job"); + toast.error(error instanceof Error ? error.message : "Failed to save automation job"); } }; @@ -253,26 +322,28 @@ function AutomationPage() { return; } - clearFeedback(); - try { + const payload = { + ...editor.draft, + retry: normalizeAutomationRetry(editor.draft.retry ?? undefined), + }; const trigger = editor.mode === "create" - ? await createTriggerMutation.mutateAsync(editor.draft) + ? 
await createTriggerMutation.mutateAsync(payload) : await updateTriggerMutation.mutateAsync({ - id: effectiveSelectedTriggerId ?? "", - data: editor.draft, + data: payload, + id: editor.id, }); setSelectedTriggerId(trigger.id); setEditor(null); - setActionMessage( + toast.success( editor.mode === "create" ? `Created trigger ${trigger.name}.` : `Updated trigger ${trigger.name}.` ); } catch (error) { - setActionError(error instanceof Error ? error.message : "Failed to save automation trigger"); + toast.error(error instanceof Error ? error.message : "Failed to save automation trigger"); } }; @@ -281,8 +352,6 @@ function AutomationPage() { return; } - clearFeedback(); - try { if (activeTab === "jobs") { await deleteJobMutation.mutateAsync({ id: selectedItem.id }); @@ -293,9 +362,9 @@ function AutomationPage() { setSelectedTriggerId(null); } - setActionMessage(`Deleted ${selectedItem.name}.`); + toast.success(`Deleted ${selectedItem.name}.`); } catch (error) { - setActionError(error instanceof Error ? error.message : "Failed to delete automation"); + toast.error(error instanceof Error ? error.message : "Failed to delete automation"); } }; @@ -304,24 +373,22 @@ function AutomationPage() { return; } - clearFeedback(); - try { if (activeTab === "jobs") { await updateJobMutation.mutateAsync({ - id: selectedItem.id, data: { enabled }, + id: selectedItem.id, }); } else { await updateTriggerMutation.mutateAsync({ - id: selectedItem.id, data: { enabled }, + id: selectedItem.id, }); } - setActionMessage(`${enabled ? "Enabled" : "Disabled"} ${selectedItem.name}.`); + toast.success(`${enabled ? "Enabled" : "Disabled"} ${selectedItem.name}.`); } catch (error) { - setActionError(error instanceof Error ? error.message : "Failed to update automation state"); + toast.error(error instanceof Error ? 
error.message : "Failed to update automation state"); } }; @@ -330,18 +397,16 @@ function AutomationPage() { return; } - clearFeedback(); - try { const run = await triggerJobMutation.mutateAsync({ id: selectedItem.id }); setQueuedRun({ jobId: selectedItem.id, run }); - setActionMessage(`Queued run ${run.id}.`); + toast.success(`Queued run ${run.id}.`); } catch (error) { - setActionError(error instanceof Error ? error.message : "Failed to trigger automation job"); + toast.error(error instanceof Error ? error.message : "Failed to trigger automation job"); } }; - if (currentListLoading && currentList.length === 0) { + if (currentListLoading && currentTotalCount === 0) { return (
@@ -349,7 +414,7 @@ function AutomationPage() { ); } - if (currentListError && currentList.length === 0) { + if (currentListError && currentTotalCount === 0) { return (
@@ -362,10 +427,29 @@ function AutomationPage() { ); } + const hasVisibleSearchQuery = deferredSearchQuery.trim() !== ""; + const emptyState = + currentList.length === 0 + ? buildEmptyState({ + activeTab, + hasQuery: hasVisibleSearchQuery, + onCreate: handleCreate, + }) + : null; + return ( - +
+
+ +

+ Automation +

+ + {currentTotalCount} + +
+
+
{(["all", "global", "workspace"] as const).map(scope => ( handleScopeChange(scope)} > {scope.toUpperCase()} @@ -396,47 +481,70 @@ function AutomationPage() { ))}
- } - icon={} - meta={ -
- - {scopeFilter === "workspace" && activeWorkspace - ? `Workspace ${activeWorkspace.name}` - : "Unified jobs and triggers"} - - {actionMessage ? ( - {actionMessage} - ) : null} - {actionError ? ( - {actionError} - ) : null} + +
+
- } - title="Automation" - > - - startTransition(() => { - if (activeTab === "jobs") { - setSelectedJobId(id); - setQueuedRun(null); - } else { - setSelectedTriggerId(id); - } - clearFeedback(); - }) - } - scopeFilter={scopeFilter} - searchQuery={searchQuery} - selectedId={activeTab === "jobs" ? effectiveSelectedJobId : effectiveSelectedTriggerId} - triggers={triggers} - /> - + +
+ + startTransition(() => { + if (activeTab === "jobs") { + setSelectedJobId(id); + setQueuedRun(null); + } else { + setSelectedTriggerId(id); + } + }) + } + scopeFilter={scopeFilter} + searchQuery={searchQuery} + selectedId={activeTab === "jobs" ? effectiveSelectedJobId : effectiveSelectedTriggerId} + totalCount={currentTotalCount} + triggers={visibleTriggers} + /> + { + void handleDelete(); + }} + onEdit={handleEdit} + onToggleEnabled={enabled => { + void handleToggleEnabled(enabled); + }} + onTriggerNow={() => { + void handleTriggerNow(); + }} + runs={displayedRuns} + runsError={runsError} + runsLoading={runsLoading} + /> +
+ + { - void handleDelete(); - }} - onEdit={handleEdit} - onToggleEnabled={enabled => { - void handleToggleEnabled(enabled); - }} - onTriggerNow={() => { - void handleTriggerNow(); - }} - runs={displayedRuns} - runsError={runsError} - runsLoading={runsLoading} /> - +
); } diff --git a/web/src/routes/_app/bridges.tsx b/web/src/routes/_app/bridges.tsx new file mode 100644 index 000000000..7016e8dd0 --- /dev/null +++ b/web/src/routes/_app/bridges.tsx @@ -0,0 +1,384 @@ +import { AlertCircle, Loader2, Plus, Waypoints } from "lucide-react"; +import { startTransition, useDeferredValue, useMemo, useState } from "react"; +import { createFileRoute } from "@tanstack/react-router"; +import { toast } from "sonner"; + +import { PillButton } from "@/components/design-system"; +import { Button } from "@/components/ui/button"; +import { + BridgeCreateDialog, + BridgeDetailPanel, + BridgeEmptyState, + BridgeListPanel, + BridgeTestDeliveryDialog, + compactBridgeDeliveryDefaults, + createBridgeCreateDraft, + createBridgeTestDeliveryDraft, + findBridgeProviderByKey, + isBridgeProviderSelectable, + useBridge, + useBridgeProviders, + useBridgeRoutes, + useBridges, + useCreateBridge, + useTestBridgeDelivery, +} from "@/systems/bridges"; +import type { + BridgeCreateDraft, + BridgeScopeFilter, + BridgeSummary, + BridgeTestDeliveryDraft, + TestBridgeDeliveryResponse, +} from "@/systems/bridges"; +import { useActiveWorkspace, WorkspacePageShell } from "@/systems/workspace"; + +export const Route = createFileRoute("/_app/bridges")({ + component: BridgesPage, +}); + +function matchesBridgeScope( + bridge: BridgeSummary, + activeScope: BridgeScopeFilter, + activeWorkspaceId: string | null +) { + if (activeScope === "all") { + return true; + } + + if (activeScope === "global") { + return bridge.scope === "global"; + } + + return bridge.scope === "workspace" && bridge.workspace_id === activeWorkspaceId; +} + +function matchesBridgeSearch(bridge: BridgeSummary, searchQuery: string) { + if (!searchQuery) { + return true; + } + + const query = searchQuery.toLowerCase(); + return ( + bridge.display_name.toLowerCase().includes(query) || + bridge.platform.toLowerCase().includes(query) || + bridge.extension_name.toLowerCase().includes(query) || + 
bridge.status.toLowerCase().includes(query) + ); +} + +function sortBridges(bridges: BridgeSummary[]) { + return [...bridges].sort((left, right) => { + if (left.scope !== right.scope) { + return left.scope === "global" ? -1 : 1; + } + + return left.display_name.localeCompare(right.display_name); + }); +} + +function BridgesPage() { + const { activeWorkspace, activeWorkspaceId } = useActiveWorkspace(); + + const [activeScope, setActiveScope] = useState("all"); + const [searchQuery, setSearchQuery] = useState(""); + const [selectedBridgeId, setSelectedBridgeId] = useState(null); + const [isCreateDialogOpen, setCreateDialogOpen] = useState(false); + const [isTestDeliveryDialogOpen, setTestDeliveryDialogOpen] = useState(false); + const [createDraft, setCreateDraft] = useState(() => + createBridgeCreateDraft([], activeWorkspaceId) + ); + const [testDeliveryDraft, setTestDeliveryDraft] = useState(() => + createBridgeTestDeliveryDraft() + ); + const [testDeliveryResult, setTestDeliveryResult] = useState( + null + ); + + const deferredSearchQuery = useDeferredValue(searchQuery); + + const bridgesQuery = useBridges(); + const providersQuery = useBridgeProviders(); + const createBridgeMutation = useCreateBridge(); + const testDeliveryMutation = useTestBridgeDelivery(); + + const bridges = bridgesQuery.data?.bridges ?? []; + const bridgeHealth = bridgesQuery.data?.bridge_health ?? {}; + const providers = providersQuery.data ?? 
[]; + const totalBridgeCount = bridges.length; + const canCreateBridge = providers.some(isBridgeProviderSelectable); + + const visibleBridges = useMemo( + () => + sortBridges( + bridges.filter( + bridge => + matchesBridgeScope(bridge, activeScope, activeWorkspaceId) && + matchesBridgeSearch(bridge, deferredSearchQuery) + ) + ), + [activeScope, activeWorkspaceId, bridges, deferredSearchQuery] + ); + + const effectiveSelectedBridgeId = useMemo(() => { + if (selectedBridgeId && visibleBridges.some(bridge => bridge.id === selectedBridgeId)) { + return selectedBridgeId; + } + + return visibleBridges[0]?.id ?? null; + }, [selectedBridgeId, visibleBridges]); + + const selectedBridgeSummary = useMemo( + () => bridges.find(bridge => bridge.id === effectiveSelectedBridgeId), + [bridges, effectiveSelectedBridgeId] + ); + + const bridgeDetailQuery = useBridge(effectiveSelectedBridgeId ?? "", { + enabled: Boolean(effectiveSelectedBridgeId), + }); + const bridgeRoutesQuery = useBridgeRoutes(effectiveSelectedBridgeId ?? "", { + enabled: Boolean(effectiveSelectedBridgeId), + }); + + const selectedBridge = bridgeDetailQuery.data?.bridge ?? selectedBridgeSummary; + const selectedHealth = + bridgeDetailQuery.data?.health ?? + (effectiveSelectedBridgeId ? bridgeHealth[effectiveSelectedBridgeId] : undefined); + + const isInitialLoading = + (bridgesQuery.isLoading && !bridgesQuery.data) || + (providersQuery.isLoading && !providersQuery.data); + const fatalError = + (!bridgesQuery.data && bridgesQuery.error) || (!providersQuery.data && providersQuery.error); + const detailError = bridgeDetailQuery.error ?? bridgeRoutesQuery.error ?? 
null; + const detailLoading = + Boolean(effectiveSelectedBridgeId) && + bridgeDetailQuery.isLoading && + !bridgeDetailQuery.data && + !selectedBridgeSummary; + + const listSummary = useMemo(() => { + if (activeScope === "workspace") { + if (!activeWorkspace) { + return "No active workspace selected."; + } + + return `${visibleBridges.length} bridges in ${activeWorkspace.name}`; + } + + if (activeScope === "global") { + return `${visibleBridges.length} global bridges`; + } + + return `${visibleBridges.length} bridges visible`; + }, [activeScope, activeWorkspace, visibleBridges.length]); + + const openCreateDialog = () => { + setCreateDraft(createBridgeCreateDraft(providers, activeWorkspaceId)); + setCreateDialogOpen(true); + }; + + const handleCreateDialogOpenChange = (open: boolean) => { + setCreateDialogOpen(open); + }; + + const openTestDeliveryDialog = () => { + setTestDeliveryDraft(createBridgeTestDeliveryDraft(selectedBridge)); + setTestDeliveryResult(null); + setTestDeliveryDialogOpen(true); + }; + + const handleTestDeliveryDialogOpenChange = (open: boolean) => { + setTestDeliveryDialogOpen(open); + if (!open) { + setTestDeliveryResult(null); + } + }; + + const handleCreateBridge = async () => { + const provider = findBridgeProviderByKey(providers, createDraft.selectedProviderKey); + if (!provider || !isBridgeProviderSelectable(provider)) { + toast.error("Select an available bridge provider before creating the bridge."); + return; + } + if (createDraft.scope === "workspace" && !activeWorkspaceId) { + toast.error("Select an active workspace before creating a workspace-scoped bridge."); + return; + } + + const scope = createDraft.scope; + + try { + const result = await createBridgeMutation.mutateAsync({ + delivery_defaults: compactBridgeDeliveryDefaults(createDraft.deliveryDefaults), + display_name: createDraft.displayName.trim(), + enabled: true, + extension_name: provider.extension_name, + platform: provider.platform, + routing_policy: 
createDraft.routingPolicy, + scope, + status: "starting", + workspace_id: scope === "workspace" ? (activeWorkspaceId ?? undefined) : undefined, + }); + + startTransition(() => { + setActiveScope(result.bridge.scope); + setSearchQuery(""); + setSelectedBridgeId(result.bridge.id); + }); + setCreateDialogOpen(false); + toast.success(`Created bridge ${result.bridge.display_name}.`); + } catch (error) { + toast.error(error instanceof Error ? error.message : "Failed to create bridge"); + } + }; + + const handleTestDelivery = async () => { + if (!selectedBridge) { + return; + } + + try { + const result = await testDeliveryMutation.mutateAsync({ + id: selectedBridge.id, + data: { + message: createOptionalMessage(testDeliveryDraft.message), + target: { + bridge_instance_id: selectedBridge.id, + ...compactBridgeDeliveryDefaults(testDeliveryDraft.target), + }, + }, + }); + + setTestDeliveryResult(result); + toast.success(`Resolved delivery target for ${selectedBridge.display_name}.`); + } catch (error) { + toast.error(error instanceof Error ? error.message : "Failed to resolve bridge target"); + } + }; + + if (isInitialLoading) { + return ( +
+ +
+ ); + } + + if (fatalError) { + return ( +
+
+ +

+ {fatalError.message ?? "Failed to load bridges"} +

+
+
+ ); + } + + return ( + <> + + {(["all", "global", "workspace"] as const).map(scope => ( + + startTransition(() => { + setActiveScope(scope); + setSelectedBridgeId(null); + }) + } + > + {scope.toUpperCase()} + + ))} +
+ } + icon={} + meta={ + + } + title="Bridges" + > + {totalBridgeCount === 0 ? ( + + ) : ( + <> + + + + )} + + + + + + + ); +} + +function createOptionalMessage(value: string): string | undefined { + const normalized = value.trim(); + return normalized === "" ? undefined : normalized; +} diff --git a/web/src/routes/_app/network.tsx b/web/src/routes/_app/network.tsx new file mode 100644 index 000000000..5a0eb01d0 --- /dev/null +++ b/web/src/routes/_app/network.tsx @@ -0,0 +1,349 @@ +import { Hash, Network as NetworkIcon, Plus, Users } from "lucide-react"; +import { startTransition, useDeferredValue, useMemo, useState } from "react"; +import { createFileRoute } from "@tanstack/react-router"; +import { toast } from "sonner"; + +import { MetricStrip, PillButton } from "@/components/design-system"; +import { Button } from "@/components/ui/button"; +import { + createNetworkChannelDraft, + getNetworkMetricCards, + matchesChannelSearch, + matchesPeerSearch, + NetworkChannelDetailPanel, + NetworkChannelsListPanel, + NetworkCreateChannelDialog, + NetworkEmptyState, + NetworkPeerDetailPanel, + NetworkPeersListPanel, + sortAgentsForNetwork, + sortNetworkChannels, + sortNetworkPeers, + toggleDraftAgent, + useCreateNetworkChannel, + useNetworkChannel, + useNetworkChannelMessages, + useNetworkChannels, + useNetworkPeer, + useNetworkPeers, + useNetworkStatus, +} from "@/systems/network"; +import type { NetworkTab } from "@/systems/network"; +import { useActiveWorkspace, useWorkspace, WorkspacePageShell } from "@/systems/workspace"; + +export const Route = createFileRoute("/_app/network")({ + component: NetworkPage, +}); + +function NetworkPage() { + const { activeWorkspace, activeWorkspaceId } = useActiveWorkspace(); + + const [activeTab, setActiveTab] = useState("channels"); + const [channelSearchQuery, setChannelSearchQuery] = useState(""); + const [peerSearchQuery, setPeerSearchQuery] = useState(""); + const [selectedChannel, setSelectedChannel] = useState(null); + const 
[selectedPeerId, setSelectedPeerId] = useState(null); + const [isCreateDialogOpen, setCreateDialogOpen] = useState(false); + const [createDraft, setCreateDraft] = useState(createNetworkChannelDraft); + + const deferredChannelSearch = useDeferredValue(channelSearchQuery); + const deferredPeerSearch = useDeferredValue(peerSearchQuery); + + const networkStatusQuery = useNetworkStatus(); + const isNetworkEnabled = networkStatusQuery.data?.enabled === true; + const isNetworkDisabled = networkStatusQuery.data?.enabled === false; + const isNetworkStatusLoading = networkStatusQuery.isLoading && !networkStatusQuery.data; + const networkStatusError = !networkStatusQuery.data ? networkStatusQuery.error : null; + + const networkChannelsQuery = useNetworkChannels({ enabled: isNetworkEnabled }); + const networkPeersQuery = useNetworkPeers(undefined, { enabled: isNetworkEnabled }); + const createChannelMutation = useCreateNetworkChannel(); + const workspaceDetailQuery = useWorkspace(activeWorkspaceId ?? "", { + enabled: Boolean(activeWorkspaceId), + }); + + const allChannels = networkChannelsQuery.data?.channels ?? []; + const allPeers = networkPeersQuery.data ?? []; + const workspaceAgents = workspaceDetailQuery.data?.agents ?? []; + const sortedAgents = useMemo(() => sortAgentsForNetwork(workspaceAgents), [workspaceAgents]); + + const visibleChannels = useMemo( + () => + sortNetworkChannels( + allChannels.filter(channel => matchesChannelSearch(channel, deferredChannelSearch)) + ), + [allChannels, deferredChannelSearch] + ); + const visiblePeers = useMemo( + () => sortNetworkPeers(allPeers.filter(peer => matchesPeerSearch(peer, deferredPeerSearch))), + [allPeers, deferredPeerSearch] + ); + + const effectiveSelectedChannel = useMemo(() => { + if (selectedChannel && visibleChannels.some(channel => channel.channel === selectedChannel)) { + return selectedChannel; + } + + return visibleChannels[0]?.channel ?? 
null; + }, [selectedChannel, visibleChannels]); + + const effectiveSelectedPeerId = useMemo(() => { + if (selectedPeerId && visiblePeers.some(peer => peer.peer_id === selectedPeerId)) { + return selectedPeerId; + } + + return visiblePeers[0]?.peer_id ?? null; + }, [selectedPeerId, visiblePeers]); + + const channelDetailQuery = useNetworkChannel(effectiveSelectedChannel ?? "", { + enabled: isNetworkEnabled && activeTab === "channels" && Boolean(effectiveSelectedChannel), + }); + const channelMessagesQuery = useNetworkChannelMessages(effectiveSelectedChannel ?? "", { + enabled: isNetworkEnabled && activeTab === "channels" && Boolean(effectiveSelectedChannel), + }); + const peerDetailQuery = useNetworkPeer(effectiveSelectedPeerId ?? "", { + enabled: isNetworkEnabled && activeTab === "peers" && Boolean(effectiveSelectedPeerId), + }); + + const pageMetrics = getNetworkMetricCards(networkStatusQuery.data, allChannels.length); + const headerCount = activeTab === "channels" ? allChannels.length : allPeers.length; + const isChannelsListLoading = + !isNetworkDisabled && + ((isNetworkStatusLoading && !networkStatusQuery.data) || + (networkChannelsQuery.isLoading && !networkChannelsQuery.data)); + const channelsListError = !isNetworkDisabled + ? (networkStatusError ?? (!networkChannelsQuery.data ? networkChannelsQuery.error : null)) + : null; + const isPeersListLoading = + !isNetworkDisabled && + ((isNetworkStatusLoading && !networkStatusQuery.data) || + (networkPeersQuery.isLoading && !networkPeersQuery.data)); + const peersListError = !isNetworkDisabled + ? (networkStatusError ?? (!networkPeersQuery.data ? 
networkPeersQuery.error : null)) + : null; + + const handleOpenCreateDialog = () => { + setCreateDraft(createNetworkChannelDraft()); + setCreateDialogOpen(true); + }; + + const handleCreateChannel = async () => { + if (!activeWorkspaceId) { + toast.error("Select an active workspace before creating a channel."); + return; + } + + const channelName = createDraft.channelName.trim(); + if (!channelName) { + toast.error("Provide a channel name before creating the channel."); + return; + } + + if (createDraft.selectedAgentNames.length === 0) { + toast.error("Select at least one local agent before creating the channel."); + return; + } + + try { + const result = await createChannelMutation.mutateAsync({ + agent_names: createDraft.selectedAgentNames, + channel: channelName, + workspace_id: activeWorkspaceId, + }); + + startTransition(() => { + setActiveTab("channels"); + setChannelSearchQuery(""); + setSelectedChannel(result.channel.channel); + }); + setCreateDialogOpen(false); + setCreateDraft(createNetworkChannelDraft()); + toast.success(`Created channel ${result.channel.channel}.`); + } catch (error) { + toast.error(error instanceof Error ? error.message : "Failed to create network channel"); + } + }; + + const canSubmitCreate = + isNetworkEnabled && + Boolean(activeWorkspaceId) && + createDraft.channelName.trim() !== "" && + createDraft.selectedAgentNames.length > 0; + + return ( + <> + } + count={headerCount} + controls={ +
+ setActiveTab("channels")} + > + Channels + + setActiveTab("peers")} + > + Peers + +
+ } + meta={ + activeTab === "channels" && isNetworkEnabled ? ( + + ) : null + } + > +
+
+
+ {pageMetrics.map(metric => ( + + ))} +
+
+ + {isNetworkDisabled ? ( + } + testId="network-disabled-state" + title="Network disabled" + /> + ) : activeTab === "channels" ? ( +
+ + {isChannelsListLoading ? ( + + ) : channelsListError ? ( + + ) : allChannels.length === 0 ? ( + } + onAction={handleOpenCreateDialog} + testId="network-channels-empty-state" + title="No channels yet" + /> + ) : visibleChannels.length === 0 ? ( + } + testId="network-channels-empty-state" + title="No channels found" + /> + ) : ( + + )} +
+ ) : ( +
+ + {isPeersListLoading ? ( + + ) : peersListError ? ( + + ) : allPeers.length === 0 ? ( + } + testId="network-peers-empty-state" + title="No peers connected" + /> + ) : visiblePeers.length === 0 ? ( + } + testId="network-peers-empty-state" + title="No peers found" + /> + ) : ( + + )} +
+ )} +
+
+ + + setCreateDraft(currentDraft => ({ + ...currentDraft, + channelName, + })) + } + onOpenChange={setCreateDialogOpen} + onSubmit={handleCreateChannel} + onToggleAgent={agentName => + setCreateDraft(currentDraft => toggleDraftAgent(currentDraft, agentName)) + } + open={isCreateDialogOpen} + workspaceName={activeWorkspace?.name ?? null} + /> + + ); +} diff --git a/web/src/systems/automation/components/automation-detail-panel.test.tsx b/web/src/systems/automation/components/automation-detail-panel.test.tsx index ed9d42686..073df3d37 100644 --- a/web/src/systems/automation/components/automation-detail-panel.test.tsx +++ b/web/src/systems/automation/components/automation-detail-panel.test.tsx @@ -2,7 +2,6 @@ import { fireEvent, render, screen } from "@testing-library/react"; import { describe, expect, it, vi } from "vitest"; import { AutomationDetailPanel } from "./automation-detail-panel"; -import { createAutomationJobDraft, createAutomationTriggerDraft } from "../lib/automation-drafts"; const jobFixture = { id: "job_daily_review", @@ -58,8 +57,7 @@ function renderPanel(overrides: Partial render( } describe("AutomationDetailPanel", () => { - it("renders loading, error, and empty states", () => { - const loading = renderPanel({ isLoading: true, item: undefined }); + it("renders loading state", () => { + renderPanel({ isLoading: true, item: undefined }); expect(screen.getByTestId("automation-detail-loading")).toBeInTheDocument(); + }); - loading.onDelete.mockReset(); - + it("renders error state", () => { renderPanel({ error: new Error("boom"), item: undefined }); expect(screen.getByTestId("automation-detail-error")).toBeInTheDocument(); - - renderPanel({ item: undefined }); - expect(screen.getByTestId("automation-detail-empty")).toBeInTheDocument(); }); - it("renders editor variants for jobs and triggers", () => { - const jobEditor = { - kind: "jobs" as const, - mode: "create" as const, - draft: createAutomationJobDraft("ws_alpha"), - isPending: false, - onCancel: 
vi.fn(), - onChange: vi.fn(), - onSubmit: vi.fn(), - }; - renderPanel({ editor: jobEditor, item: undefined }); - expect(screen.getByTestId("automation-job-form")).toBeInTheDocument(); - - const triggerEditor = { - kind: "triggers" as const, - mode: "edit" as const, - draft: createAutomationTriggerDraft("ws_alpha"), - isPending: false, - onCancel: vi.fn(), - onChange: vi.fn(), - onSubmit: vi.fn(), - }; - renderPanel({ editor: triggerEditor, item: undefined }); - expect(screen.getByTestId("automation-trigger-form")).toBeInTheDocument(); + it("renders route-level empty state", () => { + renderPanel({ + emptyState: { + actionLabel: "Create Job", + description: "Create the first job.", + icon: "jobs", + onAction: vi.fn(), + title: "No jobs configured", + }, + item: undefined, + }); + expect(screen.getByTestId("automation-detail-empty")).toBeInTheDocument(); + expect(screen.getByText("No jobs configured")).toBeInTheDocument(); }); it("renders dynamic job details and dispatches action callbacks", () => { @@ -140,6 +124,19 @@ describe("AutomationDetailPanel", () => { expect(onDelete).toHaveBeenCalledOnce(); }); + it("renders manual jobs without implying a cron schedule", () => { + renderPanel({ + item: { + ...jobFixture, + schedule: undefined, + }, + }); + + expect(screen.getByText("manual")).toBeInTheDocument(); + expect(screen.getAllByText("Manual")).toHaveLength(2); + expect(screen.queryByText("Cron schedule")).not.toBeInTheDocument(); + }); + it("renders config trigger details without mutable actions", () => { renderPanel({ item: triggerFixture, @@ -151,7 +148,7 @@ describe("AutomationDetailPanel", () => { expect( screen.getByText( - "Config-sourced automation can only toggle enabled state from the UI. Definition changes stay in configuration files." + "This automation is defined in configuration files. Only the enabled state can be toggled from the UI." 
) ).toBeInTheDocument(); expect(screen.getByText("Webhook id")).toBeInTheDocument(); diff --git a/web/src/systems/automation/components/automation-detail-panel.tsx b/web/src/systems/automation/components/automation-detail-panel.tsx index 78bb802a4..8fd487e8e 100644 --- a/web/src/systems/automation/components/automation-detail-panel.tsx +++ b/web/src/systems/automation/components/automation-detail-panel.tsx @@ -1,51 +1,55 @@ -import { Loader2, Play, Trash2 } from "lucide-react"; +import { + ArrowRight, + Bot, + CalendarDays, + Clock3, + Loader2, + Lock, + Pencil, + Play, + RefreshCw, + Search, + Trash2, + Zap, +} from "lucide-react"; +import type { ComponentType } from "react"; -import { cn } from "@/lib/utils"; +import { Pill } from "@/components/design-system"; +import { Button } from "@/components/ui/button"; +import { + Empty, + EmptyContent, + EmptyDescription, + EmptyHeader, + EmptyMedia, + EmptyTitle, +} from "@/components/ui/empty"; -import { AutomationJobForm } from "./automation-job-form"; import { AutomationRunHistory } from "./automation-run-history"; -import { AutomationTriggerForm } from "./automation-trigger-form"; import { + automationScopeLabel, + automationSemanticTone, automationSourceLabel, - automationStatusTone, + automationSourceTone, describeFireLimit, describeRetry, describeSchedule, - describeTrigger, + formatDate, formatDateTime, formatRelativeTime, } from "../lib/automation-formatters"; -import type { - AutomationJob, - AutomationRun, - AutomationTrigger, - CreateAutomationJobRequest, - CreateAutomationTriggerRequest, -} from "../types"; - -type AutomationEditorState = - | { - draft: CreateAutomationJobRequest; - isPending: boolean; - kind: "jobs"; - mode: "create" | "edit"; - onCancel: () => void; - onChange: (draft: CreateAutomationJobRequest) => void; - onSubmit: () => void; - } - | { - draft: CreateAutomationTriggerRequest; - isPending: boolean; - kind: "triggers"; - mode: "create" | "edit"; - onCancel: () => void; - onChange: (draft: 
CreateAutomationTriggerRequest) => void; - onSubmit: () => void; - }; +import type { AutomationJob, AutomationRun, AutomationTrigger } from "../types"; + +export interface AutomationDetailEmptyState { + actionLabel?: string; + description: string; + icon: "jobs" | "search" | "triggers"; + onAction?: () => void; + title: string; +} interface AutomationDetailPanelProps { - activeWorkspaceId?: string | null; - editor: AutomationEditorState | null; + emptyState?: AutomationDetailEmptyState | null; error: Error | null; isDeleting: boolean; isLoading: boolean; @@ -62,26 +66,202 @@ interface AutomationDetailPanelProps { runsLoading: boolean; } -const TONE_CLASSES = { - accent: "bg-[color:var(--color-accent-tint)] text-[color:var(--color-accent)]", - success: "bg-[color:var(--color-success-tint)] text-[color:var(--color-success)]", - warning: "bg-[color:var(--color-warning-tint)] text-[color:var(--color-warning)]", - danger: "bg-[color:var(--color-danger-tint)] text-[color:var(--color-danger)]", - neutral: "bg-[color:var(--color-neutral-tint)] text-[color:var(--color-text-tertiary)]", -} as const; +function AutomationTag({ + children, + tone, +}: { + children: string; + tone: "amber" | "danger" | "green" | "neutral" | "violet"; +}) { + return ( + + {children} + + ); +} + +function SectionEyebrow({ children }: { children: string }) { + return ( +

+ {children} +

+ ); +} + +function MetaChip({ + children, + icon, +}: { + children: string; + icon?: ComponentType<{ className?: string }>; +}) { + const Icon = icon; -function MetadataRow({ label, value }: { label: string; value: string }) { return ( -
- {label} - {value} + + {Icon ? : null} + {children} + + ); +} + +function EmptyState({ + actionLabel, + description, + icon, + onAction, + title, +}: AutomationDetailEmptyState) { + const Icon = icon === "jobs" ? Clock3 : icon === "triggers" ? Zap : Search; + + return ( +
+ + + + + +
+ + {title} + + + {description} + +
+
+ {actionLabel && onAction ? ( + + + + ) : null} +
); } +function JobScheduleCard({ job }: { job: AutomationJob }) { + const mode = job.schedule ? (job.schedule.mode ?? "cron") : "manual"; + const ScheduleIcon = mode === "every" ? RefreshCw : mode === "at" ? CalendarDays : Clock3; + const scheduleValue = + mode === "cron" + ? (job.schedule?.expr ?? "Cron schedule") + : mode === "every" + ? (job.schedule?.interval ?? "Interval") + : mode === "at" + ? formatDate(job.schedule?.time) + : describeSchedule(job.schedule); + + return ( +
+ SCHEDULE +
+
+
+ +
+
+

+ {mode} +

+

+ {scheduleValue} +

+

+ {describeSchedule(job.schedule)} +

+
+
+ +
+

+ Next run +

+

+ {formatRelativeTime(job.next_run)} +

+

+ {formatDateTime(job.next_run)} +

+
+
+
+ ); +} + +function TriggerActivationCard({ trigger }: { trigger: AutomationTrigger }) { + const matches = Object.entries(trigger.filter ?? {}); + + return ( +
+ ACTIVATION +
+
+

+ When +

+ + {trigger.event} + +
+ + + +
+

+ Matches +

+
+ {matches.length > 0 ? ( + matches.map(([key, value]) => ( + + {`${key} ${value}`} + + )) + ) : ( + + No filters + + )} +
+
+ + + +
+

+ Dispatches to +

+
+ + {trigger.agent_name} +
+
+
+
+ ); +} + export function AutomationDetailPanel({ - activeWorkspaceId, - editor, + emptyState, error, isDeleting, isLoading, @@ -97,30 +277,6 @@ export function AutomationDetailPanel({ runsError, runsLoading, }: AutomationDetailPanelProps) { - if (editor) { - return editor.kind === "jobs" ? ( - - ) : ( - - ); - } - if (isLoading) { return (
; + } + return (
-
-
-
-

- {item.name} -

- - {item.enabled ? "enabled" : "disabled"} - - - {automationSourceLabel(item.source)} - -
-

- {isJob - ? describeSchedule((item as AutomationJob).schedule) - : describeTrigger(item as AutomationTrigger)} -

-
- Agent {item.agent_name} - Scope {item.scope} - Updated {formatDateTime(item.updated_at)} +
+
+
+
+
+

+ {item.name} +

+ + {item.enabled ? "ENABLED" : "DISABLED"} + + + {automationSourceLabel(item.source)} + + {item.source === "config" ? ( + + ) : null} +
+ +

+ {`Agent: ${item.agent_name} · Scope: ${item.scope} · Updated ${formatDate(item.updated_at)}`} +

-
-
- - {isDynamic ? ( - - ) : null} - {isJob ? ( - - ) : null} - {isDynamic ? ( - - ) : null} -
-
- - {!isDynamic ? ( -
- Config-sourced automation can only toggle enabled state from the UI. Definition changes - stay in configuration files. + {isTogglePending ? "Saving..." : item.enabled ? "Disable" : "Enable"} + + {isJob ? ( + + ) : null} + {isDynamic ? ( + + ) : null} + {isDynamic ? ( + + ) : null} +
- ) : null} - -
-
-
-
-

- Prompt -

-

- The exact prompt payload that will be sent to the agent session. -

-
-
-              {item.prompt}
-            
-
- - -
-
-
-

- Metadata -

-

- Operational state, retry posture, and scope binding for this automation. + {!isDynamic ? ( +

+ +

+ This automation is defined in configuration files. Only the enabled state can be + toggled from the UI.

-
- - - - - - - {item.workspace_id ? : null} - {isJob ? ( - <> - - - - ) : ( - <> - - {(item as AutomationTrigger).endpoint_slug ? ( - - ) : null} - {(item as AutomationTrigger).webhook_id ? ( - - ) : null} - - )} + ) : null} + + {job ? : null} + {trigger ? : null} + +
+
+ {isJob ? "PROMPT" : "PROMPT TEMPLATE"} + {!isJob ? GO TEMPLATE : null} +
+
+            {item.prompt}
+          
+
+ {describeRetry(item.retry)} + {describeFireLimit(item.fire_limit)} + {automationScopeLabel(item.scope)}
+ + + + {trigger?.webhook_id ? ( +
+
+
+ Event +

+ {trigger.event} +

+
+
+ Endpoint +

+ {trigger.endpoint_slug ?? "Unavailable"} +

+
+
+ Webhook id +

+ {trigger.webhook_id} +

+
+
+
+ ) : null}
-
+
); } diff --git a/web/src/systems/automation/components/automation-editor-dialog.tsx b/web/src/systems/automation/components/automation-editor-dialog.tsx new file mode 100644 index 000000000..5a1f383af --- /dev/null +++ b/web/src/systems/automation/components/automation-editor-dialog.tsx @@ -0,0 +1,87 @@ +import { X } from "lucide-react"; + +import { Button } from "@/components/ui/button"; +import { Dialog, DialogClose, DialogContent } from "@/components/ui/dialog"; + +import { AutomationJobForm } from "./automation-job-form"; +import { AutomationTriggerForm } from "./automation-trigger-form"; +import type { CreateAutomationJobRequest, CreateAutomationTriggerRequest } from "../types"; + +type AutomationDialogEditorState = + | { + draft: CreateAutomationJobRequest; + isPending: boolean; + kind: "jobs"; + mode: "create" | "edit"; + onCancel: () => void; + onChange: (draft: CreateAutomationJobRequest) => void; + onSubmit: () => void; + } + | { + draft: CreateAutomationTriggerRequest; + isPending: boolean; + kind: "triggers"; + mode: "create" | "edit"; + onCancel: () => void; + onChange: (draft: CreateAutomationTriggerRequest) => void; + onSubmit: () => void; + }; + +interface AutomationEditorDialogProps { + activeWorkspaceId?: string | null; + editor: AutomationDialogEditorState | null; +} + +export function AutomationEditorDialog({ activeWorkspaceId, editor }: AutomationEditorDialogProps) { + return ( + { + if (!open) { + editor?.onCancel(); + } + }} + > + {editor ? ( + + + } + > + + Close editor + + {editor.kind === "jobs" ? 
( + + ) : ( + + )} + + ) : null} + + ); +} diff --git a/web/src/systems/automation/components/automation-form-primitives.tsx b/web/src/systems/automation/components/automation-form-primitives.tsx index 693fc360d..1bcf878df 100644 --- a/web/src/systems/automation/components/automation-form-primitives.tsx +++ b/web/src/systems/automation/components/automation-form-primitives.tsx @@ -29,7 +29,7 @@ export function AutomationField({ label, hint, children }: AutomationFieldProps) function inputBaseClassName() { return cn( - "w-full rounded-lg border border-[color:var(--color-divider)] bg-[color:var(--color-surface)]", + "w-full rounded-lg border border-[color:var(--color-divider)] bg-[color:var(--color-surface-elevated)]", "px-3 py-2 text-sm text-[color:var(--color-text-primary)] outline-none transition-colors", "placeholder:text-[color:var(--color-text-tertiary)] focus:border-[color:var(--color-accent)]" ); @@ -69,7 +69,7 @@ export function AutomationCheckbox({ }: AutomationCheckboxProps) { return (