From 79fa1b160abac7b1ca8a5100e36b12c5723e2a11 Mon Sep 17 00:00:00 2001 From: Number531 <120485065+Number531@users.noreply.github.com> Date: Thu, 7 May 2026 02:12:40 -0400 Subject: [PATCH 1/2] feat(skill/schema-doc-validator): operator skill doc validator with SQL syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NEW skill that validates operator-facing skill docs against current code-side schema, metrics, alerts, endpoints, and column definitions. Prevents the class of bug where docs reference non-existent surfaces and operator queries fail at runtime with 'column does not exist' (caught PR #96 three times). Files: - SKILL.md (frontmatter + workflow + invocation + output spec) - scripts/validate.sh (entry point: pre-flight, truth extract, rule dispatch, format report, exit code) - scripts/extract-truth.py (reads postgres.js, sdkMetrics.js, alerts.yml, alertingRules.js, 5 router files → emits truth.json) - scripts/rules/01-frontmatter.py (YAML shape validation) - scripts/rules/02-metric-refs.py (claude_* metrics; histogram bucket suffix derived from _ms base names) - scripts/rules/03-alert-refs.py (PascalCase alert names exist) - scripts/rules/04-table-refs.py (snake_case table refs with table-shaped suffixes resolve) - scripts/rules/05-column-refs.py (table.column references resolve — catches python_code_hash / match_method / kg_error class) - scripts/rules/06-endpoint-refs.py (/api/... 
paths exist; WARNING severity) - scripts/rules/07-sql-syntax.py (SQL fenced blocks: balanced delimiters, read-only enforcement, alias.column resolution, bare column resolution via FROM tables + AS aliases + JSONB keys) - scripts/format-report.py (JSON → markdown with severity grouping) - references/rules-catalog.md (rule descriptions, sample violations, remediation, opt-out markers) - references/extraction-paths.md (truth source file paths + extraction regexes) - references/output-spec.md (markdown + JSON output formats; exit codes) Opt-out marker convention: `` on the same line (or preceding the SQL block) skips that rule for the corresponding line/block. Used for documented schema references (DDL) and intentional example violations in remediation prose. Verified end-to-end: - Clean run on all current skills: 0 CRITICAL violations after applying noqa:07 to user-management/SKILL.md (CREATE TABLE schema reference) - Negative test (fictional columns python_code_hash + fake_field + confidence_score): 3 CRITICAL violations correctly flagged - Positive test (FROM clauses with AS aliases like 'AS event_count'): no false positives - Histogram bucket validation: claude_request_duration_ms_bucket valid even though only base name claude_request_duration_ms in sdkMetrics.js Truth at validation time: 33 tables, 34 metrics, 13 alerts, 67 endpoints. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/schema-doc-validator/SKILL.md | 122 +++++++ .../references/extraction-paths.md | 59 ++++ .../references/output-spec.md | 86 +++++ .../references/rules-catalog.md | 75 ++++ .../scripts/extract-truth.py | 172 ++++++++++ .../scripts/format-report.py | 107 ++++++ .../scripts/rules/01-frontmatter.py | 130 +++++++ .../scripts/rules/02-metric-refs.py | 74 ++++ .../scripts/rules/03-alert-refs.py | 78 +++++ .../scripts/rules/04-table-refs.py | 86 +++++ .../scripts/rules/05-column-refs.py | 92 +++++ .../scripts/rules/06-endpoint-refs.py | 90 +++++ .../scripts/rules/07-sql-syntax.py | 322 ++++++++++++++++++ .../schema-doc-validator/scripts/validate.sh | 128 +++++++ .claude/skills/user-management/SKILL.md | 1 + 15 files changed, 1622 insertions(+) create mode 100644 .claude/skills/schema-doc-validator/SKILL.md create mode 100644 .claude/skills/schema-doc-validator/references/extraction-paths.md create mode 100644 .claude/skills/schema-doc-validator/references/output-spec.md create mode 100644 .claude/skills/schema-doc-validator/references/rules-catalog.md create mode 100755 .claude/skills/schema-doc-validator/scripts/extract-truth.py create mode 100644 .claude/skills/schema-doc-validator/scripts/format-report.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/01-frontmatter.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/02-metric-refs.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/03-alert-refs.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/04-table-refs.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/05-column-refs.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/06-endpoint-refs.py create mode 100755 .claude/skills/schema-doc-validator/scripts/rules/07-sql-syntax.py create mode 100755 .claude/skills/schema-doc-validator/scripts/validate.sh diff --git 
a/.claude/skills/schema-doc-validator/SKILL.md b/.claude/skills/schema-doc-validator/SKILL.md new file mode 100644 index 000000000..3ed8d35a6 --- /dev/null +++ b/.claude/skills/schema-doc-validator/SKILL.md @@ -0,0 +1,122 @@ +--- +name: schema-doc-validator +description: > + Validate operator-facing skill docs and reference markdown against current code-side + schema, metrics, alerts, endpoints, and column definitions. Catches the class of + bug where docs reference non-existent tables, columns, metrics, or alert names — + prevents operator SQL queries from failing at runtime with "column does not exist". + Use when reviewing skill changes pre-merge, after schema migrations, or during + v7.0.x correctness sweeps. Triggers: "validate skill docs", "audit schema references", + "check skill correctness", "schema doc lint", "/schema-doc-validator". + Supports flags: --target , --rules core|strict, --format json|markdown. +--- + +# Schema Doc Validator + +## Workflow + +Execute `scripts/validate.sh` from the skill directory. The script: + +1. Runs `extract-truth.py` once — reads source files, builds a unified `truth.json` with all real tables, columns, metrics, alerts, and endpoint paths +2. Iterates over rule scripts in `scripts/rules/01-*.py` through `07-*.py` +3. Each rule emits `{rule_id, severity, file, line, message, remediation}` JSON tuples +4. `format-report.py` aggregates and emits a markdown report (or JSON with `--format json`) +5. 
Exit code 0 if no CRITICAL violations, 1 if any CRITICAL, 2 on pre-flight failure (missing truth source or no targets matched) + +```bash +bash /Users/ej/Super-Legal/.claude/skills/schema-doc-validator/scripts/validate.sh +``` + +## Invocation + +```bash +/schema-doc-validator # all skill docs, core rules +/schema-doc-validator --target '.claude/skills/**/SKILL.md' # restrict to specific files +/schema-doc-validator --rules strict # stricter rule set (also flags warnings) +/schema-doc-validator --format json # machine-readable output +``` + +## Rules + +| ID | Name | What it checks | Severity | +|---|---|---|---| +| 01 | frontmatter | YAML frontmatter has `name` (kebab-case) + `description` (≥30 chars) | CRITICAL (WARNING when `name` is not kebab-case) | +| 02 | metric-refs | Every `claude_*` metric token exists in `src/utils/sdkMetrics.js` | CRITICAL | +| 03 | alert-refs | Every alert name exists in `prometheus/alerts.yml` or `src/config/alertingRules.js` | CRITICAL | +| 04 | table-refs | Every table-name reference exists in `src/db/postgres.js` | CRITICAL | +| 05 | column-refs | Every `table.column` or bare column reference resolves against schema truth (catches the `python_code_hash` / `match_method` / `kg_error` class of bug) | CRITICAL | +| 06 | endpoint-refs | Every `/api/...` path exists in router files | WARNING | +| 07 | sql-syntax | SQL fenced blocks parse + reference real columns + are read-only (no DML) | CRITICAL (WARNING when a bare column matches a different table) | + +See `references/rules-catalog.md` for sample violations + remediation per rule. 
+ +## Truth Sources + +| Source | What truth | File | +|---|---|---| +| Tables + columns | `{table_name: [col1, col2, ...]}` | `super-legal-mcp-refactored/src/db/postgres.js` | +| Metrics | `[claude_*_total, _state, _ms_bucket, ...]` | `super-legal-mcp-refactored/src/utils/sdkMetrics.js` | +| Alerts | `[ClaudeToolErrorRateHigh, HookCircuitBreakerOpen, ...]` | `super-legal-mcp-refactored/prometheus/alerts.yml` + `src/config/alertingRules.js` | +| Endpoints | `[/api/db/sessions, /api/admin/sessions/:sessionId/legal-hold, ...]` | `super-legal-mcp-refactored/src/server/{dbFrontendRouter,adminRouter,authRouter,claude-sdk-server,documentConversionRouter}.js` | + +See `references/extraction-paths.md` for the extraction pattern used on each source. + +## Output Format + +``` +## Schema Doc Validation Report +Timestamp: 2026-05-07T... | Target: `<target>` | Rules: core + +### Summary +Files with violations: N | CRITICAL: M | WARNING: K + +### CRITICAL (operator queries will fail at runtime) + +- session-diagnostics/SKILL.md:108 [05-column-refs] + Column reference does not exist (e.g., 'confidence_score' should be 'confidence') + Remediation: replace stale column reference with real column name + +### WARNING (stale references) +- ... + +### Rule Coverage +| Rule | Violations | +|---|---| +| 01-frontmatter | 0 | +... + +### Truth Sources +[table / metric / alert / endpoint counts extracted for this run] +``` + +See `references/output-spec.md` for the full template. + +## Pre-flight Checks + +The script validates these before running rules. If any fail, exits non-zero with a clear message: + +```bash +which python3 # Python 3.6+ required +test -f super-legal-mcp-refactored/src/db/postgres.js # Truth source must exist +test -f super-legal-mcp-refactored/src/utils/sdkMetrics.js +test -f super-legal-mcp-refactored/prometheus/alerts.yml +``` + +## Read-Only Guarantee + +This skill never mutates files, the database, or any external system. It only reads source files and skill docs and emits a report. 
Safe to invoke from CI, pre-commit hooks, or `/loop`. + +## Troubleshooting + +| Failure | Fix | +|---|---| +| `extract-truth.py` fails: "Cannot locate repo root" | Run from the repo root or any directory beneath it; the script walks up from CWD to find `super-legal-mcp-refactored/` (`--repo-root` is documented in `references/extraction-paths.md` as not yet implemented) | +| Rule 05 reports false positive on alias names (e.g., `s.kg_status`) | Rule 05 follows `FROM table AS alias` and joins to resolve aliases. If still false-positive, add a `noqa:05` opt-out marker (no space after the colon; see `references/rules-catalog.md`) | +| Rule 07 fails on multi-statement SQL with `;` separator | The skill currently expects single-statement queries. Split multi-statement blocks into separate fenced blocks | +| Truth source out of date (false negatives) | Truth is regenerated on each run — re-run after pulling latest code | + +## Known Constraints + +- **Regex-based SQL parser**: not full PostgreSQL grammar. Sufficient for SELECT-only operator queries we ship in skill docs (Pattern 11/14 type). For complex JOINs with subqueries, may emit false positives — accept and document via a `noqa:07` opt-out marker. +- **No live DB validation**: catches references to non-existent columns at the schema level, but cannot verify data correctness (e.g., enum values, constraint violations). For data correctness, use `session-diagnostics`. +- **No git history awareness**: only validates current state. Doesn't track which SKILL.md changes broke a previously-valid reference. diff --git a/.claude/skills/schema-doc-validator/references/extraction-paths.md b/.claude/skills/schema-doc-validator/references/extraction-paths.md new file mode 100644 index 000000000..928e18ba4 --- /dev/null +++ b/.claude/skills/schema-doc-validator/references/extraction-paths.md @@ -0,0 +1,59 @@ +# Truth Extraction Paths + +`extract-truth.py` reads the following files (from repo root) to build the +canonical `truth.json` consumed by all rule scripts. 
+ +## Source files + +| Truth | Source file | Extraction pattern | +|---|---|---| +| Tables + columns | `super-legal-mcp-refactored/src/db/postgres.js` | `CREATE TABLE IF NOT EXISTS (\w+) \(...\);` blocks (multi-line) + `ALTER TABLE \w+ ADD COLUMN IF NOT EXISTS (\w+) ...` | +| Metrics | `super-legal-mcp-refactored/src/utils/sdkMetrics.js` | `name: '(claude_*_total\|_state\|_ms\|_bucket\|_sessions)'` regex on each `new client.Counter/Gauge/Histogram(...)` declaration | +| Alerts (yaml) | `super-legal-mcp-refactored/prometheus/alerts.yml` | `^- alert: (\w+)$` per alert rule | +| Alerts (js) | `super-legal-mcp-refactored/src/config/alertingRules.js` | `alert: '(\w+)'` declarations (for reference parity with yml) | +| Endpoints | `super-legal-mcp-refactored/src/server/{dbFrontendRouter,adminRouter,authRouter,claude-sdk-server,documentConversionRouter}.js` | `router.('/api/...')` and `app.('/api/...')` declarations | + +## Adding new truth sources + +If a new schema-defining file is added (e.g., a new router), update +`extract-truth.py`: + +```python +# In main() +server_dir = base / "src" / "server" +router_paths = [ + server_dir / "dbFrontendRouter.js", + server_dir / "adminRouter.js", + # ... add new router here + server_dir / "newRouter.js", +] +``` + +For new tables not in `postgres.js` (e.g., separate migration files), +add a parallel parse step. + +## Repo-root resolution + +`extract-truth.py` walks up from CWD to find a directory containing +`super-legal-mcp-refactored/`. If running from outside the repo, pass +`--repo-root ` (NOT YET IMPLEMENTED — currently exits with error). + +## Output schema (`truth.json`) + +```json +{ + "tables": { + "transcript_events": ["id", "session_id", "session_key", "sequence_number", "event_type", "event_data", "created_at"], + "code_executions": ["id", "session_id", "agent_type", "model_id", ...] 
+ }, + "metrics": ["claude_request_duration_ms", "claude_hook_persistence_failures_total", ...], + "alerts": ["ClaudeToolErrorRateHigh", "HookCircuitBreakerOpen", ...], + "endpoints": ["/api/db/sessions", "/api/admin/sessions/:sessionId/legal-hold", ...], + "_meta": { + "repo_root": "/Users/ej/Super-Legal", + "sources": { "postgres_js": "...", "metrics_js": "...", ... } + } +} +``` + +Truth is regenerated on every `validate.sh` run — no caching, no stale state. diff --git a/.claude/skills/schema-doc-validator/references/output-spec.md b/.claude/skills/schema-doc-validator/references/output-spec.md new file mode 100644 index 000000000..85f5523be --- /dev/null +++ b/.claude/skills/schema-doc-validator/references/output-spec.md @@ -0,0 +1,86 @@ +# Output Specification + +## Markdown format (default) + +``` +## Schema Doc Validation Report +Timestamp: | Target: `` | Rules: `` + +### Summary +Files with violations: N | CRITICAL: M | WARNING: K + +### CRITICAL (operator queries will fail at runtime) +- `path/to/file.md:LINE` [rule_id] + + → Remediation: +- ... + +### WARNING (stale references / non-blocking) +- ... + +### Rule Coverage +| Rule | Violations | +|---|---| +| 01-frontmatter | 0 | +| 02-metric-refs | 0 | +| 03-alert-refs | 0 | +| 04-table-refs | 0 | +| 05-column-refs | 0 | +| 06-endpoint-refs | 12 | +| 07-sql-syntax | 0 | + +### Truth Sources +- Tables: 33 +- Metrics: 34 +- Alerts: 13 +- Endpoints: 67 +``` + +## JSON format (`--format json`) + +```json +{ + "violations": [ + { + "rule_id": "05-column-refs", + "severity": "CRITICAL", + "file": ".claude/skills/session-diagnostics/SKILL.md", + "line": 108, + "message": "Column 'citation_source_links.confidence_score' does not exist...", + "remediation": "Replace stale column reference with real column name" + } + ], + "truth_meta": { + "repo_root": "/Users/ej/Super-Legal", + "sources": { ... 
} + } +} +``` + +## Exit codes + +- `0` — no CRITICAL violations (WARNINGs may exist) +- `1` — at least one CRITICAL violation +- `2` — pre-flight failure (missing source files, no targets matched) + +## Severity definitions + +- **CRITICAL**: Operator-facing SQL or commands will fail at runtime if copy-pasted. Examples: stale column references, fictional metrics, DML in skill docs. +- **WARNING**: Reference is stale but non-blocking. Examples: endpoint path drift (operator may catch it via 404), non-canonical alert name spelling. + +## Severity tuple convention + +Every rule script emits violation tuples with this exact shape: + +```python +{ + "rule_id": "-", # e.g., "07-sql-syntax" + "severity": "CRITICAL" | "WARNING", + "file": str, # absolute or repo-relative path + "line": int, # 1-indexed + "message": str, # what's wrong + "remediation": str, # what to do about it +} +``` + +This convention is shared with `session-diagnostics` and `post-deploy-verify` skills. diff --git a/.claude/skills/schema-doc-validator/references/rules-catalog.md b/.claude/skills/schema-doc-validator/references/rules-catalog.md new file mode 100644 index 000000000..55a61f21d --- /dev/null +++ b/.claude/skills/schema-doc-validator/references/rules-catalog.md @@ -0,0 +1,75 @@ +# Rules Catalog + +Each rule lives at `scripts/rules/NN-name.py` and emits JSON violation tuples +of shape `{rule_id, severity, file, line, message, remediation}`. + +## Rule 01 — frontmatter + +**Severity**: CRITICAL (missing `name` or `description`); WARNING (name not kebab-case) +**What it checks**: SKILL.md YAML frontmatter has required fields and valid shape. 
+**Sample violation**: `Frontmatter missing required 'description:' field` +**Remediation**: Add `description: ...` to frontmatter (≥30 chars, list trigger phrases) + +## Rule 02 — metric-refs + +**Severity**: CRITICAL +**What it checks**: Every `claude_*_total` / `_state` / `_ms` / `_bucket` / `_sessions` token mentioned in skill markdown exists as a `name:` field in `super-legal-mcp-refactored/src/utils/sdkMetrics.js`. Histogram base names like `claude_request_duration_ms` automatically grant validity to the `_bucket`, `_count`, `_sum` Prometheus-derived suffixes. +**Sample violation**: `Metric 'claude_hook_persistance_failures_total' referenced but not defined in sdkMetrics.js` (typo) +**Remediation**: Verify metric name spelling against `sdkMetrics.js`. +**Opt-out**: `` on the same line + +## Rule 03 — alert-refs + +**Severity**: CRITICAL +**What it checks**: Every backticked alert name (PascalCase like `ClaudeToolErrorRateHigh`, `HookCircuitBreakerOpen`, `ReconciliationKgBacklog`) exists in `prometheus/alerts.yml` or `src/config/alertingRules.js`. +**Sample violation**: `Alert 'ClaudeMemoryHigh' referenced but not defined in alerts.yml` +**Remediation**: Verify alert name; check the 13 known alerts in `alerts.yml`. + +## Rule 04 — table-refs + +**Severity**: CRITICAL +**What it checks**: Every snake_case table-shaped reference (with recognizable suffixes like `_log`, `_events`, `_links`, `_executions`) in skill markdown exists as a `CREATE TABLE` in `postgres.js`. +**Sample violation**: `Table 'transcripts_log' referenced but not defined in postgres.js` (typo of `transcript_events`) +**Remediation**: Verify table name. Real tables list extracted from postgres.js. + +## Rule 05 — column-refs + +**Severity**: CRITICAL +**What it checks**: Every `table.column` reference in skill markdown — where `table` is a known table name — confirms `column` exists in that table's schema. 
**This rule catches the `python_code_hash` / `match_method` / `confidence_score` / `kg_error` class of bug** that bit PR #96 three times. +**Sample violation**: `Column 'citation_source_links.confidence_score' does not exist in table 'citation_source_links'. Did you mean 'confidence'?` +**Remediation**: Replace stale column reference with real column name. +**Opt-out**: `` on the same line (use for example violations in remediation prose) + +## Rule 06 — endpoint-refs + +**Severity**: WARNING (not CRITICAL — endpoint name drift is operational documentation, not copy-paste-into-curl) +**What it checks**: Every backticked `/api/...` path in skill markdown matches a `router.('/api/...')` declaration in router files. +**Sample violation**: `Endpoint '/api/admin/halt' not found in router files` (real path is `/api/sessions/:sessionKey/halt`) +**Remediation**: Verify endpoint path. Some warnings may be false positives for frontend-internal endpoints not in scoped router files. + +## Rule 07 — sql-syntax + +**Severity**: CRITICAL (DML/DDL keywords, unbalanced delimiters, table.column mismatches); WARNING (bare column matches a different table — possible alias confusion) +**What it checks**: +- SQL fenced blocks (` ```sql ... ``` `) parse with balanced parens/brackets/quotes +- Read-only enforcement — no `INSERT`/`UPDATE`/`DELETE`/`DROP`/`ALTER`/`TRUNCATE`/`GRANT`/`REVOKE`/`CREATE` +- Starts with `SELECT`/`WITH`/`EXPLAIN` (or comment) +- Every `alias.column` resolves via `FROM table AS alias` aliases +- Every `table.column` resolves against schema truth +- Every bare column reference resolves in at least one `FROM` table OR is an `AS` output alias OR is a JSONB path key +**Sample violations**: +- `DML/DDL keyword 'INSERT' in operator skill SQL. Skill docs must be read-only.` +- `SQL: column 'csl.confidence_score' does not exist in table 'citation_source_links'` +- `SQL: bare column 'fake_field' does not resolve in FROM tables` +**Remediation**: Fix the SQL. 
For documented schema references (DDL), use `` marker. +**Opt-out**: `` on the line(s) immediately before the ` ```sql ` block, or anywhere in the SQL itself. + +## Adding a new rule + +1. Create `scripts/rules/NN-rule-name.py` with same interface: + - Reads `truth.json` from `sys.argv[1]` + - Reads target files from `sys.argv[2:]` + - Emits a JSON array of violation tuples to stdout +2. Update `validate.sh` if the rule needs special dispatch logic +3. Document in this catalog +4. Add a sample violation + opt-out marker if applicable diff --git a/.claude/skills/schema-doc-validator/scripts/extract-truth.py b/.claude/skills/schema-doc-validator/scripts/extract-truth.py new file mode 100755 index 000000000..2e8f48876 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/extract-truth.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +"""Extract source-of-truth from project source files into truth.json. + +Reads: + - src/db/postgres.js → tables map {table_name: [col, ...]} + - src/utils/sdkMetrics.js → metrics list + - prometheus/alerts.yml → alert names + - src/config/alertingRules.js → alert names (merged) + - src/server/{dbFrontendRouter,adminRouter,authRouter,claude-sdk-server, + documentConversionRouter}.js → endpoint paths + +Emits truth.json on stdout. +""" + +import json +import re +import sys +from pathlib import Path + + +def find_repo_root(start: Path) -> Path: + """Walk up to find the repo root (contains super-legal-mcp-refactored/).""" + cur = start.resolve() + for _ in range(10): + if (cur / "super-legal-mcp-refactored").is_dir(): + return cur + if cur.parent == cur: + break + cur = cur.parent + raise SystemExit( + "Cannot locate repo root (no super-legal-mcp-refactored/ found in any parent dir)" + ) + + +def extract_tables_and_columns(pg_js: str) -> dict: + """Parse postgres.js for CREATE TABLE blocks + ALTER TABLE ADD COLUMN. 
+ + Returns: {table_name: [col1, col2, ...]} + """ + tables: dict = {} + + # CREATE TABLE blocks (multi-line, terminated by `);`) + for m in re.finditer( + r"CREATE TABLE IF NOT EXISTS (\w+)\s*\((.*?)\);", + pg_js, + re.DOTALL, + ): + table = m.group(1) + body = m.group(2) + cols: list = [] + for line in body.split("\n"): + line = line.strip() + if not line or line.startswith("--"): + continue + # `column_name ...` or `column_name TYPE,` + cm = re.match( + r"^(\w+)\s+(?:UUID|BIGSERIAL|SERIAL|VARCHAR|TEXT|" + r"INTEGER|INT|SMALLINT|BIGINT|BOOLEAN|TIMESTAMPTZ|" + r"TIMESTAMP|DECIMAL|NUMERIC|JSONB|JSON|FLOAT|REAL|" + r"BYTEA|DATE)", + line, + ) + if cm: + cols.append(cm.group(1)) + tables[table] = cols + + # ALTER TABLE ADD COLUMN IF NOT EXISTS + for m in re.finditer( + r"ALTER TABLE (\w+)\s+ADD COLUMN IF NOT EXISTS (\w+)\b", + pg_js, + ): + table, col = m.group(1), m.group(2) + if table in tables: + if col not in tables[table]: + tables[table].append(col) + else: + tables[table] = [col] + + return tables + + +def extract_metrics(metrics_js: str) -> list: + """Parse sdkMetrics.js for `name: 'claude_*'` declarations.""" + return sorted( + set( + re.findall( + r"name:\s*['\"]([a-z_][a-z_0-9]*?(?:_total|_state|_ms|_bucket|_sessions))['\"]", + metrics_js, + ) + ) + ) + + +def extract_alerts(alerts_yml: str, alerting_rules_js: str) -> list: + """Parse alerts.yml + alertingRules.js for alert names.""" + yml_alerts = re.findall(r"^\s*-\s*alert:\s*(\w+)", alerts_yml, re.MULTILINE) + js_alerts = re.findall(r"alert:\s*['\"](\w+)['\"]", alerting_rules_js) + return sorted(set(yml_alerts + js_alerts)) + + +def extract_endpoints(router_files: list) -> list: + """Parse router files for `router.('/api/...'`.""" + paths: set = set() + for router_src in router_files: + for m in re.finditer( + r"(?:router|app)\.\w+\(['\"]([^'\"]+)['\"]", + router_src, + ): + path = m.group(1) + if path.startswith("/api/") or path == "/health" or path == "/metrics": + paths.add(path) + return 
sorted(paths) + + +def main(): + # Resolve repo root from CWD + repo_root = find_repo_root(Path.cwd()) + base = repo_root / "super-legal-mcp-refactored" + + pg_path = base / "src" / "db" / "postgres.js" + metrics_path = base / "src" / "utils" / "sdkMetrics.js" + alerts_yml_path = base / "prometheus" / "alerts.yml" + alerting_rules_path = base / "src" / "config" / "alertingRules.js" + + server_dir = base / "src" / "server" + router_paths = [ + server_dir / "dbFrontendRouter.js", + server_dir / "adminRouter.js", + server_dir / "authRouter.js", + server_dir / "claude-sdk-server.js", + server_dir / "documentConversionRouter.js", + ] + + # Verify all sources exist + missing = [p for p in [pg_path, metrics_path, alerts_yml_path] if not p.exists()] + if missing: + print( + f"ERROR: Missing required truth source(s): {missing}", + file=sys.stderr, + ) + sys.exit(2) + + pg_js = pg_path.read_text() + metrics_js = metrics_path.read_text() + alerts_yml = alerts_yml_path.read_text() if alerts_yml_path.exists() else "" + alerting_rules_js = ( + alerting_rules_path.read_text() if alerting_rules_path.exists() else "" + ) + router_srcs = [p.read_text() for p in router_paths if p.exists()] + + truth = { + "tables": extract_tables_and_columns(pg_js), + "metrics": extract_metrics(metrics_js), + "alerts": extract_alerts(alerts_yml, alerting_rules_js), + "endpoints": extract_endpoints(router_srcs), + "_meta": { + "repo_root": str(repo_root), + "sources": { + "postgres_js": str(pg_path), + "metrics_js": str(metrics_path), + "alerts_yml": str(alerts_yml_path), + "alerting_rules_js": str(alerting_rules_path), + "router_files": [str(p) for p in router_paths if p.exists()], + }, + }, + } + + print(json.dumps(truth, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/format-report.py b/.claude/skills/schema-doc-validator/scripts/format-report.py new file mode 100644 index 000000000..27c2b2d77 --- /dev/null +++ 
b/.claude/skills/schema-doc-validator/scripts/format-report.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +"""Format-report: aggregate rule violations into markdown or JSON.""" + +import argparse +import json +import sys +from collections import Counter, defaultdict +from datetime import datetime, timezone +from pathlib import Path + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--truth", required=True) + ap.add_argument("--violations", required=True) + ap.add_argument("--format", choices=["markdown", "json"], default="markdown") + ap.add_argument("--target", default="") + ap.add_argument("--rules", default="core") + args = ap.parse_args() + + truth = json.loads(Path(args.truth).read_text()) + violations = json.loads(Path(args.violations).read_text()) + + if args.format == "json": + json.dump( + {"violations": violations, "truth_meta": truth.get("_meta", {})}, + sys.stdout, + indent=2, + ) + return + + # Markdown report + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Count files scanned (unique file paths in violations + targets) + files_with_violations = {v["file"] for v in violations} + + counts = Counter(v["severity"] for v in violations) + rule_counts: dict = defaultdict(lambda: {"violations": 0}) + for v in violations: + rule_counts[v["rule_id"]]["violations"] += 1 + + # Group by severity + critical = [v for v in violations if v["severity"] == "CRITICAL"] + warnings = [v for v in violations if v["severity"] == "WARNING"] + + out = [] + out.append("## Schema Doc Validation Report") + out.append( + f"Timestamp: {timestamp} | Target: `{args.target}` | " + f"Rules: `{args.rules}`" + ) + out.append("") + out.append("### Summary") + out.append( + f"Files with violations: {len(files_with_violations)} | " + f"CRITICAL: {counts.get('CRITICAL', 0)} | " + f"WARNING: {counts.get('WARNING', 0)}" + ) + out.append("") + + if critical: + out.append("### CRITICAL (operator queries will fail at runtime)") + for v in critical: + 
out.append( + f"- `{v['file']}:{v['line']}` [{v['rule_id']}]" + ) + out.append(f" {v['message']}") + out.append(f" → Remediation: {v['remediation']}") + out.append("") + + if warnings: + out.append("### WARNING (stale references / non-blocking)") + for v in warnings: + out.append( + f"- `{v['file']}:{v['line']}` [{v['rule_id']}]" + ) + out.append(f" {v['message']}") + out.append(f" → Remediation: {v['remediation']}") + out.append("") + + if not violations: + out.append("### ✓ No violations detected") + out.append("") + out.append("All schema/metric/alert/endpoint/column references in scanned") + out.append("files resolve against current code-side truth.") + out.append("") + + out.append("### Rule Coverage") + out.append("| Rule | Violations |") + out.append("|---|---|") + for rule in sorted(rule_counts.keys()): + out.append(f"| {rule} | {rule_counts[rule]['violations']} |") + out.append("") + + out.append("### Truth Sources") + out.append("- Tables: " + str(len(truth["tables"]))) + out.append("- Metrics: " + str(len(truth["metrics"]))) + out.append("- Alerts: " + str(len(truth["alerts"]))) + out.append("- Endpoints: " + str(len(truth["endpoints"]))) + out.append("") + + print("\n".join(out)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/01-frontmatter.py b/.claude/skills/schema-doc-validator/scripts/rules/01-frontmatter.py new file mode 100755 index 000000000..bbcb61ea2 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/01-frontmatter.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +"""Rule 01: SKILL.md frontmatter shape validation. + +Checks: YAML frontmatter delimited by `---`, contains `name` (kebab-case) + and `description` (≥30 chars). 
+""" + +import json +import re +import sys +from pathlib import Path + + +def validate_file(path: Path): + violations = [] + text = path.read_text() + if not text.startswith("---\n"): + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "CRITICAL", + "file": str(path), + "line": 1, + "message": "SKILL.md missing frontmatter open delimiter (---)", + "remediation": "Add `---` as the first line", + } + ) + return violations + + end = text.find("\n---\n", 4) + if end < 0: + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "CRITICAL", + "file": str(path), + "line": 1, + "message": "SKILL.md missing frontmatter close delimiter", + "remediation": "Add `---` after the frontmatter block", + } + ) + return violations + + fm = text[4:end] + + # name field + name_match = re.search(r"^name:\s*(\S+)", fm, re.MULTILINE) + if not name_match: + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "CRITICAL", + "file": str(path), + "line": 2, + "message": "Frontmatter missing required `name:` field", + "remediation": "Add `name: ` to frontmatter", + } + ) + else: + name = name_match.group(1).strip() + if not re.match(r"^[a-z][a-z0-9-]*[a-z0-9]$", name): + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "WARNING", + "file": str(path), + "line": 2, + "message": ( + f"Frontmatter name '{name}' is not kebab-case " + "(lowercase letters/digits/hyphens only)" + ), + "remediation": ( + f"Rename to kebab-case (e.g., '{re.sub(r'[^a-z0-9]+', '-', name.lower())}')" + ), + } + ) + + # description field + desc_match = re.search(r"description:\s*(?:>-?\s*\n((?:^[ \t]+.+\n?)+)|(.+))", fm, re.MULTILINE) + if not desc_match: + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "CRITICAL", + "file": str(path), + "line": 3, + "message": "Frontmatter missing required `description:` field", + "remediation": "Add `description: ...` (≥30 chars, list trigger phrases)", + } + ) + else: + desc = (desc_match.group(1) or 
desc_match.group(2) or "").strip() + # collapse whitespace + desc_compact = re.sub(r"\s+", " ", desc) + if len(desc_compact) < 30: + violations.append( + { + "rule_id": "01-frontmatter", + "severity": "CRITICAL", + "file": str(path), + "line": 3, + "message": ( + f"Frontmatter description too short ({len(desc_compact)} chars; " + "minimum 30)" + ), + "remediation": "Expand description to at least 30 chars; list trigger phrases", + } + ) + + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 01-frontmatter.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + # truth.json not used by this rule; included for uniform interface + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + if path.name == "SKILL.md": + all_violations.extend(validate_file(path)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/02-metric-refs.py b/.claude/skills/schema-doc-validator/scripts/rules/02-metric-refs.py new file mode 100755 index 000000000..7f7cbd932 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/02-metric-refs.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +"""Rule 02: Validate every claude_* metric reference exists in sdkMetrics.js. + +Scans skill markdown for tokens matching the metric naming convention +(claude_*_total, _state, _ms, _bucket, _sessions) and verifies each +exists in the truth set. +""" + +import json +import re +import sys +from pathlib import Path + +# Metrics referenced as code (`claude_foo_total`) — captured even inside backticks +METRIC_RE = re.compile( + r"\bclaude_[a-z_][a-z_0-9]*?(?:_total|_state|_ms|_bucket|_sessions)\b" +) + + +def validate_file(path: Path, real_metrics: set): + """Validate metric refs. 
Note: histogram base name '_ms' implies + Prometheus-derived '_ms_bucket' / '_count' / '_sum' suffixes are valid.""" + violations = [] + # Histogram bucket suffix: any _ms metric also has _ms_bucket / _ms_count / _ms_sum + derived = set() + for m in real_metrics: + if m.endswith("_ms"): + derived.add(m + "_bucket") + derived.add(m + "_count") + derived.add(m + "_sum") + valid_metrics = real_metrics | derived + + for lineno, line in enumerate(path.read_text().splitlines(), start=1): + if "noqa:02" in line: + continue + # Skip frontmatter and code blocks unrelated to metrics + for token in METRIC_RE.findall(line): + if token not in valid_metrics: + violations.append( + { + "rule_id": "02-metric-refs", + "severity": "CRITICAL", + "file": str(path), + "line": lineno, + "message": ( + f"Metric '{token}' referenced but not defined in sdkMetrics.js" + ), + "remediation": ( + "Verify metric name. If renamed, update reference; " + "if removed, drop the reference" + ), + } + ) + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 02-metric-refs.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + real_metrics = set(truth["metrics"]) + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, real_metrics)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/03-alert-refs.py b/.claude/skills/schema-doc-validator/scripts/rules/03-alert-refs.py new file mode 100755 index 000000000..bb6ab8e2f --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/03-alert-refs.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +"""Rule 03: Validate every alert name reference exists in alerts.yml or alertingRules.js. + +Alert names follow a specific naming pattern (PascalCase, often suffixed with +descriptive terms). 
Scans for backticked alert names and validates against truth. +""" + +import json +import re +import sys +from pathlib import Path + +# Alert names: backticked PascalCase tokens that look like alert names. +# Matches things like `ClaudeToolErrorRateHigh`, `HookCircuitBreakerOpen`, +# `ReconciliationKgBacklog`. Avoids false positives by requiring backtick OR +# that the token ends in a recognizable alert-suffix. +ALERT_RE = re.compile( + r"`((?:Claude|Hook|Reconciliation|StructuredOutput|CircuitBreaker|RateLimit|" + r"Tool|API)[A-Za-z]+)`" +) + + +def validate_file(path: Path, real_alerts: set): + violations = [] + for lineno, line in enumerate(path.read_text().splitlines(), start=1): + for m in ALERT_RE.finditer(line): + token = m.group(1) + # Heuristic: alert names end in descriptive words (e.g., High, Open, Backlog) + # Skip common false positives + if token in {"ClaudeAPI", "HookEvent", "ToolUse", "RateLimit"}: + continue + if token not in real_alerts: + # Check if it's an obvious false positive (e.g., a class name) + # The truth set has 13 alerts; if the token has the right shape but + # isn't there, it's probably a stale reference + if re.match( + r"^(Claude|Hook|Reconciliation|StructuredOutput|CircuitBreaker)" + r"[A-Z][a-zA-Z]+$", + token, + ): + violations.append( + { + "rule_id": "03-alert-refs", + "severity": "CRITICAL", + "file": str(path), + "line": lineno, + "message": ( + f"Alert '{token}' referenced but not defined in " + "alerts.yml or alertingRules.js" + ), + "remediation": ( + "Verify alert name. Real alerts: " + + ", ".join(sorted(real_alerts)[:5]) + + "..." 
+ ), + } + ) + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 03-alert-refs.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + real_alerts = set(truth["alerts"]) + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, real_alerts)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/04-table-refs.py b/.claude/skills/schema-doc-validator/scripts/rules/04-table-refs.py new file mode 100755 index 000000000..754557e2e --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/04-table-refs.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +"""Rule 04: Validate every table-name reference exists in postgres.js. + +Scans skill markdown for likely table references (snake_case identifiers +with characteristic suffixes like _events, _log, _links, _executions). 
+""" + +import json +import re +import sys +from pathlib import Path + +# Table-like tokens: backticked snake_case with recognizable table suffixes +TABLE_REFS_RE = re.compile( + r"`([a-z][a-z0-9_]+(?:_log|_events|_links|_executions|_inputs|_writes|" + r"_interventions|_mappings|_embeddings|_states|_provenance|_attempts|_audit|" + r"_lock))`" +) + +# Also match bare references in SQL FROM clauses +FROM_CLAUSE_RE = re.compile(r"\bFROM\s+([a-z][a-z0-9_]+)", re.IGNORECASE) + + +def validate_file(path: Path, real_tables: set): + violations: list = [] + text = path.read_text() + seen: set = set() # dedupe per-file to avoid noise + + for lineno, line in enumerate(text.splitlines(), start=1): + candidates = set() + candidates.update(TABLE_REFS_RE.findall(line)) + + # Match SQL FROM + for m in FROM_CLAUSE_RE.finditer(line): + tok = m.group(1) + if "_" in tok or tok in real_tables: + candidates.add(tok) + + for token in candidates: + key = (token, lineno) + if key in seen: + continue + seen.add(key) + # Skip obvious non-tables + if token.startswith(("idx_", "fk_", "pk_")): + continue + if token in real_tables: + continue + # Some tokens look like tables but aren't (e.g., function/var names); + # only flag tokens that match a stricter table-name shape + if re.match(r"^[a-z]+(_[a-z]+){1,3}$", token): + violations.append( + { + "rule_id": "04-table-refs", + "severity": "CRITICAL", + "file": str(path), + "line": lineno, + "message": ( + f"Table '{token}' referenced but not defined in postgres.js" + ), + "remediation": ( + "Verify table name; if renamed, update reference" + ), + } + ) + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 04-table-refs.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + real_tables = set(truth["tables"].keys()) + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, real_tables)) + + 
print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/05-column-refs.py b/.claude/skills/schema-doc-validator/scripts/rules/05-column-refs.py new file mode 100755 index 000000000..2b4e7ba03 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/05-column-refs.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +"""Rule 05: Validate every `table.column` reference resolves against schema. + +This is the rule that catches the python_code_hash / match_method / +confidence_score / kg_error class of bug — operator SQL queries that +reference columns that don't exist in the production schema. +""" + +import json +import re +import sys +from pathlib import Path + +# Match `table.column` references — both backticked and SQL-bare +DOT_REF_RE = re.compile( + r"(?` opt-out marker for documented examples + if "noqa:05" in line: + continue + # Skip JSON paths like `event_data->'bridge_metadata'->>'git_sha'` + # and JS object property accesses like `result.foo` + # by requiring the LHS to be a known table OR a recognized alias + + for m in DOT_REF_RE.finditer(line): + lhs, col = m.group(1), m.group(2) + + # Treat lhs as a real table name + if lhs in tables: + if col not in tables[lhs]: + # Try to suggest similar column name + similar = [ + c + for c in tables[lhs] + if (col[:4] == c[:4]) or (col in c) or (c in col) + ] + sugg = ( + f"Did you mean '{similar[0]}'?" + if similar + else f"Real cols: {sorted(tables[lhs])[:5]}..." + ) + violations.append( + { + "rule_id": "05-column-refs", + "severity": "CRITICAL", + "file": str(path), + "line": lineno, + "message": ( + f"Column '{lhs}.{col}' does not exist in " + f"table '{lhs}'" + ), + "remediation": ( + f"Verify column name. 
{sugg}" + ), + } + ) + # else: lhs is an alias or non-table; skip (we'll catch alias + # mismatches in Rule 07's SQL parser) + + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 05-column-refs.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + tables = truth["tables"] + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, tables)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/06-endpoint-refs.py b/.claude/skills/schema-doc-validator/scripts/rules/06-endpoint-refs.py new file mode 100755 index 000000000..38ce77f17 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/06-endpoint-refs.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +"""Rule 06: Validate every /api/... reference exists in router files. + +Severity: WARNING. Endpoint name drift is less critical than schema drift +since most endpoint references are operational documentation, not +copy-paste-into-curl commands. 
+""" + +import json +import re +import sys +from pathlib import Path + +ENDPOINT_RE = re.compile(r"`((?:GET|POST|PUT|DELETE)?\s*(/api/[^`\s]+))`") + + +def normalize_path(path: str) -> str: + """Strip method prefix and trailing punctuation.""" + path = re.sub(r"^(GET|POST|PUT|DELETE)\s+", "", path).strip() + path = path.rstrip(".,;:?") + return path + + +def matches_pattern(probe: str, real: str) -> bool: + """Check if probe matches a real router pattern (handling :params).""" + if probe == real: + return True + # Replace :param with regex placeholder in real, match probe + pat = re.sub(r":\w+", r"[^/]+", real) + pat = "^" + pat.rstrip("/") + "/?$" + return bool(re.match(pat, probe.rstrip("/"))) + + +def validate_file(path: Path, real_endpoints: list): + violations: list = [] + text = path.read_text() + + for lineno, line in enumerate(text.splitlines(), start=1): + for m in ENDPOINT_RE.finditer(line): + probe = normalize_path(m.group(2)) + # Replace literal IDs like `` with :sessionKey in probe + probe_normalized = re.sub(r"<[^>]+>", ":x", probe) + + # Match against real endpoints (handle :sessionKey vs :sessionId etc) + real_normalized = [re.sub(r":\w+", ":x", r) for r in real_endpoints] + if probe_normalized in real_normalized: + continue + + # Also try matching by pattern + matched = any( + matches_pattern(probe, real) for real in real_endpoints + ) + if not matched: + violations.append( + { + "rule_id": "06-endpoint-refs", + "severity": "WARNING", + "file": str(path), + "line": lineno, + "message": ( + f"Endpoint '{probe}' not found in router files" + ), + "remediation": ( + "Verify endpoint path. May be a frontend-internal " + "endpoint not documented; or stale ref." 
+ ), + } + ) + + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 06-endpoint-refs.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + real_endpoints = truth["endpoints"] + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, real_endpoints)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/rules/07-sql-syntax.py b/.claude/skills/schema-doc-validator/scripts/rules/07-sql-syntax.py new file mode 100755 index 000000000..6e4ae22b6 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/rules/07-sql-syntax.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 +"""Rule 07: SQL syntax check — regex grammar + read-only enforcement + column resolution. + +Validates SQL fenced blocks (` ```sql ... ``` `) in skill markdown for: + 1. Balanced parentheses / brackets / single-quotes + 2. Read-only — no INSERT, UPDATE, DELETE, DROP, ALTER, TRUNCATE, GRANT, REVOKE + 3. Starts with SELECT / WITH / EXPLAIN / -- comment + 4. Every `table.column` reference (where table is a known table) resolves + 5. Every `alias.column` reference resolves via FROM/JOIN aliases in the same query +""" + +import json +import re +import sys +from pathlib import Path + +SQL_BLOCK_RE = re.compile(r"```sql\n(.*?)```", re.DOTALL) + +DML_BANNED = re.compile( + r"\b(INSERT|UPDATE|DELETE|DROP|ALTER|TRUNCATE|GRANT|REVOKE|CREATE)\b", + re.IGNORECASE, +) + +# Find FROM/JOIN ... AS alias OR FROM table alias +ALIAS_RE = re.compile( + r"\b(?:FROM|JOIN)\s+([a-z][a-z0-9_]+)(?:\s+(?:AS\s+)?([a-z][a-z0-9_]*))?", + re.IGNORECASE, +) + +# alias.column or table.column +DOT_RE = re.compile(r"(? 
str | None: + """Return error message if any delimiter is unbalanced.""" + counts = {"(": 0, "[": 0} + pairs = {"(": ")", "[": "]"} + in_quote = False + quote_char = None + i = 0 + while i < len(sql): + c = sql[i] + if in_quote: + if c == quote_char: + # check for escape (e.g., '' inside string) + if i + 1 < len(sql) and sql[i + 1] == quote_char: + i += 2 + continue + in_quote = False + quote_char = None + elif c in "'\"": + in_quote = True + quote_char = c + elif c == "-" and i + 1 < len(sql) and sql[i + 1] == "-": + # comment to end of line + i = sql.find("\n", i) + if i < 0: + break + elif c in "([": + counts[c] += 1 + elif c == ")": + counts["("] -= 1 + if counts["("] < 0: + return "Unbalanced parenthesis: extra ')'" + elif c == "]": + counts["["] -= 1 + if counts["["] < 0: + return "Unbalanced bracket: extra ']'" + i += 1 + + if in_quote: + return f"Unbalanced quote: missing closing {quote_char!r}" + if counts["("] != 0: + return f"Unbalanced parens: {counts['(']} unclosed '('" + if counts["["] != 0: + return f"Unbalanced brackets: {counts['[']} unclosed '['" + return None + + +def find_aliases(sql: str) -> dict: + """Return {alias_or_table: real_table_name}.""" + aliases = {} + for m in ALIAS_RE.finditer(sql): + table = m.group(1) + alias = m.group(2) if m.group(2) and m.group(2).upper() not in {"WHERE", "ON", "GROUP", "ORDER", "LIMIT", "HAVING", "LEFT", "RIGHT", "INNER", "OUTER", "FULL", "JOIN"} else None + aliases[table] = table # table itself is its own alias + if alias: + aliases[alias] = table + return aliases + + +def validate_sql_block(sql: str, file: str, base_lineno: int, tables: dict) -> list: + violations = [] + sql_stripped = sql.strip() + + # Read-only check + # Strip comments first to avoid false positives like "-- DELETE old rows" + sql_no_comments = re.sub(r"--[^\n]*", "", sql_stripped) + dml_match = DML_BANNED.search(sql_no_comments) + if dml_match: + violations.append( + { + "rule_id": "07-sql-syntax", + "severity": "CRITICAL", + "file": 
file, + "line": base_lineno, + "message": ( + f"DML/DDL keyword '{dml_match.group(1).upper()}' " + "in operator skill SQL. Skill docs must be read-only." + ), + "remediation": ( + "Remove the DML/DDL statement. Operator skills should " + "only contain SELECT/WITH/EXPLAIN queries." + ), + } + ) + + # Balanced delimiters + err = check_balanced(sql) + if err: + violations.append( + { + "rule_id": "07-sql-syntax", + "severity": "CRITICAL", + "file": file, + "line": base_lineno, + "message": f"SQL block has unbalanced delimiters: {err}", + "remediation": "Fix the unbalanced delimiter", + } + ) + + # Starts with SELECT/WITH/EXPLAIN (skip comments) + sql_first_kw = re.search( + r"\b(SELECT|WITH|EXPLAIN)\b", sql_no_comments, re.IGNORECASE + ) + if not sql_first_kw and sql_no_comments.strip(): + violations.append( + { + "rule_id": "07-sql-syntax", + "severity": "CRITICAL", + "file": file, + "line": base_lineno, + "message": ( + "SQL block does not start with SELECT/WITH/EXPLAIN" + ), + "remediation": ( + "Operator skill queries should be read-only " + "(SELECT/WITH/EXPLAIN only)" + ), + } + ) + + # Column resolution: find aliases, then check every alias.column reference + aliases = find_aliases(sql) + + # Build the set of valid columns from all FROM tables + valid_bare_columns: set = set() + for table_name in set(aliases.values()): + if table_name in tables: + valid_bare_columns.update(tables[table_name]) + + # Add SELECT-clause aliases (`expr AS alias_name`) to valid set — + # these are output column names, not source columns + for alias_match in re.finditer( + r"\bAS\s+([a-z][a-z0-9_]+)\b", sql, re.IGNORECASE + ): + valid_bare_columns.add(alias_match.group(1)) + + # Add JSONB key references as valid (treat them as opaque) + # event_data->>'git_sha' — 'git_sha' is JSON path, not a column + for jsonkey_match in re.finditer(r"->>?\s*'([a-z_][a-z0-9_]*)'", sql): + valid_bare_columns.add(jsonkey_match.group(1)) + + # Bare-column check: scan SELECT and WHERE clauses for 
unqualified + # identifiers that don't resolve in any FROM table. We find tokens that + # look like column names (snake_case lowercase) and aren't SQL keywords + # or aliases or string literals. + SQL_KEYWORDS = { + "SELECT", "FROM", "WHERE", "AND", "OR", "NOT", "NULL", "TRUE", "FALSE", + "CASE", "WHEN", "THEN", "ELSE", "END", "AS", "ON", "JOIN", "LEFT", + "RIGHT", "INNER", "OUTER", "FULL", "GROUP", "ORDER", "BY", "HAVING", + "LIMIT", "OFFSET", "ASC", "DESC", "DISTINCT", "ALL", "IN", "EXISTS", + "BETWEEN", "LIKE", "ILIKE", "IS", "INTERVAL", "NOW", "COUNT", "SUM", + "AVG", "MIN", "MAX", "COALESCE", "WITH", "EXPLAIN", "UNION", "EXCEPT", + "INTERSECT", "OVER", "PARTITION", "ROW_NUMBER", "RANK", "FILTER", + "ARRAY", "JSON_AGG", "JSON_BUILD_OBJECT", "TO_CHAR", "EXTRACT", "EPOCH", + "AT", "ZONE", "DATE", "TIMESTAMP", "VARCHAR", "TEXT", "INTEGER", "BIGINT", + "SMALLINT", "BOOLEAN", "JSONB", "NUMERIC", "DECIMAL", "FLOAT", "REAL", + "DOUBLE", "PRECISION", + } + + # Tokenize lightweight: find bare lowercase_snake_case tokens + # outside of string literals and JSON path operators + sql_for_tokens = sql + # remove string literals to avoid scanning their content + sql_for_tokens = re.sub(r"'[^']*'", "''", sql_for_tokens) + # remove JSONB ->>'key' / ->'key' patterns (we don't validate JSON paths) + sql_for_tokens = re.sub(r"->>?\s*'[^']*'", "", sql_for_tokens) + # remove qualified column refs (already handled by DOT_RE check below) + sql_for_tokens = re.sub(r"[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*", "", sql_for_tokens) + + if valid_bare_columns: + for tok_match in re.finditer(r"\b([a-z][a-z0-9_]+)\b", sql_for_tokens): + tok = tok_match.group(1) + tok_upper = tok.upper() + if tok_upper in SQL_KEYWORDS: + continue + if tok in aliases: # it's a known alias, not a column + continue + if tok in valid_bare_columns: + continue + # also tolerate common postgres types and operators + if tok in {"id", "true", "false", "null"}: + continue + # if the token looks like a column name (has _ or is 
a known col + # in some other table), flag as potentially-stale + if "_" in tok and len(tok) > 4: + # check if this token exists in ANY table — if so, it's likely + # a real column from a different table (false positive); flag + # as WARNING. If it exists in NO table, it's CRITICAL. + exists_anywhere = any(tok in cols for cols in tables.values()) + severity = "WARNING" if exists_anywhere else "CRITICAL" + violations.append( + { + "rule_id": "07-sql-syntax", + "severity": severity, + "file": file, + "line": base_lineno, + "message": ( + f"SQL: bare column '{tok}' does not resolve in " + f"FROM tables {sorted(set(aliases.values()))}" + ), + "remediation": ( + f"Verify column. Real cols across FROM tables: " + f"{sorted(valid_bare_columns)[:8]}..." + ), + } + ) + + for m in DOT_RE.finditer(sql): + lhs, col = m.group(1), m.group(2) + # Skip JSON path operators (preceded by ->/->>) + # Skip object property access in non-SQL contexts (already in SQL block) + # Skip if lhs is a SQL keyword + if lhs.upper() in {"WHERE", "FROM", "JOIN", "ON", "AS", "AND", "OR", "GROUP", "ORDER", "BY", "HAVING", "LIMIT", "ASC", "DESC", "INTERVAL", "NOW"}: + continue + # Skip if it looks like a JSONB path traversal + if "->" in sql[max(0, m.start() - 4) : m.start()]: + continue + + # Check if lhs is a known alias or table + if lhs in aliases: + real_table = aliases[lhs] + if real_table in tables and col not in tables[real_table]: + similar = [ + c + for c in tables[real_table] + if (col[:4] == c[:4]) or (col in c) or (c in col) + ][:1] + sugg = ( + f" Did you mean '{similar[0]}'?" + if similar + else "" + ) + violations.append( + { + "rule_id": "07-sql-syntax", + "severity": "CRITICAL", + "file": file, + "line": base_lineno, + "message": ( + f"SQL: column '{lhs}.{col}' does not exist in " + f"table '{real_table}'.{sugg}" + ), + "remediation": ( + f"Use real column from {real_table}: " + f"{sorted(tables[real_table])[:5]}..." 
+ ), + } + ) + + return violations + + +def validate_file(path: Path, tables: dict) -> list: + violations: list = [] + text = path.read_text() + # Track line numbers for sql blocks + for m in SQL_BLOCK_RE.finditer(text): + sql = m.group(1) + # Compute starting line number of this block in the file + prefix = text[: m.start()] + base_lineno = prefix.count("\n") + 2 # +2 for the ```sql line itself + + # Allow `` opt-out marker on the line(s) immediately + # before the ```sql block (for documented schema examples / DDL refs) + preceding = "\n".join(prefix.splitlines()[-5:]) + if "noqa:07" in preceding or "noqa:07" in sql: + continue + + violations.extend(validate_sql_block(sql, str(path), base_lineno, tables)) + return violations + + +def main(): + if len(sys.argv) < 3: + print("Usage: 07-sql-syntax.py [file2 ...]", file=sys.stderr) + sys.exit(2) + + truth = json.loads(Path(sys.argv[1]).read_text()) + tables = truth["tables"] + files = [Path(p) for p in sys.argv[2:]] + + all_violations = [] + for path in files: + all_violations.extend(validate_file(path, tables)) + + print(json.dumps(all_violations, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/schema-doc-validator/scripts/validate.sh b/.claude/skills/schema-doc-validator/scripts/validate.sh new file mode 100755 index 000000000..01bd94262 --- /dev/null +++ b/.claude/skills/schema-doc-validator/scripts/validate.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# schema-doc-validator entry point. 
+# +# Usage: +# validate.sh # all .claude/skills/**/SKILL.md files +# validate.sh --target '.claude/**/*.md' # custom glob +# validate.sh --rules core|strict # rule set selection +# validate.sh --format json|markdown # output format + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RULES_DIR="$SCRIPT_DIR/rules" + +# ── Resolve repo root ────────────────────────────────────────────────────────── +cur="$SCRIPT_DIR" +while [ "$cur" != "/" ]; do + if [ -d "$cur/super-legal-mcp-refactored" ]; then + REPO_ROOT="$cur" + break + fi + cur="$(dirname "$cur")" +done +if [ -z "${REPO_ROOT:-}" ]; then + echo "ERROR: cannot locate repo root (no super-legal-mcp-refactored/ in any parent)" >&2 + exit 2 +fi + +cd "$REPO_ROOT" + +# ── Defaults ─────────────────────────────────────────────────────────────────── +TARGET=".claude/skills/*/SKILL.md .claude/skills/*/references/*.md" +RULES="core" +FORMAT="markdown" + +# ── Parse args ───────────────────────────────────────────────────────────────── +while [[ $# -gt 0 ]]; do + case "$1" in + --target) TARGET="$2"; shift 2 ;; + --rules) RULES="$2"; shift 2 ;; + --format) FORMAT="$2"; shift 2 ;; + *) echo "Unknown flag: $1" >&2; exit 2 ;; + esac +done + +# ── Pre-flight ───────────────────────────────────────────────────────────────── +command -v python3 >/dev/null || { echo "ERROR: python3 not found" >&2; exit 2; } +[ -f super-legal-mcp-refactored/src/db/postgres.js ] || { + echo "ERROR: super-legal-mcp-refactored/src/db/postgres.js not found" >&2 + exit 2 +} + +# ── Step 1: Extract truth ────────────────────────────────────────────────────── +TRUTH_FILE=$(mktemp -t schema-truth.XXXXXX.json) +trap 'rm -f "$TRUTH_FILE"' EXIT + +python3 "$SCRIPT_DIR/extract-truth.py" > "$TRUTH_FILE" || { + echo "ERROR: truth extraction failed" >&2 + exit 2 +} + +# ── Step 2: Resolve target files ────────────────────────────────────────────── +# Expand the glob into an array +# shellcheck disable=SC2206 +TARGET_FILES=( $TARGET 
) +EXPANDED=() +for pat in "${TARGET_FILES[@]}"; do + for f in $pat; do + [ -f "$f" ] && EXPANDED+=( "$f" ) + done +done + +if [ ${#EXPANDED[@]} -eq 0 ]; then + echo "ERROR: no files matched target pattern: $TARGET" >&2 + exit 2 +fi + +# ── Step 3: Run rules ────────────────────────────────────────────────────────── +# Determine rule set +if [ "$RULES" = "strict" ]; then + RULE_SCRIPTS=( "$RULES_DIR"/*.py ) +else + # Core: all 7 rules; subset selection deferred to v7.0.x + RULE_SCRIPTS=( "$RULES_DIR"/*.py ) +fi + +VIOLATIONS_FILE=$(mktemp -t schema-violations.XXXXXX.json) +trap 'rm -f "$TRUTH_FILE" "$VIOLATIONS_FILE"' EXIT + +# Aggregate violations from all rules +echo "[]" > "$VIOLATIONS_FILE" +for rule_script in "${RULE_SCRIPTS[@]}"; do + [ -f "$rule_script" ] || continue + rule_name="$(basename "$rule_script" .py)" + # Each rule emits a JSON array of violations + rule_output=$(python3 "$rule_script" "$TRUTH_FILE" "${EXPANDED[@]}" 2>&1) || { + echo "WARN: rule $rule_name errored: $rule_output" >&2 + continue + } + + # Merge into accumulated violations + python3 -c " +import json, sys +existing = json.load(open(sys.argv[1])) +new = json.loads(sys.argv[2]) +existing.extend(new) +json.dump(existing, open(sys.argv[1], 'w')) +" "$VIOLATIONS_FILE" "$rule_output" +done + +# ── Step 4: Format report ───────────────────────────────────────────────────── +python3 "$SCRIPT_DIR/format-report.py" \ + --truth "$TRUTH_FILE" \ + --violations "$VIOLATIONS_FILE" \ + --format "$FORMAT" \ + --target "$TARGET" \ + --rules "$RULES" + +# ── Step 5: Exit code ───────────────────────────────────────────────────────── +# 0 = no CRITICAL violations +# 1 = CRITICAL violations present +HAS_CRITICAL=$(python3 -c " +import json +v = json.load(open('$VIOLATIONS_FILE')) +print('1' if any(x.get('severity') == 'CRITICAL' for x in v) else '0') +") + +exit "$HAS_CRITICAL" diff --git a/.claude/skills/user-management/SKILL.md b/.claude/skills/user-management/SKILL.md index 9054f5390..cd28749fc 100644 
--- a/.claude/skills/user-management/SKILL.md +++ b/.claude/skills/user-management/SKILL.md @@ -140,6 +140,7 @@ This is an operational decision — never exposed via API. ## Database Schema +<!-- noqa:07 --> ```sql CREATE TABLE users ( id SERIAL PRIMARY KEY, From 392f873b8167e9dee28f8c2649140fd1ae450723 Mon Sep 17 00:00:00 2001 From: Number531 <120485065+Number531@users.noreply.github.com> Date: Thu, 7 May 2026 02:33:33 -0400 Subject: [PATCH 2/2] =?UTF-8?q?feat(skills):=20post-deploy-verify=20?= =?UTF-8?q?=E2=80=94=20tiered=20post-deployment=20verification?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds post-deploy-verify skill that codifies the manual verification dance run after every deploy. Three tiers: - Tier 1 (~2 min): /health 200, DB connectivity + latency, circuit breaker state, COMMIT_SHA wired through container env, all 7 v7.0.x flags propagated - Tier 2 (~5 min): § 8.4.X V1-V4 protocol — FMP tool invocations, code-execution model M46-M58 distribution, Cloud Trace SubagentStart filter URL, citation verifier accepts FMP, container env audit, bridge_metadata.git_sha probe - Tier 3 (~10 min): reconciliation backlog, claude_hook_persistence_failures_total since deploy, hook circuit breaker state from /metrics, memory baseline (±20% of references/baselines.json), OTel sampler propagation Read-only — never auto-rolls back. Operator decides remediation. Six SQL queries shipped under scripts/queries/ (V1-V4 and Tier 3 probes) intended to run via session-diagnostics or psql. Validated end-to-end against live container at 34.26.70.60:3001 — all 3 tiers run cleanly. Caught real signals: COMMIT_SHA = 'unknown' in current build (deploy.sh build-arg propagation gap), BCRYPT_ROUNDS not set in container env, OTEL_ENABLED missing from /health.feature_flags. Schema-doc-validator passes 0 CRITICAL on all post-deploy-verify references.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/post-deploy-verify/SKILL.md | 140 ++++++++++++++++++ .../references/baselines.json | 32 ++++ .../references/failure-patterns.md | 127 ++++++++++++++++ .../references/tier1-checks.md | 42 ++++++ .../references/tier2-checks.md | 28 ++++ .../references/tier3-checks.md | 31 ++++ .../scripts/format-report.py | 114 ++++++++++++++ .../queries/t3-bridge-metadata-git-sha.sql | 13 ++ .../queries/t3-hook-persistence-failures.sql | 15 ++ .../queries/t3-reconciliation-status.sql | 13 ++ .../queries/t3-transcript-event-rate.sql | 14 ++ .../queries/v1-fmp-tool-invocations.sql | 10 ++ .../queries/v2-code-execution-models.sql | 15 ++ .../scripts/verify-tier1.sh | 137 +++++++++++++++++ .../scripts/verify-tier2.sh | 99 +++++++++++++ .../scripts/verify-tier3.sh | 118 +++++++++++++++ .../post-deploy-verify/scripts/verify.sh | 117 +++++++++++++++ 17 files changed, 1065 insertions(+) create mode 100644 .claude/skills/post-deploy-verify/SKILL.md create mode 100644 .claude/skills/post-deploy-verify/references/baselines.json create mode 100644 .claude/skills/post-deploy-verify/references/failure-patterns.md create mode 100644 .claude/skills/post-deploy-verify/references/tier1-checks.md create mode 100644 .claude/skills/post-deploy-verify/references/tier2-checks.md create mode 100644 .claude/skills/post-deploy-verify/references/tier3-checks.md create mode 100644 .claude/skills/post-deploy-verify/scripts/format-report.py create mode 100644 .claude/skills/post-deploy-verify/scripts/queries/t3-bridge-metadata-git-sha.sql create mode 100644 .claude/skills/post-deploy-verify/scripts/queries/t3-hook-persistence-failures.sql create mode 100644 .claude/skills/post-deploy-verify/scripts/queries/t3-reconciliation-status.sql create mode 100644 .claude/skills/post-deploy-verify/scripts/queries/t3-transcript-event-rate.sql create mode 100644 .claude/skills/post-deploy-verify/scripts/queries/v1-fmp-tool-invocations.sql create mode 100644 
.claude/skills/post-deploy-verify/scripts/queries/v2-code-execution-models.sql create mode 100755 .claude/skills/post-deploy-verify/scripts/verify-tier1.sh create mode 100755 .claude/skills/post-deploy-verify/scripts/verify-tier2.sh create mode 100755 .claude/skills/post-deploy-verify/scripts/verify-tier3.sh create mode 100755 .claude/skills/post-deploy-verify/scripts/verify.sh diff --git a/.claude/skills/post-deploy-verify/SKILL.md b/.claude/skills/post-deploy-verify/SKILL.md new file mode 100644 index 000000000..de18e5874 --- /dev/null +++ b/.claude/skills/post-deploy-verify/SKILL.md @@ -0,0 +1,140 @@ +--- +name: post-deploy-verify +description: > + Run a tiered post-deployment verification checklist after every deploy completes. + Tier 1 (Critical, ~2 min): /health 200, DB connectivity, COMMIT_SHA wired, flags + propagated. Tier 2 (Integration, ~5 min): § 8.4.X V1-V4 protocol — FMP tools, + M46-M58 code-execution models, Cloud Trace SubagentStart, citation verifier, + container env audit, bridge_metadata.git_sha. Tier 3 (Deep, ~10 min): metrics + baseline, reconciliation status, circuit breakers, Prometheus alerts, OTel + sampler. Read-only — never auto-rolls back. Triggers: "verify deploy", + "post-deploy check", "verify staging", "§ 8.4.X verification", "deploy validation", + "/post-deploy-verify". Supports: --tier 1|2|3|all, --url . +--- + +# Post-Deploy Verify + +## Workflow + +Execute `scripts/verify.sh` from the skill directory (or invoke `/post-deploy-verify`): + +```bash +bash /Users/ej/Super-Legal/.claude/skills/post-deploy-verify/scripts/verify.sh +bash /Users/ej/Super-Legal/.claude/skills/post-deploy-verify/scripts/verify.sh --tier 1 +bash /Users/ej/Super-Legal/.claude/skills/post-deploy-verify/scripts/verify.sh --tier 2 +bash /Users/ej/Super-Legal/.claude/skills/post-deploy-verify/scripts/verify.sh --tier 3 --url http://34.26.70.60:3001 +``` + +The script: +1. 
Resolves base URL (--url arg → `super-legal-mcp-refactored/scripts/.staging-ip` → `http://localhost:3001`) +2. Pre-flight: validates `which curl`, `which python3`, `which gcloud`, `which jq` +3. Dispatches to selected tier(s); each tier outputs JSON +4. `format-report.py` aggregates → markdown report +5. Exit code: 0 PASSED, 1 WARNING, 2 FAILED + +## Tier 1 — Critical (~2 min) + +| Check | Source | Pass criteria | +|---|---|---| +| `GET /health` returns 200 | curl | HTTP 200 in ≤10s | +| `dependencies.database.status` = `ok` | `/health` JSON | DB connectivity | +| `dependencies.database.latency_ms` < 50 | `/health` JSON | Pool healthy | +| `dependencies.circuit_breaker.state` = `CLOSED` | `/health` JSON | Anthropic API healthy | +| Container env has `COMMIT_SHA` set | `gcloud compute ssh + docker exec env` | Reproducibility wired | +| `COMMIT_SHA` matches `git rev-parse HEAD` | shell | No drift | +| `feature_flags` block matches `flags.env` | `/health.feature_flags` vs `flags.env` parse | Flags propagated | + +Fails immediately on any CRITICAL — operator should investigate before proceeding. + +## Tier 2 — § 8.4.X V1-V4 + Container Env Audit (~5 min) + +Embeds the verification protocol from `super-legal-mcp-refactored/docs/pending-updates/equity-analyst-update.md` § 8.4.X. + +| Check | Pass criteria | +|---|---| +| **V1**: FMP tool invocations in last 5 min | When `FMP_ENABLED=true` + memo run: ≥1 row in `hook_audit_log` with `tool_name LIKE 'mcp__equities__%'`. Otherwise: WARNING "no recent invocations" | +| **V2**: Code-execution models M46–M58 | When FMP active + memo run: ≥1 row in `code_executions` with `model_id IN ('M46',...,'M58')`. Otherwise: WARNING | +| **V3**: Cloud Trace SubagentStart | When FMP active + memo run + sampler ≥0.1: ≥1 span in last hour. (Manual check via Cloud Console; skill prints filter URL) | +| **V4**: Citation verifier accepts FMP | Latest memo's `qa-outputs/citation-verification-certificate.md` shows FMP URLs as `CONFIRMED`. 
(Manual file inspection; skill prints query path) | +| **`bridge_metadata.git_sha` not 'unknown'** | All recent code-execution audit rows have real git SHA | +| **Container env audit** | `OTEL_TRACES_SAMPLER`, `OTEL_TRACES_SAMPLER_ARG`, `FMP_ENABLED`, `COMMIT_SHA`, `BCRYPT_ROUNDS` all present in container env | + +## Tier 3 — Metrics + Reconciliation + Trace (~10 min) + +| Check | Source | Pass criteria | +|---|---|---| +| Reconciliation enabled, 0 backlog | `/health.reconciliation` | `enabled=true`, `pending_kg=0`, `pending_artifacts=0`, `stuck_kg=0` | +| No new `claude_hook_persistence_failures_total` since deploy | `/metrics` | Counter delta = 0 since deploy | +| All circuit breakers CLOSED | `/metrics` `claude_hook_circuit_breaker_state` | All values = 0 | +| Memory baseline | `/health.performance` or `.memory` | RSS within ±20% of `references/baselines.json` | +| Prometheus alerts not firing | `/metrics` evaluation | All thresholds within bounds | + +## Output Format + +``` +## Post-Deploy Verification Report +Timestamp: | Target: | Tier: +Container: | COMMIT_SHA: + +### Overall: PASSED ✓ | WARNING ⚠ | FAILED ✗ + +### Tier 1: Critical () +✓ /health 200 OK in ms +✓ Database OK, latency ms +✓ Anthropic circuit_breaker: CLOSED (/ failures) +✓ COMMIT_SHA in container = (matches deployed) +✓ flags.env propagated (/ flags match) + +### Tier 2: § 8.4.X V1-V4 + Container Env () +✓ V1 FMP tools: invocations in last 5min +✓ V2 Code models M46-M58: M46(), M48(), M50() +⚠ V3 Cloud Trace: skill prints filter URL — manual check required +⚠ V4 Citation verifier: skill prints qa-outputs path — manual check required +✓ Container env: all required vars present +✓ bridge_metadata.git_sha: all recent rows = + +### Tier 3: Metrics + Reconciliation () +✓ Reconciliation: enabled, 0 backlog +✓ Hook persistence: 0 failures since +✓ All circuit breakers CLOSED +⚠ Memory: RSS MB (baseline MB; ±X.X% — watch trend) +✓ Prometheus alerts: 0 firing + +### Issues detected +NONE | + +### Raw 
Signals +[Full /health JSON + query results in collapsible blocks] +``` + +## Pre-flight Checks + +```bash +which curl # required +which python3 +which gcloud # for Tier 1 container env probe + Tier 2 SSH +which jq # for /health JSON parsing +``` + +## Read-Only Guarantee + +Never auto-rolls back, never invokes admin endpoints, never executes DML. +All remediation suggestions are printed as commands the operator can run manually. + +## Troubleshooting + +| Failure | Fix | +|---|---| +| Tier 1 `/health` returns 502/503 | Container starting or unhealthy. Wait 60s and retry. If persistent, check `/deploy` Step 8.5 (post-IP container restart) | +| Tier 1 `COMMIT_SHA = 'unknown'` | `--build-arg COMMIT_SHA` missed during last build. See `deploy/SKILL.md`. Re-deploy or `docker exec` to set env post-hoc (not persisted) | +| Tier 2 V1 returns 0 rows but `FMP_ENABLED=true` | Either no memo run since deploy (run a test memo) OR `FMP_API_KEY` invalid/rate-limited (check `claude_api_client_results_total{fetch_source}` distribution — `exa_fallback` dominating = key issue) | +| Tier 2 container env missing vars | Re-run `/deploy` and ensure deploy.sh's CONTAINER_ENV plumbing is current (lines 80-95) | +| Tier 3 reconciliation stuck | See `session-diagnostics` Pattern 14 for forensic SQL | + +See `references/failure-patterns.md` for 15+ documented failure modes. + +## Known Constraints + +- **V3 Cloud Trace check is manual** — gcloud trace logs read API requires interactive auth and project scoping; skill prints the filter URL operator can paste into Cloud Console +- **V4 Citation verifier check is manual** — requires reading the latest memo's `qa-outputs/` directory; skill prints the path +- **Baselines drift over time** — `baselines.json` is regenerated post-deploy. 
If you bumped flags or memory limits, regenerate baselines before declaring memory drift diff --git a/.claude/skills/post-deploy-verify/references/baselines.json b/.claude/skills/post-deploy-verify/references/baselines.json new file mode 100644 index 000000000..54ef9e82f --- /dev/null +++ b/.claude/skills/post-deploy-verify/references/baselines.json @@ -0,0 +1,32 @@ +{ + "_meta": { + "version": "v7.0.1", + "captured_at": "2026-05-07", + "captured_from": "super-legal-staging-bzx4 (34.26.70.60:3001)", + "notes": "Baseline captured at deploy time. Regenerate after major releases or when memory limits change. ±20% drift is acceptable; >20% warrants investigation." + }, + "memory": { + "rss_mb": 145, + "heap_used_mb": 60, + "heap_total_mb": 63 + }, + "performance": { + "uptime_seconds_at_capture": 33, + "active_streams": 0, + "background_tasks": 0 + }, + "database": { + "expected_latency_ms_max": 50, + "pool_max_connections": 15 + }, + "circuit_breaker": { + "expected_state": "CLOSED", + "expected_failures": 0, + "threshold": 3 + }, + "reconciliation": { + "expected_enabled": true, + "expected_pending_kg": 0, + "expected_pending_artifacts": 0 + } +} diff --git a/.claude/skills/post-deploy-verify/references/failure-patterns.md b/.claude/skills/post-deploy-verify/references/failure-patterns.md new file mode 100644 index 000000000..6efd6ea91 --- /dev/null +++ b/.claude/skills/post-deploy-verify/references/failure-patterns.md @@ -0,0 +1,127 @@ +# Post-Deploy Failure Patterns + +Known failure modes seen in v6.x and v7.0.x deploys, with detection signal and remediation. Operator-only — read-only skill never auto-applies fixes. + +## Tier 1 — Critical + +### P1: `/health` returns 502/503 + +**Signal**: Tier 1 fast-fail with `fatal:true`. +**Cause**: Container starting, OOMed, or instance terminated. 
+**Remediation**: +```bash +gcloud compute instances list --filter="name~super-legal-staging" --format="table(name,zone,status,networkInterfaces[0].accessConfigs[0].natIP)" +gcloud compute ssh --zone=us-east1-c --command="docker ps -a && docker logs --tail 200 \$(docker ps -q | head -1)" +``` + +### P2: `COMMIT_SHA = 'unknown'` in container + +**Signal**: Tier 1 FAILED on COMMIT_SHA. +**Cause**: `deploy.sh` missed `--build-arg COMMIT_SHA=$(git rev-parse HEAD)` plumbing. +**Remediation**: Re-run deploy after confirming `deploy.sh:54-62` carries the build arg. + +### P3: DB latency > 100ms + +**Signal**: Tier 1 FAILED on DB latency. +**Cause**: Pool saturation, Cloud SQL hot spot, or network partition. +**Remediation**: Query the Postgres system catalog `pg_stat_activity` grouping by `state`. If `idle in transaction` > 5, investigate stuck client. If pool exhausted, restart container. + +### P4: Circuit breaker OPEN + +**Signal**: Tier 1 FAILED on circuit breaker. +**Cause**: Anthropic API failures exceeded threshold (3 consecutive). +**Remediation**: Wait 60s for half-open probe; check `https://status.anthropic.com`. If sustained, container restart resets state. + +### P5: Required env var missing + +**Signal**: Tier 1 FAILED on missing env (e.g., `OTEL_TRACES_SAMPLER`). +**Cause**: `deploy.sh` `CONTAINER_ENV` array doesn't include the var. +**Remediation**: Update deploy.sh, redeploy. Do NOT manually `docker exec ... export` — overwritten on restart. + +## Tier 2 — Integration / § 8.4.X + +### P6: V1 returns 0 FMP invocations + +**Signal**: Tier 2 WARNING on V1. +**Cause**: Either FMP_API_KEY broken, or no equity-research memo run in last 5 min. +**Remediation**: Run a test memo via the frontend. If still 0 after memo completes, check `claude_api_client_results_total{client="fmp"}` distribution. + +### P7: V2 missing M46-M58 invocations + +**Signal**: Tier 2 WARNING on V2. +**Cause**: `CODE_EXECUTION_BRIDGE=false` or model catalog deploy missed. 
+**Remediation**: Verify `code_executions` table has rows for any model post-deploy. If empty, code-execution bridge is dead — check `claude-sdk-server` logs for `[CODE-EXEC]` errors. + +### P8: V3 Cloud Trace empty + +**Signal**: Tier 2 WARNING on V3 (no SubagentStart spans). +**Cause**: `OTEL_TRACES_SAMPLER_ARG` too low (e.g., 0.01) or `OTEL_ENABLED=false`. +**Remediation**: Bump sampler to 1.0 for verification window, redeploy. After window, return to 0.1. + +### P9: V4 citation verifier rejected FMP + +**Signal**: Latest memo's `citation-verification-certificate.md` shows FMP URLs as REJECTED. +**Cause**: Citation verifier regex doesn't match `financialmodelingprep.com` URL pattern. +**Remediation**: Check `src/utils/citationWebsearchVerifier.js` regex; FMP entries should match `^https?://(www\.)?financialmodelingprep\.com/`. + +### P10: `bridge_metadata.git_sha = 'unknown'` + +**Signal**: Tier 2 FAILED on bridge_metadata git_sha probe. +**Cause**: Same as P2 — `COMMIT_SHA` build arg never propagated to runtime. +**Remediation**: Re-run deploy with build arg. Replay envelope is required for EU AI Act Art. 15 audit trail. + +## Tier 3 — Deep / Reconciliation + +### P11: Reconciliation `pending_kg > 0` + +**Signal**: Tier 3 FAILED on reconciliation backlog. +**Cause**: KG worker stuck or DB write contention. +**Remediation**: +```sql +SELECT session_key, kg_build_attempts, kg_build_last_error, last_kg_build_attempt_at +FROM sessions WHERE kg_status = 'pending' AND kg_build_attempts >= 3 +ORDER BY created_at DESC LIMIT 20; +``` +If errors are real, fix root cause. If stuck due to deploy interruption, sessions self-recover after `SESSION_RECONCILIATION` worker tick. + +### P12: `claude_hook_persistence_failures_total` non-zero + +**Signal**: Tier 3 FAILED on hook persistence. +**Cause**: DB unreachable during hook fire OR row-too-large. 
+**Remediation**: +```sql +SELECT event_type, count(*) FROM hook_audit_log +WHERE persisted = false AND created_at > NOW() - INTERVAL '1 hour' +GROUP BY event_type; +``` + +### P13: Memory RSS drift > 50% + +**Signal**: Tier 3 FAILED on memory baseline. +**Cause**: Leak, pool re-sizing, or legitimate workload increase. +**Remediation**: Capture heap dump via `kill -USR2 `, regenerate `baselines.json` if drift is intentional. + +### P14: Transcript event count < 100 for completed session (Pattern 10) + +**Signal**: Tier 3 FAILED on `t3-transcript-event-rate.sql`. +**Cause**: Transcript flush broken — `TRANSCRIPT_DB_PERSISTENCE=false` OR queue saturated. +**Remediation**: Confirm flag is true via `/health.feature_flags.TRANSCRIPT_DB_PERSISTENCE`. Check `claude_hook_persistence_failures_total{event_type="transcript"}`. Sessions completed during disabled flag are NOT recoverable. + +### P15: OTel sampler not propagated + +**Signal**: Tier 3 FAILED on `OTEL_ENABLED` or sampler arg mismatch. +**Cause**: Container env missing or `flags.env` drifted from `--container-env` array. +**Remediation**: Check `gcloud compute ssh ... -- docker exec env | grep OTEL`. Redeploy if missing. + +## Cross-tier patterns + +### Static IP race (deploy.sh step 7) + +Not a verification failure but precedes Tier 1 P1. If verify-tier1 sees DB unreachable from container despite `/health` 200 from frontend — container is on ephemeral IP not whitelisted by Cloud SQL. + +**Detection**: +```bash +gcloud compute instances describe --zone=us-east1-c \ + --format="value(networkInterfaces[0].accessConfigs[0].natIP)" +``` +If not `34.26.70.60`, see `deploy/SKILL.md` § "Static IP assignment race". 
diff --git a/.claude/skills/post-deploy-verify/references/tier1-checks.md b/.claude/skills/post-deploy-verify/references/tier1-checks.md new file mode 100644 index 000000000..3a7a057c3 --- /dev/null +++ b/.claude/skills/post-deploy-verify/references/tier1-checks.md @@ -0,0 +1,42 @@ +# Tier 1 — Critical Checks (~2 min) + +Tier 1 fails fast on any CRITICAL — operator should investigate before Tier 2/3. + +| Check | Pass criteria | Severity on fail | +|---|---|---| +| `GET /health` HTTP code | 200 | FAILED (script exits early with `fatal:true`) | +| `dependencies.database.status` | `ok` | FAILED | +| `dependencies.database.latency_ms` | `< 50` | WARNING if 50-100, FAILED if >100 | +| `dependencies.circuit_breaker.state` | `CLOSED` | FAILED if OPEN; WARNING if missing | +| Container env `COMMIT_SHA` | set, not 'unknown' | FAILED if 'unknown' or missing | +| `COMMIT_SHA` matches `git rev-parse HEAD` | exact match | WARNING if drift | +| Required env vars present | all 7 v7.0.x flags (OTEL_ENABLED, OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG, TRANSCRIPT_DB_PERSISTENCE, SESSION_RECONCILIATION, HOOK_DB_PERSISTENCE, FMP_ENABLED) | FAILED if any missing | + +## Fast-fail behavior + +If `/health` returns non-200, script exits immediately with `fatal:true` in the JSON output. No further Tier 1 checks run, and Tier 2/3 are not invoked. + +This avoids cascading false-negatives when the container is unreachable. + +## Assumptions + +- `gcloud` CLI installed + authenticated (Tier 1 container env audit). If not, those checks emit WARNING and skip. +- `jq` installed for JSON parsing (pre-flight). +- `curl` installed (pre-flight). 
+ +## Why these checks + +- **`/health` 200**: container is responsive +- **DB connectivity + latency**: Cloud SQL whitelist intact, pool not saturated +- **Circuit breaker state**: Anthropic API healthy enough to make tool calls +- **`COMMIT_SHA != 'unknown'`**: deploy.sh's `--build-arg` propagation worked (compliance/audit requirement) +- **Required env vars**: deploy.sh's `--container-env` plumbing carried v7.0.x flags through + +## Manual recovery hints + +| Failure | Likely cause | Fix | +|---|---|---| +| `/health` 502/503 | Container unhealthy or starting | Wait 60s; check `gcloud compute ssh` + `docker logs` | +| `COMMIT_SHA = 'unknown'` | deploy.sh missed `--build-arg COMMIT_SHA=$(git rev-parse HEAD)` | Re-deploy with the fix in deploy.sh:54-62 | +| Missing env var | `--container-env` plumbing didn't include it | Update deploy.sh CONTAINER_ENV array; redeploy | +| DB latency >100ms | Pool exhaustion or Cloud SQL hot spot | Check `pg_pool` metrics; consider `pg_stat_activity` | diff --git a/.claude/skills/post-deploy-verify/references/tier2-checks.md b/.claude/skills/post-deploy-verify/references/tier2-checks.md new file mode 100644 index 000000000..3f1f92346 --- /dev/null +++ b/.claude/skills/post-deploy-verify/references/tier2-checks.md @@ -0,0 +1,28 @@ +# Tier 2 — § 8.4.X V1-V4 + Container Env Audit (~5 min) + +Embeds the verification protocol from `super-legal-mcp-refactored/docs/pending-updates/equity-analyst-update.md` § 8.4.X. + +## V1: FMP tool invocations +SQL: `scripts/queries/v1-fmp-tool-invocations.sql` — counts `mcp__equities__%` tool calls in last 5 min. + +## V2: Code-execution models M46-M58 +SQL: `scripts/queries/v2-code-execution-models.sql` — per-model invocation/success/duration distribution. + +## V3: Cloud Trace SubagentStart +Manual: filter Cloud Trace for `attributes.agent_type='equity-analyst' AND attributes.stage='research_support'`. Skill prints filter URL. 
+ +## V4: Citation verifier accepts FMP +Manual: grep latest memo's `qa-outputs/citation-verification-certificate.md` for `financialmodelingprep.com` entries. Skill prints path. + +## Container env audit +Required: `OTEL_TRACES_SAMPLER`, `OTEL_TRACES_SAMPLER_ARG`, `FMP_ENABLED`, `COMMIT_SHA`. Optional: `FMP_API_KEY`, `BCRYPT_ROUNDS`. + +## bridge_metadata.git_sha probe +SQL: `scripts/queries/t3-bridge-metadata-git-sha.sql`. Pass: single row with real SHA. Fail: 'unknown' = COMMIT_SHA build arg missed. + +## Why these checks + +- **V1/V2**: confirms FMP routing is live (when FMP_ENABLED=true). Distinguishes "no recent memo" from "FMP_API_KEY broken" via `claude_api_client_results_total{fetch_source}` distribution. +- **V3**: confirms OTel sampler is sampling enough that equity-analyst spans land in Cloud Trace +- **V4**: confirms citation_websearch_verifier accepts FMP URL patterns (regex match) +- **bridge_metadata.git_sha**: confirms regulator-replay envelope is intact (EU AI Act Art. 15) diff --git a/.claude/skills/post-deploy-verify/references/tier3-checks.md b/.claude/skills/post-deploy-verify/references/tier3-checks.md new file mode 100644 index 000000000..a41d36bb3 --- /dev/null +++ b/.claude/skills/post-deploy-verify/references/tier3-checks.md @@ -0,0 +1,31 @@ +# Tier 3 — Metrics + Reconciliation + Trace (~10 min) + +## Probes + +| Check | Source | Pass criteria | +|---|---|---| +| Reconciliation | `/health.reconciliation` | `enabled=true`, `pending_kg=0`, `pending_artifacts=0`, `stuck_kg=0` | +| Hook persistence failures | `/metrics` `claude_hook_persistence_failures_total` | sum across all `reason!="unknown"` series = 0 | +| Hook circuit breakers | `/metrics` `claude_hook_circuit_breaker_state` | all values < 1 (CLOSED) | +| Tool invocations v2 emitting | `/metrics` `claude_tool_invocations_v2_total` | series present (deprecation migration on track) | +| Memory baseline | `/health.memory.rss_mb` vs `references/baselines.json` | within ±20% | +| OTel 
enabled | `/health.feature_flags.OTEL_ENABLED` | `true` | + +## SQL queries (operator runs via session-diagnostics or psql) + +- `scripts/queries/t3-reconciliation-status.sql` — finds stuck sessions (kg_build_attempts >= 5) +- `scripts/queries/t3-hook-persistence-failures.sql` — failure events in last 24h +- `scripts/queries/t3-bridge-metadata-git-sha.sql` — `git_sha` distribution (catches 'unknown') +- `scripts/queries/t3-transcript-event-rate.sql` — completed sessions with <100 events (Pattern 10) + +## Memory baseline + +`baselines.json` captured at deploy time. Regenerate after major releases. ±20% drift = WARNING, >50% = investigate. + +To regenerate: +```bash +curl -s http://34.26.70.60:3001/health | jq '{ + memory: .memory, + performance: { uptime_seconds_at_capture: .uptime_seconds, active_streams: .active_streams, background_tasks: .background_tasks } +}' > .claude/skills/post-deploy-verify/references/baselines.json +``` diff --git a/.claude/skills/post-deploy-verify/scripts/format-report.py b/.claude/skills/post-deploy-verify/scripts/format-report.py new file mode 100644 index 000000000..8d8207e0f --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/format-report.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +"""Format-report: aggregate tier results into markdown.""" + +import argparse +import json +from datetime import datetime, timezone +from pathlib import Path + +SEVERITY_MARKER = { + "PASSED": "✓", + "WARNING": "⚠", + "FAILED": "✗", + "CRITICAL": "✗", +} + +SEVERITY_RANK = {"PASSED": 0, "WARNING": 1, "FAILED": 2, "CRITICAL": 2} + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--tiers", required=True) + ap.add_argument("--base-url", required=True) + args = ap.parse_args() + + tiers = json.loads(Path(args.tiers).read_text()) + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Compute overall severity + all_severities = [] + for tier_data in tiers.values(): + if isinstance(tier_data, dict): + for c 
in tier_data.get("checks", []): + all_severities.append(c.get("severity", "PASSED")) + if not all_severities: + overall = "FAILED" + overall_marker = "✗" + else: + max_rank = max(SEVERITY_RANK.get(s, 0) for s in all_severities) + if max_rank == 2: + overall, overall_marker = "FAILED", "✗" + elif max_rank == 1: + overall, overall_marker = "WARNING", "⚠" + else: + overall, overall_marker = "PASSED", "✓" + + lines = [] + lines.append("## Post-Deploy Verification Report") + lines.append(f"Timestamp: {timestamp} | Target: `{args.base_url}`") + + # Try to extract container/sha info from tier1 + t1 = tiers.get("tier1", {}) + health = t1.get("health_response") or {} + if isinstance(health, dict): + commit = ( + health.get("build", {}).get("commit") + or health.get("commit") + or "unknown" + ) + lines.append(f"COMMIT_SHA: `{commit[:8] if commit else 'n/a'}`") + + lines.append("") + lines.append(f"### Overall: {overall} {overall_marker}") + lines.append("") + + # Per-tier sections + for tier_num in [1, 2, 3]: + key = f"tier{tier_num}" + if key not in tiers: + continue + tier_data = tiers[key] + if not isinstance(tier_data, dict): + continue + elapsed = tier_data.get("elapsed_s", "?") + tier_label = {1: "Critical", 2: "§ 8.4.X V1-V4 + Container Env", 3: "Metrics + Reconciliation"}[tier_num] + lines.append(f"### Tier {tier_num}: {tier_label} ({elapsed}s)") + for c in tier_data.get("checks", []): + marker = SEVERITY_MARKER.get(c.get("severity", "PASSED"), "?") + lines.append(f"{marker} {c['name']}: {c['message']}") + lines.append("") + + # Issues summary + issues = [ + c + for tier_data in tiers.values() + if isinstance(tier_data, dict) + for c in tier_data.get("checks", []) + if c.get("severity") in ("WARNING", "FAILED", "CRITICAL") + ] + lines.append("### Issues detected") + if not issues: + lines.append("NONE") + else: + for c in issues: + marker = SEVERITY_MARKER.get(c["severity"], "?") + lines.append(f"- {marker} **{c['severity']}**: {c['name']} — {c['message']}") + 
lines.append("") + + # Raw signals (truncated) + lines.append("### Raw Signals") + if isinstance(health, dict): + truncated = json.dumps(health, indent=2)[:3000] + lines.append("<details><summary>/health response (truncated)</summary>")
+ lines.append("") + lines.append("```json") + lines.append(truncated) + lines.append("```") + lines.append("") + lines.append("</details>")
+ + print("\n".join(lines)) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/post-deploy-verify/scripts/queries/t3-bridge-metadata-git-sha.sql b/.claude/skills/post-deploy-verify/scripts/queries/t3-bridge-metadata-git-sha.sql new file mode 100644 index 000000000..8fda4ca9e --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/t3-bridge-metadata-git-sha.sql @@ -0,0 +1,13 @@ +-- Tier 3: bridge_metadata.git_sha distribution +-- Pass: single row with real SHA matching deployed HEAD +-- Fail: 'unknown' = COMMIT_SHA build arg missed during last deploy + +SELECT event_data->'bridge_metadata'->>'git_sha' AS git_sha, + COUNT(*) AS code_executions, + MIN(created_at) AS first_seen, + MAX(created_at) AS last_seen +FROM hook_audit_log +WHERE tool_name = 'run_python_analysis' + AND created_at > NOW() - INTERVAL '24 hours' +GROUP BY git_sha +ORDER BY last_seen DESC; diff --git a/.claude/skills/post-deploy-verify/scripts/queries/t3-hook-persistence-failures.sql b/.claude/skills/post-deploy-verify/scripts/queries/t3-hook-persistence-failures.sql new file mode 100644 index 000000000..e70684ae4 --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/t3-hook-persistence-failures.sql @@ -0,0 +1,15 @@ +-- Tier 3: Hook persistence failures since deploy +-- Pass: 0 rows = no persistence failures (rows are not being lost) +-- Fail: any rows = circuit breaker may have opened or pool saturated + +SELECT + event_type, + event_data->>'reason' AS failure_reason, + COUNT(*) AS failures, + MIN(created_at) AS first_failure, + MAX(created_at) AS last_failure +FROM hook_audit_log +WHERE event_type LIKE '%PersistFailure%' + AND created_at > NOW() - INTERVAL '24 hours' +GROUP BY event_type, failure_reason +ORDER BY failures DESC; diff --git a/.claude/skills/post-deploy-verify/scripts/queries/t3-reconciliation-status.sql b/.claude/skills/post-deploy-verify/scripts/queries/t3-reconciliation-status.sql new file mode 100644 index 000000000..7f8ffc736 
--- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/t3-reconciliation-status.sql @@ -0,0 +1,13 @@ +-- Tier 3: Reconciliation pipeline backlog +-- Pass: 0 stuck sessions, low pending counts +-- Fail: stuck sessions with kg_build_attempts >= 5 (kgBreaker exhausted) + +SELECT + SUM(CASE WHEN kg_status = 'pending' THEN 1 ELSE 0 END) AS kg_pending, + SUM(CASE WHEN kg_status = 'building' THEN 1 ELSE 0 END) AS kg_building, + SUM(CASE WHEN kg_status = 'failed' AND kg_build_attempts >= 5 THEN 1 ELSE 0 END) AS kg_stuck_at_breaker, + SUM(CASE WHEN artifacts_status = 'pending' THEN 1 ELSE 0 END) AS artifacts_pending, + SUM(CASE WHEN artifacts_status = 'building' THEN 1 ELSE 0 END) AS artifacts_building, + SUM(CASE WHEN artifacts_status = 'failed' AND artifacts_build_attempts >= 5 THEN 1 ELSE 0 END) AS artifacts_stuck +FROM sessions +WHERE created_at > NOW() - INTERVAL '7 days'; diff --git a/.claude/skills/post-deploy-verify/scripts/queries/t3-transcript-event-rate.sql b/.claude/skills/post-deploy-verify/scripts/queries/t3-transcript-event-rate.sql new file mode 100644 index 000000000..3dec88a4d --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/t3-transcript-event-rate.sql @@ -0,0 +1,14 @@ +-- Tier 3: transcript_events row count for recent completed sessions +-- Pass: typical session has 4,000-6,000 events +-- Fail: completed session with fewer than 100 events = transcript flush broken (Pattern 10) + +SELECT s.session_key, s.status, s.created_at, + COUNT(t.id) AS event_count +FROM sessions s +LEFT JOIN transcript_events t ON s.id = t.session_id +WHERE s.status = 'complete' + AND s.created_at > NOW() - INTERVAL '7 days' +GROUP BY s.session_key, s.status, s.created_at +HAVING COUNT(t.id) < 100 -- well below typical 4000-6000 +ORDER BY s.created_at DESC +LIMIT 10; diff --git a/.claude/skills/post-deploy-verify/scripts/queries/v1-fmp-tool-invocations.sql b/.claude/skills/post-deploy-verify/scripts/queries/v1-fmp-tool-invocations.sql new file mode 100644 
index 000000000..fca0f829f --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/v1-fmp-tool-invocations.sql @@ -0,0 +1,10 @@ +-- § 8.4.X V1: FMP tool invocations in last 5 minutes +-- Pass: ≥1 row when FMP_ENABLED=true and a memo has been run since deploy +-- Fail: 0 rows = no recent memo OR FMP routing broken (check fetch_source distribution) + +SELECT tool_name, COUNT(*) AS invocations, MAX(created_at) AS last_invoked +FROM hook_audit_log +WHERE tool_name LIKE 'mcp__equities__%' + AND created_at > NOW() - INTERVAL '5 minutes' +GROUP BY tool_name +ORDER BY invocations DESC; diff --git a/.claude/skills/post-deploy-verify/scripts/queries/v2-code-execution-models.sql b/.claude/skills/post-deploy-verify/scripts/queries/v2-code-execution-models.sql new file mode 100644 index 000000000..cc5fb8222 --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/queries/v2-code-execution-models.sql @@ -0,0 +1,15 @@ +-- § 8.4.X V2: Code-execution models M46-M58 invoked since deploy +-- Pass: ≥1 row when FMP_ENABLED=true and equity-analyst memo run +-- Bonus: shows per-model invocation/success/duration distribution + +SELECT model_id, + COUNT(*) AS invocations, + SUM(CASE WHEN success THEN 1 ELSE 0 END) AS successes, + AVG(execution_time_ms)::int AS avg_ms, + AVG(chart_count)::numeric(4,1) AS avg_charts, + MAX(created_at) AS last_invoked +FROM code_executions +WHERE model_id IN ('M46','M47','M48','M49','M50','M51','M52','M53','M54','M55','M58') + AND created_at > NOW() - INTERVAL '24 hours' +GROUP BY model_id +ORDER BY model_id; diff --git a/.claude/skills/post-deploy-verify/scripts/verify-tier1.sh b/.claude/skills/post-deploy-verify/scripts/verify-tier1.sh new file mode 100755 index 000000000..a7785d21a --- /dev/null +++ b/.claude/skills/post-deploy-verify/scripts/verify-tier1.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# Tier 1: Critical (~2 min) +# Probes /health endpoint, validates DB + circuit breaker, audits container env. 
+# Inputs (env): BASE_URL, REPO_ROOT, HAS_GCLOUD
+# Output: JSON {checks: [{name, severity, message}], elapsed_s, health_response[, fatal]}
+
+set -uo pipefail
+
+START=$(date +%s)
+
+CHECKS_FILE=$(mktemp -t tier1-checks.XXXXXX.json)
+HEALTH_FILE=$(mktemp -t tier1-health.XXXXXX.json)
+trap 'rm -f "$CHECKS_FILE" "$HEALTH_FILE"' EXIT
+echo "[]" > "$CHECKS_FILE"
+echo "null" > "$HEALTH_FILE"
+
+add_check() {  # append one {name, severity, message} record to $CHECKS_FILE
+  local name="$1" severity="$2" msg="$3"
+  python3 - "$CHECKS_FILE" "$name" "$severity" "$msg" <<'PY'
+import json, sys
+path, name, sev, msg = sys.argv[1:5]
+with open(path) as f: arr = json.load(f)
+arr.append({'name': name, 'severity': sev, 'message': msg})
+with open(path, 'w') as f: json.dump(arr, f)
+PY
+}
+
+emit_output() {  # print the final JSON document; $1 = "true" adds fatal=true
+  local fatal="${1:-false}"
+  local elapsed=$(($(date +%s) - START))
+  python3 - "$CHECKS_FILE" "$HEALTH_FILE" "$elapsed" "$fatal" <<'PY'
+import json, sys
+checks_path, health_path, elapsed, fatal = sys.argv[1:5]
+with open(checks_path) as f: checks = json.load(f)
+with open(health_path) as f:
+    try: health = json.load(f)
+    except Exception: health = None
+out = {'checks': checks, 'elapsed_s': int(elapsed), 'health_response': health}
+if fatal == 'true': out['fatal'] = True
+print(json.dumps(out))
+PY
+}
+
+# ── Check 1: /health 200 ──────────────────────────────────────────────────────
+HEALTH_RAW=$(curl -s --max-time 10 -w "\n%{http_code}|%{time_total}" "$BASE_URL/health" 2>&1) || true
+HTTP_CODE=$(echo "$HEALTH_RAW" | tail -n1 | cut -d'|' -f1)
+TIME_TOTAL=$(echo "$HEALTH_RAW" | tail -n1 | cut -d'|' -f2)
+HEALTH_BODY=$(echo "$HEALTH_RAW" | sed '$d')
+
+# Save the body for emit_output (falls back to JSON null when the body is not valid JSON)
+echo "$HEALTH_BODY" | jq -c '.' > "$HEALTH_FILE" 2>/dev/null || echo "null" > "$HEALTH_FILE"
+
+if [ "$HTTP_CODE" = "200" ]; then
+  add_check "/health 200 OK" "PASSED" "HTTP 200 in ${TIME_TOTAL}s"
+else
+  add_check "/health probe" "FAILED" "HTTP $HTTP_CODE in ${TIME_TOTAL}s"
+  emit_output "true"
+  exit 0
+fi
+
+# ── Check 2: dependencies.database ───────────────────────────────────────────
+DB_STATUS=$(echo "$HEALTH_BODY" | jq -r '.dependencies.database.status // "missing"')
+DB_LATENCY=$(echo "$HEALTH_BODY" | jq -r '.dependencies.database.latency_ms // 999')
+if [ "$DB_STATUS" = "ok" ] && [ "${DB_LATENCY%.*}" -lt 50 ] 2>/dev/null; then  # %.* drops any fraction: [ -lt ] is integer-only
+  add_check "Database connectivity" "PASSED" "OK, latency ${DB_LATENCY}ms"
+elif [ "$DB_STATUS" = "ok" ] && [ "${DB_LATENCY%.*}" -lt 100 ] 2>/dev/null; then
+  add_check "Database connectivity" "WARNING" "OK but latency ${DB_LATENCY}ms (>50)"
+elif [ "$DB_STATUS" = "ok" ]; then
+  add_check "Database connectivity" "FAILED" "OK but latency ${DB_LATENCY}ms (>100 — pool exhaustion?)"
+else
+  add_check "Database connectivity" "FAILED" "status=$DB_STATUS, latency=${DB_LATENCY}ms"
+fi
+
+# ── Check 3: circuit_breaker ──────────────────────────────────────────────────
+CB_STATE=$(echo "$HEALTH_BODY" | jq -r '.dependencies.circuit_breaker.state // "missing"')
+CB_FAILURES=$(echo "$HEALTH_BODY" | jq -r '.dependencies.circuit_breaker.failures // 0')
+CB_THRESHOLD=$(echo "$HEALTH_BODY" | jq -r '.dependencies.circuit_breaker.threshold // 3')
+if [ "$CB_STATE" = "CLOSED" ]; then
+  add_check "Anthropic circuit breaker" "PASSED" "CLOSED ($CB_FAILURES/$CB_THRESHOLD failures)"
+elif [ "$CB_STATE" = "missing" ]; then
+  add_check "Anthropic circuit breaker" "WARNING" "field missing in /health (older version?)"
+else
+  add_check "Anthropic circuit breaker" "FAILED" "state=$CB_STATE, $CB_FAILURES failures"
+fi
+
+# ── Check 4 + 5: Container env (COMMIT_SHA + flags) ──────────────────────────
+if [ "$HAS_GCLOUD" = "1" ]; then
+  INSTANCE=$(gcloud compute instances list --filter="name~super-legal-staging AND status=RUNNING" \
+    --format="value(name)" 2>/dev/null | head -1)
+  if [ -n "$INSTANCE" ]; then
+    CONTAINER_ENV=$(gcloud compute ssh "$INSTANCE" --zone=us-east1-c --quiet \
+      --command='docker ps --format "{{.ID}}" | head -1 | xargs -I{} docker exec {} env' \
+      2>/dev/null || true)
+
+    CONTAINER_SHA=$(echo "$CONTAINER_ENV" | grep '^COMMIT_SHA=' | cut -d'=' -f2)
+    DEPLOYED_SHA=$(cd "$REPO_ROOT" && git rev-parse HEAD 2>/dev/null)
+
+    if [ -z "$CONTAINER_SHA" ]; then
+      add_check "COMMIT_SHA in container" "FAILED" "env var not set"
+    elif [ "$CONTAINER_SHA" = "unknown" ]; then
+      add_check "COMMIT_SHA in container" "FAILED" "= 'unknown' — build arg missed during last deploy"
+    elif [ "$CONTAINER_SHA" = "$DEPLOYED_SHA" ]; then
+      add_check "COMMIT_SHA in container" "PASSED" "= ${CONTAINER_SHA:0:8} (matches deployed HEAD)"
+    else
+      add_check "COMMIT_SHA in container" "WARNING" "= ${CONTAINER_SHA:0:8} but HEAD = ${DEPLOYED_SHA:0:8} (drift)"
+    fi
+
+    REQ_VARS="OTEL_ENABLED OTEL_TRACES_SAMPLER OTEL_TRACES_SAMPLER_ARG TRANSCRIPT_DB_PERSISTENCE SESSION_RECONCILIATION HOOK_DB_PERSISTENCE FMP_ENABLED"
+    MISSING=""
+    for v in $REQ_VARS; do
+      if ! echo "$CONTAINER_ENV" | grep -q "^$v="; then
+        MISSING="$MISSING $v"
+      fi
+    done
+    if [ -z "$MISSING" ]; then
+      add_check "Required env vars" "PASSED" "all 7 v7.0.x flags present"
+    else
+      add_check "Required env vars" "FAILED" "missing:$MISSING"
+    fi
+  else
+    add_check "Container env audit" "WARNING" "no RUNNING super-legal-staging instance found via gcloud"
+  fi
+else
+  add_check "Container env audit" "WARNING" "gcloud not available — skipping"
+fi
+
+# Also derive build.commit from /health body as a fallback signal (only reported when gcloud is absent)
+BUILD_COMMIT=$(echo "$HEALTH_BODY" | jq -r '.build.commit // "missing"')
+if [ "$HAS_GCLOUD" != "1" ]; then
+  if [ "$BUILD_COMMIT" = "unknown" ] || [ "$BUILD_COMMIT" = "missing" ]; then
+    add_check "/health build.commit" "FAILED" "= '$BUILD_COMMIT' — COMMIT_SHA build arg missed"
+  else
+    add_check "/health build.commit" "PASSED" "= ${BUILD_COMMIT:0:8}"
+  fi
+fi
+
+emit_output "false"
diff --git a/.claude/skills/post-deploy-verify/scripts/verify-tier2.sh b/.claude/skills/post-deploy-verify/scripts/verify-tier2.sh
new file mode 100755
index 000000000..f39948744
--- /dev/null
+++ b/.claude/skills/post-deploy-verify/scripts/verify-tier2.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Tier 2: § 8.4.X V1-V4 + Container Env Audit (~5 min)
+# - V1/V2: prints SQL queries for operator to run via session-diagnostics or psql
+# - V3/V4: prints filter URLs / file paths for manual inspection
+# - Container env audit: SSH into running instance, validate env vars
+# - bridge_metadata.git_sha: points the operator at the SQL probe query (manual step)
+
+set -uo pipefail
+
+START=$(date +%s)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+QUERIES_DIR="$SCRIPT_DIR/queries"
+
+CHECKS_FILE=$(mktemp -t tier2-checks.XXXXXX.json)
+trap 'rm -f "$CHECKS_FILE"' EXIT
+echo "[]" > "$CHECKS_FILE"
+
+add_check() {  # append one {name, severity, message} record to $CHECKS_FILE
+  local name="$1" severity="$2" msg="$3"
+  python3 - "$CHECKS_FILE" "$name" "$severity" "$msg" <<'PY'
+import json, sys
+path, name, sev, msg = sys.argv[1:5]
+with open(path) as f: arr = json.load(f)
+arr.append({'name': name, 'severity': sev, 'message': msg})
+with open(path, 'w') as f: json.dump(arr, f)
+PY
+}
+
+# ── V1 + V2: print SQL queries for operator (FMP gating-aware) ───────────────
+FMP_STATE=$(curl -s --max-time 10 "$BASE_URL/health" | jq -r '.feature_flags.FMP_ENABLED | if . == null then "unknown" else tostring end' 2>/dev/null)  # explicit null test: jq's // would also replace a literal false
+
+if [ "$FMP_STATE" = "true" ]; then
+  add_check "V1 — FMP tool invocations" "WARNING" "Run via session-diagnostics or psql: $QUERIES_DIR/v1-fmp-tool-invocations.sql (expect >=1 row in last 5min if memo run; 0 rows = no memo OR FMP_API_KEY issue)"
+  add_check "V2 — Code-execution models M46-M58" "WARNING" "Run via session-diagnostics or psql: $QUERIES_DIR/v2-code-execution-models.sql (expect >=1 row if equity-analyst memo run)"
+else
+  add_check "V1 — FMP tool invocations" "PASSED" "FMP_ENABLED=$FMP_STATE — V1 not applicable (skip)"
+  add_check "V2 — Code-execution models M46-M58" "PASSED" "FMP_ENABLED=$FMP_STATE — V2 not applicable (skip)"
+fi
+
+# ── V3: print Cloud Trace filter URL ──────────────────────────────────────────
+TRACE_URL="https://console.cloud.google.com/traces/list?project=gen-lang-client-0797903624"
+add_check "V3 — Cloud Trace SubagentStart" "WARNING" "Manual: $TRACE_URL — filter span.attributes.agent_type=equity-analyst AND attributes.stage=research_support (last 1h)"
+
+# ── V4: locate latest qa-outputs (newest certificate by mtime) ────────────────
+QA_PATH=""
+if [ -d "$REPO_ROOT/super-legal-mcp-refactored/sessions" ]; then
+  QA_PATH=$(find "$REPO_ROOT/super-legal-mcp-refactored/sessions" -name "citation-verification-certificate.md" -exec ls -t {} + 2>/dev/null | head -1)  # '-exec … +' hands all matches to one ls so -t can sort them
+fi
+if [ -n "$QA_PATH" ]; then
+  add_check "V4 — Citation verifier accepts FMP" "WARNING" "Manual: grep financialmodelingprep.com $QA_PATH (expect CONFIRMED entries if memo cited FMP)"
+else
+  add_check "V4 — Citation verifier accepts FMP" "WARNING" "No qa-outputs found locally (manual check on staging container)"
+fi
+
+# ── Container env audit ───────────────────────────────────────────────────────
+if [ "$HAS_GCLOUD" = "1" ]; then
+  INSTANCE=$(gcloud compute instances list --filter="name~super-legal-staging AND status=RUNNING" \
+    --format="value(name)" 2>/dev/null | head -1)
+  if [ -n "$INSTANCE" ]; then
+    CONTAINER_ENV=$(gcloud compute ssh "$INSTANCE" --zone=us-east1-c --quiet \
+      --command='docker ps --format "{{.ID}}" | head -1 | xargs -I{} docker exec {} env' \
+      2>/dev/null || true)
+
+    REQ_VARS="OTEL_TRACES_SAMPLER OTEL_TRACES_SAMPLER_ARG FMP_ENABLED COMMIT_SHA"
+    OPT_VARS="FMP_API_KEY BCRYPT_ROUNDS"
+    MISSING_REQ=""
+    MISSING_OPT=""
+    for v in $REQ_VARS; do
+      echo "$CONTAINER_ENV" | grep -q "^$v=" || MISSING_REQ="$MISSING_REQ $v"
+    done
+    for v in $OPT_VARS; do
+      echo "$CONTAINER_ENV" | grep -q "^$v=" || MISSING_OPT="$MISSING_OPT $v"
+    done
+
+    if [ -z "$MISSING_REQ" ] && [ -z "$MISSING_OPT" ]; then
+      add_check "Container env audit" "PASSED" "all 6 v7.0.x env vars present (incl. OTel sampler + FMP + COMMIT_SHA + BCRYPT)"
+    elif [ -z "$MISSING_REQ" ]; then
+      add_check "Container env audit" "WARNING" "required vars present; optional missing:$MISSING_OPT"
+    else
+      add_check "Container env audit" "FAILED" "missing required:$MISSING_REQ"
+    fi
+  else
+    add_check "Container env audit" "WARNING" "no RUNNING super-legal-staging instance found"
+  fi
+else
+  add_check "Container env audit" "WARNING" "gcloud not available — skipping (Tier 1 covered subset)"
+fi
+
+# ── bridge_metadata.git_sha probe (via SQL) ───────────────────────────────────
+add_check "bridge_metadata.git_sha probe" "WARNING" "Run via session-diagnostics: $QUERIES_DIR/t3-bridge-metadata-git-sha.sql (expect single row with real SHA; 'unknown' = COMMIT_SHA build arg missed)"
+
+# ── Output ────────────────────────────────────────────────────────────────────
+ELAPSED=$(($(date +%s) - START))
+python3 - "$CHECKS_FILE" "$ELAPSED" <<'PY'
+import json, sys
+checks_path, elapsed = sys.argv[1:3]
+with open(checks_path) as f: checks = json.load(f)
+print(json.dumps({'checks': checks, 'elapsed_s': int(elapsed)}))
+PY
diff --git a/.claude/skills/post-deploy-verify/scripts/verify-tier3.sh b/.claude/skills/post-deploy-verify/scripts/verify-tier3.sh
new file mode 100755
index 000000000..dc99de11c
--- /dev/null
+++ b/.claude/skills/post-deploy-verify/scripts/verify-tier3.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# Tier 3: Metrics + Reconciliation + Trace (~10 min)
+# - /health.reconciliation: backlog status
+# - /metrics: hook persistence failures, circuit breakers, v2 tool-invocation metric
+# - /health: memory baseline check vs references/baselines.json, OTEL_ENABLED flag
+
+set -uo pipefail
+
+START=$(date +%s)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+BASELINES="$SKILL_DIR/references/baselines.json"
+
+CHECKS_FILE=$(mktemp -t tier3-checks.XXXXXX.json)
+trap 'rm -f "$CHECKS_FILE"' EXIT
+echo "[]" > "$CHECKS_FILE"
+
+add_check() {  # append one {name, severity, message} record to $CHECKS_FILE
+  local name="$1" severity="$2" msg="$3"
+  python3 - "$CHECKS_FILE" "$name" "$severity" "$msg" <<'PY'
+import json, sys
+path, name, sev, msg = sys.argv[1:5]
+with open(path) as f: arr = json.load(f)
+arr.append({'name': name, 'severity': sev, 'message': msg})
+with open(path, 'w') as f: json.dump(arr, f)
+PY
+}
+
+# ── /health.reconciliation ────────────────────────────────────────────────────
+HEALTH=$(curl -s --max-time 10 "$BASE_URL/health" 2>/dev/null)
+if [ -n "$HEALTH" ]; then
+  RECON_ENABLED=$(echo "$HEALTH" | jq -r '.reconciliation.enabled // false' 2>/dev/null)
+  RECON_KG=$(echo "$HEALTH" | jq -r '.reconciliation.pending_kg // 0' 2>/dev/null)
+  RECON_ART=$(echo "$HEALTH" | jq -r '.reconciliation.pending_artifacts // 0' 2>/dev/null)
+  RECON_STUCK=$(echo "$HEALTH" | jq -r '.reconciliation.stuck_kg // 0' 2>/dev/null)
+
+  if [ "$RECON_ENABLED" = "true" ] && [ "$RECON_KG" = "0" ] && [ "$RECON_ART" = "0" ] && [ "$RECON_STUCK" = "0" ]; then
+    add_check "Reconciliation" "PASSED" "enabled=true, 0 backlog (kg=$RECON_KG, artifacts=$RECON_ART, stuck=$RECON_STUCK)"
+  elif [ "$RECON_ENABLED" != "true" ]; then
+    add_check "Reconciliation" "FAILED" "enabled=$RECON_ENABLED"
+  elif [ "$RECON_STUCK" -gt 0 ] 2>/dev/null; then
+    add_check "Reconciliation" "FAILED" "stuck_kg=$RECON_STUCK (manual rebuild needed)"
+  else
+    add_check "Reconciliation" "WARNING" "backlog: pending_kg=$RECON_KG, pending_artifacts=$RECON_ART"
+  fi
+else
+  add_check "Reconciliation" "FAILED" "/health unreachable"
+fi
+
+# ── /metrics: hook persistence failures + circuit breakers ───────────────────
+METRICS=$(curl -s --max-time 10 "$BASE_URL/metrics" 2>/dev/null)
+if [ -n "$METRICS" ]; then
+  PERSIST_FAILURES=$(echo "$METRICS" | grep '^claude_hook_persistence_failures_total{' | grep -v 'reason="unknown"' | awk '{sum+=$2} END {print sum+0}')  # series labelled reason="unknown" are left out of the sum
+  if [ "$PERSIST_FAILURES" = "0" ]; then
+    add_check "Hook persistence failures" "PASSED" "0 failures (all reasons)"
+  else
+    add_check "Hook persistence failures" "WARNING" "$PERSIST_FAILURES failures since process start"
+  fi
+
+  MAX_CB_STATE=$(echo "$METRICS" | grep '^claude_hook_circuit_breaker_state{' | awk '{print $2}' | sort -nr | head -1)
+  MAX_CB_STATE="${MAX_CB_STATE:-0}"
+  if [ "${MAX_CB_STATE%.*}" -ge 2 ] 2>/dev/null; then
+    add_check "Hook circuit breakers" "FAILED" "at least one breaker OPEN (state=$MAX_CB_STATE — persistence skipped, rows lost)"
+  elif [ "${MAX_CB_STATE%.*}" -ge 1 ] 2>/dev/null; then
+    add_check "Hook circuit breakers" "WARNING" "at least one breaker HALF_OPEN (state=$MAX_CB_STATE)"
+  else
+    add_check "Hook circuit breakers" "PASSED" "all CLOSED (state=0)"
+  fi
+
+  ERR_RATE_INFO=$(echo "$METRICS" | grep '^claude_tool_invocations_v2_total{' | head -5 || echo "")
+  if [ -n "$ERR_RATE_INFO" ]; then
+    add_check "Tool invocations v2 metric" "PASSED" "claude_tool_invocations_v2_total emitting (deprecation migration on track)"
+  fi
+else
+  add_check "/metrics scrape" "FAILED" "endpoint unreachable"
+fi
+
+# ── Memory baseline check ────────────────────────────────────────────────────
+if [ -n "$HEALTH" ]; then
+  RSS_MB=$(echo "$HEALTH" | jq -r '.memory.rss_mb // 0' 2>/dev/null)
+  if [ -f "$BASELINES" ]; then
+    BASELINE_RSS=$(jq -r '.memory.rss_mb // 0' "$BASELINES" 2>/dev/null)
+    if [ "$BASELINE_RSS" != "0" ] && [ "$RSS_MB" != "0" ]; then
+      DRIFT=$(python3 -c 'import sys; rss, base = map(float, sys.argv[1:3]); print(int(abs(rss - base) / base * 100))' "$RSS_MB" "$BASELINE_RSS" 2>/dev/null || echo 0)  # values go in as argv, never spliced into Python source
+      DRIFT_INT=${DRIFT%.*}
+      if [ "$DRIFT_INT" -lt 20 ] 2>/dev/null; then
+        add_check "Memory baseline" "PASSED" "RSS ${RSS_MB}MB (baseline ${BASELINE_RSS}MB; ${DRIFT_INT}% drift)"
+      else
+        add_check "Memory baseline" "WARNING" "RSS ${RSS_MB}MB vs baseline ${BASELINE_RSS}MB (${DRIFT_INT}% drift; investigate)"
+      fi
+    else
+      add_check "Memory baseline" "WARNING" "no baseline available; current RSS=${RSS_MB}MB (regenerate baselines.json)"
+    fi
+  else
+    add_check "Memory baseline" "WARNING" "baselines.json not found at $BASELINES"
+  fi
+fi
+
+# ── OTel sampler propagation ─────────────────────────────────────────────────
+if [ -n "$HEALTH" ]; then
+  OTEL_FLAG=$(echo "$HEALTH" | jq -r '.feature_flags.OTEL_ENABLED | if . == null then "missing" else tostring end' 2>/dev/null)  # explicit null test so a literal false reaches the last branch
+  if [ "$OTEL_FLAG" = "true" ]; then
+    add_check "OTel enabled" "PASSED" "OTEL_ENABLED=true (sampler arg checked in Tier 1 container env)"
+  elif [ "$OTEL_FLAG" = "missing" ]; then
+    add_check "OTel enabled" "WARNING" "OTEL_ENABLED not in /health.feature_flags"
+  else
+    add_check "OTel enabled" "WARNING" "OTEL_ENABLED=$OTEL_FLAG (traces will not export)"
+  fi
+fi
+
+# ── Output ────────────────────────────────────────────────────────────────────
+ELAPSED=$(($(date +%s) - START))
+python3 - "$CHECKS_FILE" "$ELAPSED" <<'PY'
+import json, sys
+checks_path, elapsed = sys.argv[1:3]
+with open(checks_path) as f: checks = json.load(f)
+print(json.dumps({'checks': checks, 'elapsed_s': int(elapsed)}))
+PY
diff --git a/.claude/skills/post-deploy-verify/scripts/verify.sh b/.claude/skills/post-deploy-verify/scripts/verify.sh
new file mode 100755
index 000000000..417ba85d5
--- /dev/null
+++ b/.claude/skills/post-deploy-verify/scripts/verify.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# post-deploy-verify entry point.
+#
+# Usage:
+#   verify.sh                 # all 3 tiers
+#   verify.sh --tier 1        # critical (~2 min)
+#   verify.sh --tier 2        # § 8.4.X V1-V4 (~5 min)
+#   verify.sh --tier 3        # metrics + reconciliation (~10 min)
+#   verify.sh --url URL       # override base URL
+
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# ── Resolve repo root ─────────────────────────────────────────────────────────
+cur="$SCRIPT_DIR"
+while [ "$cur" != "/" ]; do
+  if [ -d "$cur/super-legal-mcp-refactored" ]; then
+    REPO_ROOT="$cur"
+    break
+  fi
+  cur="$(dirname "$cur")"
+done
+[ -n "${REPO_ROOT:-}" ] || { echo "ERROR: cannot locate repo root" >&2; exit 2; }
+
+# ── Defaults ──────────────────────────────────────────────────────────────────
+TIER="all"
+BASE_URL=""
+
+# Resolve base URL with 3-fallback pattern (--url, then .staging-ip file, then localhost)
+STAGING_IP_FILE="$REPO_ROOT/super-legal-mcp-refactored/scripts/.staging-ip"
+
+# ── Parse args ────────────────────────────────────────────────────────────────
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --tier) [ $# -ge 2 ] || { echo "ERROR: --tier requires a value" >&2; exit 2; }; TIER="$2"; shift 2 ;;
+    --url)  [ $# -ge 2 ] || { echo "ERROR: --url requires a value" >&2; exit 2; }; BASE_URL="$2"; shift 2 ;;
+    *) echo "Unknown flag: $1" >&2; exit 2 ;;
+  esac
+done
+
+if [ -z "$BASE_URL" ]; then
+  if [ -f "$STAGING_IP_FILE" ]; then
+    BASE_URL="http://$(cat "$STAGING_IP_FILE"):3001"
+  else
+    BASE_URL="http://localhost:3001"
+  fi
+fi
+
+# ── Pre-flight ────────────────────────────────────────────────────────────────
+for cmd in curl python3 jq; do
+  command -v "$cmd" >/dev/null || { echo "ERROR: $cmd not found" >&2; exit 2; }
+done
+# gcloud is OPTIONAL (only needed for Tier 1 container env + Tier 2 SSH probes)
+HAS_GCLOUD=0
+command -v gcloud >/dev/null && HAS_GCLOUD=1
+
+# ── Run tiers ─────────────────────────────────────────────────────────────────
+TIERS_JSON_FILE=$(mktemp -t post-deploy-tiers.XXXXXX.json)
+trap 'rm -f "$TIERS_JSON_FILE"' EXIT
+echo "{}" > "$TIERS_JSON_FILE"
+
+run_tier() {  # run verify-tier<N>.sh and store its JSON (or an error record) under key tier<N>
+  local tier="$1"
+  local script="$SCRIPT_DIR/verify-tier${tier}.sh"
+  local out=""  # stays empty when the script is missing, which is recorded as an error below
+  if [ -x "$script" ]; then
+    out=$(BASE_URL="$BASE_URL" REPO_ROOT="$REPO_ROOT" HAS_GCLOUD="$HAS_GCLOUD" \
+      bash "$script") || true  # stdout only: stderr noise must not corrupt the JSON payload
+  else
+    echo "ERROR: $script not found or not executable" >&2
+  fi
+  python3 -c "
+import json, sys
+d = json.load(open('$TIERS_JSON_FILE'))
+try:
+    d['tier${tier}'] = json.loads(sys.argv[1])
+except json.JSONDecodeError:
+    d['tier${tier}'] = {'error': 'tier ${tier} did not produce valid JSON', 'raw': sys.argv[1][:1000]}
+json.dump(d, open('$TIERS_JSON_FILE', 'w'))
+" "$out"
+}
+
+case "$TIER" in
+  1) run_tier 1 ;;
+  2) run_tier 2 ;;
+  3) run_tier 3 ;;
+  all)
+    run_tier 1
+    run_tier 2
+    run_tier 3
+    ;;
+  *) echo "Unknown tier: $TIER" >&2; exit 2 ;;
+esac
+
+# ── Format report ────────────────────────────────────────────────────────────
+python3 "$SCRIPT_DIR/format-report.py" \
+  --tiers "$TIERS_JSON_FILE" \
+  --base-url "$BASE_URL"
+
+# ── Exit code ────────────────────────────────────────────────────────────────
+# 0 = all PASSED
+# 1 = at least one WARNING (no FAILED)
+# 2 = at least one FAILED
+#     (a tier recorded with an 'error' key, i.e. no valid JSON, counts as FAILED)
+EXIT_CODE=$(python3 -c "
+import json
+d = json.load(open('$TIERS_JSON_FILE'))
+tiers = [t for t in d.values() if isinstance(t, dict)]
+all_severities = [c.get('severity', 'PASSED') for t in tiers for c in t.get('checks', [])]
+# A tier that never reported its checks must not look like a clean pass: fail closed.
+if any('error' in t for t in tiers): all_severities.append('FAILED')
+if 'FAILED' in all_severities or 'CRITICAL' in all_severities: print(2)
+elif 'WARNING' in all_severities: print(1)
+else: print(0)
+")
+exit "${EXIT_CODE:-2}"