diff --git a/.claude/skills/client-audit-export/SKILL.md b/.claude/skills/client-audit-export/SKILL.md index 5fe1b0e48..9000f07d1 100644 --- a/.claude/skills/client-audit-export/SKILL.md +++ b/.claude/skills/client-audit-export/SKILL.md @@ -50,6 +50,8 @@ The skill reuses `_shared/gcp-fleet-discover.sh` for multi-client discovery when | `access_log` | RBAC audit trail (actor_user_id, endpoint, status) | safe | | `pii_mappings` | pseudonym_id ↔ encrypted_value mapping | **DANGEROUS** — export `pseudonym_id` + `created_at` ONLY, NEVER `encrypted_value` | | `source_writes` | upstream API source provenance (Wave 2) | safe | +| `citation_verdicts` | per-footnote G5 verification verdicts (v6.8.6 T1) — CONFIRMED/UNCONFIRMED/ERROR/SKIP/PASS_WITH_NOTE + verification method + paywalled flag + notes | safe | +| `citation_verification_certificate` | full G5 certificate markdown (the canonical proof artifact for Art. 13 query reconstruction) | safe | `pii_mappings.encrypted_value` is **never** included in the bundle. The query in `range-query.py` selects only `pseudonym_id`, `created_at`, and `pii_type` — never the encrypted payload. @@ -66,7 +68,9 @@ gs://super-legal-worm-{client}-us-east1/regulator-handoff/{client}-{since}-{unti ├── human_interventions__csv.gz ├── access_log__csv.gz ├── pii_mappings_pseudonyms__csv.gz # pseudonym_id only, no encrypted values -└── source_writes__csv.gz +├── source_writes__csv.gz +├── citation_verdicts__csv.gz # v6.8.6 T1: per-footnote G5 verdicts +└── citation_verification_certificate__csv.gz # v6.8.6 T1: full certificate markdown ``` (Filenames use `.csv.gz` extension on disk; the schema in the docs uses `__csv` as a delimiter to avoid confusing it with table-column dot-notation.) 
diff --git a/.claude/skills/client-audit-export/scripts/range-query.py b/.claude/skills/client-audit-export/scripts/range-query.py index b59a28a12..1ceec1f74 100755 --- a/.claude/skills/client-audit-export/scripts/range-query.py +++ b/.claude/skills/client-audit-export/scripts/range-query.py @@ -36,6 +36,12 @@ def build_queries(since: str, until: str, session: str): f"COPY (SELECT pseudonym_id, pii_type, created_at FROM pii_mappings WHERE session_id = (SELECT id FROM sessions WHERE session_key = '{session}')) TO STDOUT WITH CSV HEADER"), ("source_writes", f"COPY (SELECT sw.* FROM source_writes sw JOIN sessions s ON sw.session_id = s.id WHERE s.session_key = '{session}') TO STDOUT WITH CSV HEADER"), + # v6.8.6 T1: G5 citation-verifier verdicts (regulator query-reconstruction) + ("citation_verdicts", + f"COPY (SELECT cv.* FROM citation_verdicts cv JOIN sessions s ON cv.session_id = s.id WHERE s.session_key = '{session}') TO STDOUT WITH CSV HEADER"), + # v6.8.6 T1: G5 certificate text (full markdown for regulator audit) + ("citation_verification_certificate", + f"COPY (SELECT r.id, r.session_id, r.report_type, r.report_key, r.content, r.word_count, r.agent_type, r.created_at FROM reports r JOIN sessions s ON r.session_id = s.id WHERE s.session_key = '{session}' AND r.report_type = 'qa' AND r.report_key = 'citation-verification-certificate') TO STDOUT WITH CSV HEADER"), ] # Range mode: filter by created_at on each table @@ -58,6 +64,12 @@ def build_queries(since: str, until: str, session: str): f"COPY (SELECT pseudonym_id, pii_type, created_at FROM pii_mappings WHERE created_at >= '{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"), ("source_writes", f"COPY (SELECT * FROM source_writes WHERE created_at >= '{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"), + # v6.8.6 T1: G5 citation-verifier verdicts (regulator query-reconstruction) + ("citation_verdicts", + f"COPY (SELECT * FROM citation_verdicts WHERE created_at >= 
'{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"), + # v6.8.6 T1: G5 certificate text (full markdown for regulator audit) + ("citation_verification_certificate", + f"COPY (SELECT r.id, r.session_id, r.report_type, r.report_key, r.content, r.word_count, r.agent_type, r.created_at FROM reports r WHERE r.created_at >= '{since}' AND r.created_at < '{until}'::DATE + 1 AND r.report_type = 'qa' AND r.report_key = 'citation-verification-certificate') TO STDOUT WITH CSV HEADER"), ] diff --git a/super-legal-mcp-refactored/migrations/015_citation-verdicts.down.sql b/super-legal-mcp-refactored/migrations/015_citation-verdicts.down.sql new file mode 100644 index 000000000..d8dd7545e --- /dev/null +++ b/super-legal-mcp-refactored/migrations/015_citation-verdicts.down.sql @@ -0,0 +1,7 @@ +-- 015_citation-verdicts.down.sql +-- Revert T1 of G5 citation-verifier observability remediation. + +DROP INDEX IF EXISTS idx_verdict_report; +DROP INDEX IF EXISTS idx_verdict_method; +DROP INDEX IF EXISTS idx_verdict_session_verdict; +DROP TABLE IF EXISTS citation_verdicts; diff --git a/super-legal-mcp-refactored/migrations/015_citation-verdicts.up.sql b/super-legal-mcp-refactored/migrations/015_citation-verdicts.up.sql new file mode 100644 index 000000000..cd402f6ed --- /dev/null +++ b/super-legal-mcp-refactored/migrations/015_citation-verdicts.up.sql @@ -0,0 +1,40 @@ +-- 015_citation-verdicts.up.sql +-- T1 of G5 citation-verifier observability remediation (v6.8.6). +-- Per-footnote verification verdicts parsed from citation-verification-certificate.md. +-- Mirrors the citation_source_links pattern (Wave 2): junction table referencing +-- reports(id), idempotent via UNIQUE (report_id, footnote_id), retention follows +-- parent report ON DELETE CASCADE. +-- +-- Closes the regulator-facing gap: "which specific footnotes failed verification +-- in session X, and via which of the 7 verifier batches?" — answerable in SQL +-- without parsing markdown. 
+ +CREATE TABLE IF NOT EXISTS citation_verdicts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + report_id UUID NOT NULL REFERENCES reports(id) ON DELETE CASCADE, + session_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE, + footnote_id VARCHAR(100) NOT NULL, -- canonical "^N" form + footnote_row INTEGER, -- row from DETAILED VERIFICATION RESULTS table (nullable when fallback parse) + citation_text TEXT, -- snippet extracted from cert table (≤200 chars typical) + source_type VARCHAR(50), -- "SEC filing" | "case law" | "web article" | etc + verification_method VARCHAR(50), -- "Exa fetch" | "Exa search" | "WebFetch" | "WebSearch" | "MCP:lookup_citation" | etc + verdict VARCHAR(30) NOT NULL, -- CONFIRMED | UNCONFIRMED | ERROR | SKIP | PASS_WITH_NOTE | UNKNOWN + paywalled BOOLEAN DEFAULT FALSE, -- true when verdict=PASS_WITH_NOTE and notes mention paywall + notes TEXT, -- agent's notes column from the cert table + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE (report_id, footnote_id) -- idempotent re-parse on certificate rewrite +); + +-- Regulator query: "show all UNCONFIRMED footnotes in session X" +CREATE INDEX IF NOT EXISTS idx_verdict_session_verdict + ON citation_verdicts(session_id, verdict); + +-- Per-method aggregation (Verification Summary table reconstruction) +CREATE INDEX IF NOT EXISTS idx_verdict_method + ON citation_verdicts(verification_method, verdict); + +-- Per-report fetch +CREATE INDEX IF NOT EXISTS idx_verdict_report + ON citation_verdicts(report_id); diff --git a/super-legal-mcp-refactored/src/db/postgres.js b/super-legal-mcp-refactored/src/db/postgres.js index 0d641f008..2fb74e5ec 100644 --- a/super-legal-mcp-refactored/src/db/postgres.js +++ b/super-legal-mcp-refactored/src/db/postgres.js @@ -441,6 +441,36 @@ const CODE_EXECUTIONS_DDL = ` CREATE INDEX IF NOT EXISTS idx_charts_execution ON code_execution_charts(execution_id); `; +// v6.8.6 — T1 of G5 citation-verifier observability 
remediation. +// Per-footnote verdicts parsed from citation-verification-certificate.md and +// persisted by hookDBBridge.persistReport() fire-and-forget. Closes regulator +// gap: verdicts are now queryable in SQL rather than embedded in markdown. +// Mirrors citation_source_links junction-table pattern (Wave 2). +const CITATION_VERDICTS_DDL = ` + CREATE TABLE IF NOT EXISTS citation_verdicts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + report_id UUID NOT NULL REFERENCES reports(id) ON DELETE CASCADE, + session_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE, + footnote_id VARCHAR(100) NOT NULL, + footnote_row INTEGER, + citation_text TEXT, + source_type VARCHAR(50), + verification_method VARCHAR(50), + verdict VARCHAR(30) NOT NULL, + paywalled BOOLEAN DEFAULT FALSE, + notes TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + UNIQUE (report_id, footnote_id) + ); + CREATE INDEX IF NOT EXISTS idx_verdict_session_verdict + ON citation_verdicts(session_id, verdict); + CREATE INDEX IF NOT EXISTS idx_verdict_method + ON citation_verdicts(verification_method, verdict); + CREATE INDEX IF NOT EXISTS idx_verdict_report + ON citation_verdicts(report_id); +`; + const EXECUTION_WAVES_DDL = ` CREATE TABLE IF NOT EXISTS execution_waves ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), @@ -1032,6 +1062,7 @@ export async function ensureHookSchema() { await p.query(SESSION_STATUS_HISTORY_DDL); await p.query(AGENT_STATES_EXTENSIONS_DDL); await p.query(CODE_EXECUTIONS_DDL); + await p.query(CITATION_VERDICTS_DDL); // v6.8.6 T1: G5 verdict persistence await p.query(EXECUTION_WAVES_DDL); await p.query(PHASE_SUMMARY_DDL); await p.query(SECTIONS_DDL); diff --git a/super-legal-mcp-refactored/src/server/dbFrontendRouter.js b/super-legal-mcp-refactored/src/server/dbFrontendRouter.js index f669cd111..edaeef748 100644 --- a/super-legal-mcp-refactored/src/server/dbFrontendRouter.js +++ 
b/super-legal-mcp-refactored/src/server/dbFrontendRouter.js @@ -1287,6 +1287,58 @@ export function createDbFrontendRouter() { [session.id] ).catch(() => ({ rows: [] })); // graceful if table missing on stale schemas + // v6.8.6 T1: G5 citation-verifier certificate + verdicts (regulator-facing). + // Certificate content stored in reports table; per-footnote verdicts in + // citation_verdicts. Both gracefully fall back to null/[] on stale schemas. + const { rows: certificate } = await pool.query( + `SELECT r.id AS report_id, r.content AS certificate_text, r.created_at, r.word_count, r.agent_type + FROM reports r + WHERE r.session_id = $1 + AND r.report_type = 'qa' + AND r.report_key = 'citation-verification-certificate' + LIMIT 1`, + [session.id] + ).catch(() => ({ rows: [] })); + + const { rows: verdictRows } = await pool.query( + `SELECT cv.footnote_id, cv.footnote_row, cv.citation_text, + cv.source_type, cv.verification_method, cv.verdict, + cv.paywalled, cv.notes, cv.created_at + FROM citation_verdicts cv + WHERE cv.session_id = $1 + ORDER BY + CASE cv.verdict WHEN 'UNCONFIRMED' THEN 1 WHEN 'ERROR' THEN 2 + WHEN 'PASS_WITH_NOTE' THEN 3 WHEN 'CONFIRMED' THEN 4 + ELSE 5 END, + cv.footnote_id`, + [session.id] + ).catch(() => ({ rows: [] })); + + const verdictSummary = verdictRows.reduce((acc, row) => { + acc.total++; + if (row.verdict === 'CONFIRMED') acc.confirmed++; + else if (row.verdict === 'UNCONFIRMED') acc.unconfirmed++; + else if (row.verdict === 'ERROR') acc.errors++; + else if (row.verdict === 'SKIP') acc.skipped++; + else if (row.verdict === 'PASS_WITH_NOTE') acc.pass_with_note++; + if (row.paywalled) acc.paywalled++; + return acc; + }, { total: 0, confirmed: 0, unconfirmed: 0, errors: 0, skipped: 0, pass_with_note: 0, paywalled: 0 }); + verdictSummary.confirmation_rate = verdictSummary.total > 0 + ? 
Number(((verdictSummary.confirmed + verdictSummary.pass_with_note) / verdictSummary.total).toFixed(4)) + : null; + + const citationVerificationCertificate = certificate.length > 0 + ? { + report_id: certificate[0].report_id, + agent_type: certificate[0].agent_type, + certificate_text: certificate[0].certificate_text, + created_at: certificate[0].created_at, + word_count: certificate[0].word_count, + verdict_summary: verdictSummary, + } + : null; + // Human interventions (Wave 3) — includes any approvals/edits const { rows: interventions } = await pool.query( `SELECT intervention_type, reviewer_id, reason, created_at @@ -1314,12 +1366,32 @@ export function createDbFrontendRouter() { code_executions: executions, bridge_metadata: bridgeMetadata, citations, + citation_verification_certificate: citationVerificationCertificate, + citation_verdicts: verdictRows, human_interventions: interventions, access_log: accessLog, generated_at: new Date().toISOString(), - report_version: '1.0', + report_version: '1.1', // v6.8.6: + citation_verification_certificate + citation_verdicts }; + // v6.8.6 T1: log certificate access for Wave 3 Art. 12/13 audit trail. + // Fire-and-forget; non-fatal if access_log table missing. + if (citationVerificationCertificate) { + setImmediate(async () => { + try { + await pool.query( + `INSERT INTO access_log (session_id, resource_type, resource_key, requester, purpose_code) + VALUES ($1, $2, $3, $4, $5)`, + [session.id, 'certificate', 'citation_verification_certificate', + req.user?.id || 'anonymous', req.query?.purpose || 'regulator_audit'], + ); + } catch (err) { + // Silently ignore — access_log is best-effort, not a hot-path blocker. 
+ void err; + } + }); + } + // Format selection if (req.query.format === 'csv') { // Minimal CSV — flatten code_executions table only (richest single diff --git a/super-legal-mcp-refactored/src/utils/certificateParser.js b/super-legal-mcp-refactored/src/utils/certificateParser.js new file mode 100644 index 000000000..7fc830880 --- /dev/null +++ b/super-legal-mcp-refactored/src/utils/certificateParser.js @@ -0,0 +1,251 @@ +/** + * certificateParser.js — parse citation-verification-certificate.md + * + * Production copy of test/sdk/_lib/certificateParser.mjs (PR #119). Promoted + * to src/utils/ in v6.8.6 T1 for use by hookDBBridge.persistReport() to + * populate the citation_verdicts table. Test harness still imports from + * _lib/ to avoid breaking PR #119 fixtures. + * + * Extracts structured data from the markdown certificate emitted by the + * citation-websearch-verifier subagent (Phase G5). Per agent-3 mapping: + * - CERTIFICATION STATUS line (PASS | PASS_WITH_EXCEPTIONS | HARD_FAIL) + * - Confirmation Rate (float + integer counts) + * - DETAILED VERIFICATION RESULTS table (per-footnote verdicts) + * - Unconfirmed Citations Detail / Error Citations Detail (per-failure rows) + * - Verification Summary table (per-method counts) + * + * Handles edge cases: incomplete certificates (no DETAILED VERIFICATION RESULTS + * section), deferred footnotes (SKIP verdict), paywalled (CONFIRMED + PAYWALLED note), + * tool timeouts (ERROR + Error Citations Detail row). + * + * Pure parser — no I/O. Caller reads the file. 
/**
 * Map the text of a certificate "Status" cell (emoji and/or words, any case)
 * to one canonical verdict string.
 *
 * Order matters: the negative forms are tested BEFORE the bare /CONFIRMED/
 * pattern, because "UNCONFIRMED" contains "CONFIRMED" as a substring. Testing
 * /CONFIRMED/ first would report every unconfirmed footnote as CONFIRMED.
 *
 * @param {*} statusCell - raw Status cell; null/undefined/'' are treated as ''.
 * @returns {'CONFIRMED'|'PASS_WITH_NOTE'|'UNCONFIRMED'|'ERROR'|'SKIP'|'UNKNOWN'}
 *   'UNKNOWN' when no known keyword is present.
 */
function normalizeVerdict(statusCell) {
  const cleaned = String(statusCell || '').trim().toUpperCase();
  // Negative forms first — see the ordering note in the doc comment.
  if (/UNCONFIRMED|UNVERIFIED|NOT[\s_-]+CONFIRMED/.test(cleaned)) return 'UNCONFIRMED';
  if (/CONFIRMED/.test(cleaned)) return 'CONFIRMED';
  if (/PASS_?WITH_?NOTE|PASS\s+WITH\s+NOTE/.test(cleaned)) return 'PASS_WITH_NOTE';
  if (/ERROR|FAIL/.test(cleaned)) return 'ERROR';
  if (/SKIP/.test(cleaned)) return 'SKIP';
  return 'UNKNOWN';
}

/**
 * Split a markdown table body into rows of trimmed cell strings.
 *
 * Only lines whose first non-blank character is '|' are treated as table rows.
 * Dropped rows:
 *   - delimiter rows, including alignment forms (`---`, `:---`, `---:`, `:-:`);
 *   - header rows whose first cell is exactly '#';
 *   - rows that yield no cells at all (a lone '|').
 * The trailing pipe is optional: `| a | b` keeps both cells.
 *
 * @param {string} tableBody - markdown text containing the table.
 * @returns {string[][]} one array of cells per data row; [] for non-string input.
 */
function parseTableRows(tableBody) {
  if (typeof tableBody !== 'string') return [];
  const isDelimiterCell = cell => /^:?-+:?$/.test(cell);
  const rows = [];
  for (const rawLine of tableBody.split('\n')) {
    const line = rawLine.trim(); // also strips a trailing '\r' from CRLF input
    if (!line.startsWith('|')) continue;
    const parts = line.split('|');
    parts.shift(); // empty string before the leading pipe
    if (line.endsWith('|')) parts.pop(); // empty string after the trailing pipe
    const cells = parts.map(cell => cell.trim());
    if (cells.length === 0) continue;
    if (cells.every(isDelimiterCell)) continue;
    if (cells[0] === '#') continue;
    rows.push(cells);
  }
  return rows;
}

/**
 * Return the text that follows the first match of `headingRegex`, up to (but
 * not including) the next level-2 heading ("\n## ") or horizontal rule
 * ("\n---" on its own line), or to the end of the document if neither occurs.
 * The matched heading text itself is excluded. Deeper headings ("###") do not
 * terminate the section.
 *
 * @param {string} md - full markdown document.
 * @param {RegExp} headingRegex - non-global pattern locating the section heading.
 * @returns {string|null} section body, or null when the heading is absent or
 *   `md` is not a string.
 */
function extractSection(md, headingRegex) {
  if (typeof md !== 'string') return null;
  const match = md.match(headingRegex);
  if (!match) return null;
  const start = match.index + match[0].length;
  const rest = md.slice(start);
  // Stop at next ## or ---
  const stopIdx = rest.search(/\n##\s|\n---\s*\n/);
  return stopIdx >= 0 ? rest.slice(0, stopIdx) : rest;
}
+ * @param {string} md - full markdown content of citation-verification-certificate.md + * @returns {object} structured data + */ +export function parseCertificate(md) { + if (typeof md !== 'string' || md.length === 0) { + return { error: 'empty content', status: null, per_footnote: [] }; + } + + const result = { + status: null, + confirmation_rate: null, + confirmed_count: null, + verifiable_count: null, + total_footnotes: null, + skipped_count: null, + paywalled_count: null, + verification_mode: null, + per_footnote: [], + unconfirmed_details: [], + error_details: [], + summary_table: [], + error: null + }; + + // CERTIFICATION STATUS + const statusMatch = md.match(/##\s+CERTIFICATION\s+STATUS\s*:?\s*(PASS_WITH_EXCEPTIONS|HARD_FAIL|PASS)/i); + if (statusMatch) { + result.status = statusMatch[1].toUpperCase().replace(/\s+/g, '_'); + } + + // Verification Mode + const modeMatch = md.match(/\*\*Verification Mode:\*\*\s+([A-Za-z][\w\s()]+?)(?:\n|$)/); + if (modeMatch) result.verification_mode = modeMatch[1].trim(); + + // Confirmation Rate — handle multiple phrasings used by the agent across runs: + // "Confirmation Rate: 100% (27 of 27 sampled citations verified)" (2026-03-07 prod sample) + // "Confirmation Rate: 96.2% (278 confirmed / 289 verifiable footnotes)" (2026-05-12 smoke) + // "Confirmation Rate: 95% (380 of 400 verifiable footnotes confirmed)" + const ratePatterns = [ + /Confirmation Rate:?\s*\**\s*(\d+(?:\.\d+)?)\s*%\s*\(\s*(\d+)\s+(?:of|\/)\s+(\d+)/i, + /Confirmation Rate:?\s*\**\s*(\d+(?:\.\d+)?)\s*%\s*\(\s*(\d+)\s+confirmed\s*\/\s*(\d+)/i + ]; + for (const pat of ratePatterns) { + const m = md.match(pat); + if (m) { + result.confirmation_rate = parseFloat(m[1]) / 100; + result.confirmed_count = parseInt(m[2], 10); + result.verifiable_count = parseInt(m[3], 10); + break; + } + } + + // Other counts (optional) + const totalFnMatch = md.match(/\*\*Total Footnotes(?:\s+in\s+Consolidated\s+Document)?:\*\*\s+(\d+)/i); + if (totalFnMatch) 
result.total_footnotes = parseInt(totalFnMatch[1], 10); + const skippedMatch = md.match(/\*\*Skipped(?:[^:]*):\*\*\s+(\d+)/i); + if (skippedMatch) result.skipped_count = parseInt(skippedMatch[1], 10); + const paywallMatch = md.match(/\*\*Paywalled[^:]*:\*\*\s+(\d+)/i); + if (paywallMatch) result.paywalled_count = parseInt(paywallMatch[1], 10); + + // DETAILED VERIFICATION RESULTS table + const detailedSection = extractSection(md, /##\s+DETAILED\s+VERIFICATION\s+RESULTS/i); + if (detailedSection) { + const rows = parseTableRows(detailedSection); + for (const cells of rows) { + // Expected: [#, Citation, Source Type, Method, Status, Notes] + if (cells.length >= 5) { + const num = parseInt(cells[0], 10); + result.per_footnote.push({ + row: Number.isFinite(num) ? num : null, + citation: cells[1] || '', + source_type: cells[2] || '', + method: cells[3] || '', + verdict: normalizeVerdict(cells[4]), + notes: cells[5] || '' + }); + } + } + } else { + // Some certificates use a simpler per-footnote list format (one row per footnote) + // Fallback: scan for "[^N] ... 
CONFIRMED|UNCONFIRMED|ERROR|SKIP" lines + const fnLinePattern = /\[\^(\d+)\][^\n]*?(CONFIRMED|UNCONFIRMED|ERROR|SKIP|PASS_WITH_NOTE)/gi; + let m; + while ((m = fnLinePattern.exec(md)) !== null) { + result.per_footnote.push({ + row: null, + footnote_id: `^${m[1]}`, + verdict: normalizeVerdict(m[2]), + notes: '' + }); + } + } + + // Unconfirmed Citations Detail — KEY for diff (lists which specific footnotes failed verification) + const unconfirmedSection = extractSection(md, /##\s+Unconfirmed\s+Citations?\s+Detail/i); + if (unconfirmedSection) { + const rows = parseTableRows(unconfirmedSection); + for (const cells of rows) { + if (cells.length >= 4) { + result.unconfirmed_details.push({ + row: parseInt(cells[0], 10) || null, + footnote: cells[1] || '', // e.g., "[^43]" + section: cells[2] || '', + citation: cells[3] || '', + tag: cells[4] || '', + method: cells[5] || '', + reason: cells[6] || '' + }); + } + } + } + + // Build the canonical "unconfirmed footnote ID set" — used by the diff layer + // to compare which specific footnotes failed in each arm. Extracts ^N from + // the "Footnote" column. 
+ result.unconfirmed_footnote_ids = new Set(); + for (const u of result.unconfirmed_details) { + const m = (u.footnote || '').match(/\^(\d+)/); + if (m) result.unconfirmed_footnote_ids.add(`^${m[1]}`); + } + // For JSON serializability: + result.unconfirmed_footnote_ids_array = [...result.unconfirmed_footnote_ids]; + + // Error Citations Detail + const errorSection = extractSection(md, /##\s+Error\s+Citations?\s+Detail/i); + if (errorSection) { + const rows = parseTableRows(errorSection); + for (const cells of rows) { + if (cells.length >= 3) { + result.error_details.push({ + row: parseInt(cells[0], 10) || null, + footnote: cells[1] || '', + section: cells[2] || '', + error_type: cells[3] || '', + details: cells[4] || '' + }); + } + } + } + + // Verification Summary table (per-method counts) + const summarySection = extractSection(md, /##\s+Verification\s+Summary/i); + if (summarySection) { + const rows = parseTableRows(summarySection); + for (const cells of rows) { + if (cells.length < 6) continue; + if (/^TOTAL$/i.test(cells[0])) continue; + // Skip header row (cells[1] is non-numeric like "Count") + const count = parseInt(cells[1].replace(/[^\d]/g, ''), 10); + if (!Number.isFinite(count)) continue; + // Skip rows with leading bold markers if they're aggregate-summary rows + // ("**TOTAL**" etc. 
— already handled above, but defensive) + result.summary_table.push({ + category: cells[0].replace(/^\*+|\*+$/g, '').trim(), + count, + confirmed: parseInt(cells[2].replace(/[^\d]/g, ''), 10) || 0, + paywalled: parseInt(cells[3].replace(/[^\d—-]/g, ''), 10) || 0, + unconfirmed: parseInt(cells[4].replace(/[^\d—-]/g, ''), 10) || 0, + errors: parseInt(cells[5].replace(/[^\d—-]/g, ''), 10) || 0 + }); + } + } + + // Sanity: incomplete if no DETAILED VERIFICATION RESULTS and no per-footnote lines + if (result.per_footnote.length === 0 && !result.status) { + result.error = 'no parseable content (incomplete or missing certificate)'; + } + + return result; +} + +/** + * Build a footnote-id-keyed verdict map from parsed certificate. + * Used by diff to align two certificates by footnote. + * Strategy: prefer footnote_id from the citation text (matches "^N" or "[^N]"), + * else fall back to row number. + */ +export function buildVerdictMap(parsed) { + const map = new Map(); + for (const fn of parsed.per_footnote) { + // Try to extract ^N from the citation text first + const fnIdMatch = (fn.citation || fn.footnote_id || '').match(/\^(\d+)/); + const key = fnIdMatch ? `^${fnIdMatch[1]}` : (fn.footnote_id || `row_${fn.row}`); + map.set(key, fn); + } + return map; +} diff --git a/super-legal-mcp-refactored/src/utils/hookDBBridge.js b/super-legal-mcp-refactored/src/utils/hookDBBridge.js index 7354d3ee1..00abfdf2f 100644 --- a/super-legal-mcp-refactored/src/utils/hookDBBridge.js +++ b/super-legal-mcp-refactored/src/utils/hookDBBridge.js @@ -483,6 +483,70 @@ async function persistReport(pool, sessionCache, input, result) { citationPromise.finally(() => backgroundTasks.delete(citationPromise)); } + // v6.8.6 T1: G5 citation-verifier verdict persistence. Parses the markdown + // certificate when persisted, extracts per-footnote verdicts, and writes + // them to citation_verdicts. Closes the regulator gap (Art. 
12/13): verdicts + // are now queryable in SQL rather than embedded in markdown. Fire-and-forget + // via backgroundTasks (mirrors citation_source_links pattern at line 442). + if (insertResult.rows[0]?.id + && reportType === 'qa' + && reportKey === 'citation-verification-certificate') { + const reportId = insertResult.rows[0].id; + const verdictPromise = (async () => { + try { + const { parseCertificate } = await import('./certificateParser.js'); + const parsed = parseCertificate(content); + if (parsed.error || !Array.isArray(parsed.per_footnote) || parsed.per_footnote.length === 0) { + return; // malformed or empty cert — silent skip + } + // Batch INSERT via VALUES list to amortize round-trips (one query for + // up to ~500 footnotes). Parameterized to prevent SQL injection. + const rows = []; + const params = []; + let p = 1; + for (const fn of parsed.per_footnote) { + // Normalize footnote_id: prefer "^N" form extracted from citation/footnote_id field + const idMatch = (fn.citation || fn.footnote_id || '').match(/\^(\d+)/); + const footnoteId = idMatch ? `^${idMatch[1]}` : (fn.footnote_id || (fn.row ? 
`row_${fn.row}` : null)); + if (!footnoteId) continue; + const isPaywalled = fn.verdict === 'PASS_WITH_NOTE' + && /paywall/i.test(fn.notes || ''); + rows.push(`($${p++}, $${p++}, $${p++}, $${p++}, $${p++}, $${p++}, $${p++}, $${p++}, $${p++}, $${p++})`); + params.push( + reportId, + sessionId, + footnoteId, + fn.row || null, + (fn.citation || '').slice(0, 500), + fn.source_type || null, + fn.method || null, + fn.verdict || 'UNKNOWN', + isPaywalled, + (fn.notes || '').slice(0, 1000), + ); + } + if (rows.length === 0) return; + await pool.query( + `INSERT INTO citation_verdicts + (report_id, session_id, footnote_id, footnote_row, citation_text, + source_type, verification_method, verdict, paywalled, notes) + VALUES ${rows.join(', ')} + ON CONFLICT (report_id, footnote_id) DO UPDATE SET + verdict = EXCLUDED.verdict, + paywalled = EXCLUDED.paywalled, + notes = EXCLUDED.notes, + verification_method = COALESCE(EXCLUDED.verification_method, citation_verdicts.verification_method), + updated_at = NOW()`, + params, + ); + } catch (err) { + console.warn('[CitationVerdicts] parser/persist failed (non-fatal):', err.message); + } + })(); + backgroundTasks.add(verdictPromise); + verdictPromise.finally(() => backgroundTasks.delete(verdictPromise)); + } + if (featureFlags.EMBEDDING_PERSISTENCE && insertResult.rows[0]?.id) { // v6.8.2 (Item 5): register in backgroundTasks so graceful shutdown drains // pending embedding writes before SIGKILL (was raw setImmediate — could be