Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .claude/skills/client-audit-export/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ The skill reuses `_shared/gcp-fleet-discover.sh` for multi-client discovery when
| `access_log` | RBAC audit trail (actor_user_id, endpoint, status) | safe |
| `pii_mappings` | pseudonym_id ↔ encrypted_value mapping | **DANGEROUS** — export `pseudonym_id` + `created_at` ONLY, NEVER `encrypted_value` |
| `source_writes` | upstream API source provenance (Wave 2) | safe |
| `citation_verdicts` | per-footnote G5 verification verdicts (v6.8.6 T1) — CONFIRMED/UNCONFIRMED/ERROR/SKIP/PASS_WITH_NOTE/UNKNOWN + verification method + paywalled flag + notes | safe |
| `citation_verification_certificate` | full G5 certificate markdown (the canonical proof artifact for Art. 13 query reconstruction) | safe |

`pii_mappings.encrypted_value` is **never** included in the bundle. The query in `range-query.py` selects only `pseudonym_id`, `created_at`, and `pii_type` — never the encrypted payload.

Expand All @@ -66,7 +68,9 @@ gs://super-legal-worm-{client}-us-east1/regulator-handoff/{client}-{since}-{unti
├── human_interventions__csv.gz
├── access_log__csv.gz
├── pii_mappings_pseudonyms__csv.gz # pseudonym_id only, no encrypted values
└── source_writes__csv.gz
├── source_writes__csv.gz
├── citation_verdicts__csv.gz # v6.8.6 T1: per-footnote G5 verdicts
└── citation_verification_certificate__csv.gz # v6.8.6 T1: full certificate markdown
```

(Filenames use `.csv.gz` extension on disk; the schema in the docs uses `__csv` as a delimiter to avoid confusing it with table-column dot-notation.)
Expand Down
12 changes: 12 additions & 0 deletions .claude/skills/client-audit-export/scripts/range-query.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def build_queries(since: str, until: str, session: str):
f"COPY (SELECT pseudonym_id, pii_type, created_at FROM pii_mappings WHERE session_id = (SELECT id FROM sessions WHERE session_key = '{session}')) TO STDOUT WITH CSV HEADER"),
("source_writes",
f"COPY (SELECT sw.* FROM source_writes sw JOIN sessions s ON sw.session_id = s.id WHERE s.session_key = '{session}') TO STDOUT WITH CSV HEADER"),
# v6.8.6 T1: G5 citation-verifier verdicts (regulator query-reconstruction)
("citation_verdicts",
f"COPY (SELECT cv.* FROM citation_verdicts cv JOIN sessions s ON cv.session_id = s.id WHERE s.session_key = '{session}') TO STDOUT WITH CSV HEADER"),
# v6.8.6 T1: G5 certificate text (full markdown for regulator audit)
("citation_verification_certificate",
f"COPY (SELECT r.id, r.session_id, r.report_type, r.report_key, r.content, r.word_count, r.agent_type, r.created_at FROM reports r JOIN sessions s ON r.session_id = s.id WHERE s.session_key = '{session}' AND r.report_type = 'qa' AND r.report_key = 'citation-verification-certificate') TO STDOUT WITH CSV HEADER"),
]

# Range mode: filter by created_at on each table
Expand All @@ -58,6 +64,12 @@ def build_queries(since: str, until: str, session: str):
f"COPY (SELECT pseudonym_id, pii_type, created_at FROM pii_mappings WHERE created_at >= '{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"),
("source_writes",
f"COPY (SELECT * FROM source_writes WHERE created_at >= '{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"),
# v6.8.6 T1: G5 citation-verifier verdicts (regulator query-reconstruction)
("citation_verdicts",
f"COPY (SELECT * FROM citation_verdicts WHERE created_at >= '{since}' AND created_at < '{until}'::DATE + 1) TO STDOUT WITH CSV HEADER"),
# v6.8.6 T1: G5 certificate text (full markdown for regulator audit)
("citation_verification_certificate",
f"COPY (SELECT r.id, r.session_id, r.report_type, r.report_key, r.content, r.word_count, r.agent_type, r.created_at FROM reports r WHERE r.created_at >= '{since}' AND r.created_at < '{until}'::DATE + 1 AND r.report_type = 'qa' AND r.report_key = 'citation-verification-certificate') TO STDOUT WITH CSV HEADER"),
]


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- 015_citation-verdicts.down.sql
-- Rollback for T1 of the G5 citation-verifier observability remediation:
-- removes the three secondary indexes on citation_verdicts, then the table
-- itself. Every statement uses IF EXISTS, so running the rollback against a
-- database where some (or all) of these objects are already gone is harmless.

DROP INDEX IF EXISTS
    idx_verdict_report,
    idx_verdict_method,
    idx_verdict_session_verdict;

DROP TABLE IF EXISTS citation_verdicts;
40 changes: 40 additions & 0 deletions super-legal-mcp-refactored/migrations/015_citation-verdicts.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- 015_citation-verdicts.up.sql
--
-- Purpose
--   T1 of the G5 citation-verifier observability remediation (v6.8.6).
--   Stores one row per footnote verdict parsed from
--   citation-verification-certificate.md, so that the regulator-facing
--   question "which specific footnotes failed verification in session X, and
--   via which of the 7 verifier batches?" can be answered in SQL without
--   parsing markdown.
--
-- Design
--   Mirrors the citation_source_links pattern (Wave 2):
--     * junction table referencing reports(id);
--     * UNIQUE (report_id, footnote_id) keeps re-parsing idempotent;
--     * retention follows the parent report via ON DELETE CASCADE.
--
-- All statements use IF NOT EXISTS, so the migration can be re-applied safely.

CREATE TABLE IF NOT EXISTS citation_verdicts (
    id                  UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    report_id           UUID         NOT NULL REFERENCES reports(id)  ON DELETE CASCADE,
    session_id          UUID         NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,

    -- Canonical "^N" form.
    footnote_id         VARCHAR(100) NOT NULL,

    -- Row from the DETAILED VERIFICATION RESULTS table
    -- (nullable when the fallback parse is used).
    footnote_row        INTEGER,

    -- Snippet extracted from the cert table (≤200 chars typical).
    citation_text       TEXT,

    -- "SEC filing" | "case law" | "web article" | etc.
    source_type         VARCHAR(50),

    -- "Exa fetch" | "Exa search" | "WebFetch" | "WebSearch" |
    -- "MCP:lookup_citation" | etc.
    verification_method VARCHAR(50),

    -- CONFIRMED | UNCONFIRMED | ERROR | SKIP | PASS_WITH_NOTE | UNKNOWN
    verdict             VARCHAR(30)  NOT NULL,

    -- True when verdict = PASS_WITH_NOTE and the notes mention a paywall.
    paywalled           BOOLEAN      DEFAULT FALSE,

    -- Agent's notes column from the cert table.
    notes               TEXT,

    created_at          TIMESTAMPTZ  DEFAULT NOW(),
    updated_at          TIMESTAMPTZ  DEFAULT NOW(),

    -- Idempotent re-parse on certificate rewrite.
    UNIQUE (report_id, footnote_id)
);

-- Per-report fetch.
-- NOTE(review): the UNIQUE (report_id, footnote_id) constraint is already
-- backed by an index whose leading column is report_id, so this index may be
-- redundant — confirm against query plans before removing it anywhere.
CREATE INDEX IF NOT EXISTS idx_verdict_report
    ON citation_verdicts (report_id);

-- Regulator query: "show all UNCONFIRMED footnotes in session X".
CREATE INDEX IF NOT EXISTS idx_verdict_session_verdict
    ON citation_verdicts (session_id, verdict);

-- Per-method aggregation (Verification Summary table reconstruction).
CREATE INDEX IF NOT EXISTS idx_verdict_method
    ON citation_verdicts (verification_method, verdict);
31 changes: 31 additions & 0 deletions super-legal-mcp-refactored/src/db/postgres.js
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,36 @@ const CODE_EXECUTIONS_DDL = `
CREATE INDEX IF NOT EXISTS idx_charts_execution ON code_execution_charts(execution_id);
`;

// v6.8.6 — T1 of G5 citation-verifier observability remediation.
// DDL for the citation_verdicts table and its three secondary indexes.
// Per-footnote verdicts parsed from citation-verification-certificate.md and
// persisted by hookDBBridge.persistReport() fire-and-forget. Closes regulator
// gap: verdicts are now queryable in SQL rather than embedded in markdown.
// Mirrors citation_source_links junction-table pattern (Wave 2).
//
// Every statement uses IF NOT EXISTS, so executing this string repeatedly is
// safe. It is run by ensureHookSchema(). The table and index definitions
// duplicate migrations/015_citation-verdicts.up.sql — keep the two in sync
// when either one changes.
const CITATION_VERDICTS_DDL = `
CREATE TABLE IF NOT EXISTS citation_verdicts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
report_id UUID NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
session_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
footnote_id VARCHAR(100) NOT NULL,
footnote_row INTEGER,
citation_text TEXT,
source_type VARCHAR(50),
verification_method VARCHAR(50),
verdict VARCHAR(30) NOT NULL,
paywalled BOOLEAN DEFAULT FALSE,
notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE (report_id, footnote_id)
);
CREATE INDEX IF NOT EXISTS idx_verdict_session_verdict
ON citation_verdicts(session_id, verdict);
CREATE INDEX IF NOT EXISTS idx_verdict_method
ON citation_verdicts(verification_method, verdict);
CREATE INDEX IF NOT EXISTS idx_verdict_report
ON citation_verdicts(report_id);
`;

const EXECUTION_WAVES_DDL = `
CREATE TABLE IF NOT EXISTS execution_waves (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
Expand Down Expand Up @@ -1032,6 +1062,7 @@ export async function ensureHookSchema() {
await p.query(SESSION_STATUS_HISTORY_DDL);
await p.query(AGENT_STATES_EXTENSIONS_DDL);
await p.query(CODE_EXECUTIONS_DDL);
await p.query(CITATION_VERDICTS_DDL); // v6.8.6 T1: G5 verdict persistence
await p.query(EXECUTION_WAVES_DDL);
await p.query(PHASE_SUMMARY_DDL);
await p.query(SECTIONS_DDL);
Expand Down
74 changes: 73 additions & 1 deletion super-legal-mcp-refactored/src/server/dbFrontendRouter.js
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,58 @@ export function createDbFrontendRouter() {
[session.id]
).catch(() => ({ rows: [] })); // graceful if table missing on stale schemas

// v6.8.6 T1: G5 citation-verifier certificate + verdicts (regulator-facing).
// Certificate content stored in reports table; per-footnote verdicts in
// citation_verdicts. Both gracefully fall back to null/[] on stale schemas.
const { rows: certificate } = await pool.query(
`SELECT r.id AS report_id, r.content AS certificate_text, r.created_at, r.word_count, r.agent_type
FROM reports r
WHERE r.session_id = $1
AND r.report_type = 'qa'
AND r.report_key = 'citation-verification-certificate'
LIMIT 1`,
[session.id]
).catch(() => ({ rows: [] }));

const { rows: verdictRows } = await pool.query(
`SELECT cv.footnote_id, cv.footnote_row, cv.citation_text,
cv.source_type, cv.verification_method, cv.verdict,
cv.paywalled, cv.notes, cv.created_at
FROM citation_verdicts cv
WHERE cv.session_id = $1
ORDER BY
CASE cv.verdict WHEN 'UNCONFIRMED' THEN 1 WHEN 'ERROR' THEN 2
WHEN 'PASS_WITH_NOTE' THEN 3 WHEN 'CONFIRMED' THEN 4
ELSE 5 END,
cv.footnote_id`,
[session.id]
).catch(() => ({ rows: [] }));

const verdictSummary = verdictRows.reduce((acc, row) => {
acc.total++;
if (row.verdict === 'CONFIRMED') acc.confirmed++;
else if (row.verdict === 'UNCONFIRMED') acc.unconfirmed++;
else if (row.verdict === 'ERROR') acc.errors++;
else if (row.verdict === 'SKIP') acc.skipped++;
else if (row.verdict === 'PASS_WITH_NOTE') acc.pass_with_note++;
if (row.paywalled) acc.paywalled++;
return acc;
}, { total: 0, confirmed: 0, unconfirmed: 0, errors: 0, skipped: 0, pass_with_note: 0, paywalled: 0 });
verdictSummary.confirmation_rate = verdictSummary.total > 0
? Number(((verdictSummary.confirmed + verdictSummary.pass_with_note) / verdictSummary.total).toFixed(4))
: null;

const citationVerificationCertificate = certificate.length > 0
? {
report_id: certificate[0].report_id,
agent_type: certificate[0].agent_type,
certificate_text: certificate[0].certificate_text,
created_at: certificate[0].created_at,
word_count: certificate[0].word_count,
verdict_summary: verdictSummary,
}
: null;

// Human interventions (Wave 3) — includes any approvals/edits
const { rows: interventions } = await pool.query(
`SELECT intervention_type, reviewer_id, reason, created_at
Expand Down Expand Up @@ -1314,12 +1366,32 @@ export function createDbFrontendRouter() {
code_executions: executions,
bridge_metadata: bridgeMetadata,
citations,
citation_verification_certificate: citationVerificationCertificate,
citation_verdicts: verdictRows,
human_interventions: interventions,
access_log: accessLog,
generated_at: new Date().toISOString(),
report_version: '1.0',
report_version: '1.1', // v6.8.6: + citation_verification_certificate + citation_verdicts
};

// v6.8.6 T1: log certificate access for Wave 3 Art. 12/13 audit trail.
// Fire-and-forget; non-fatal if access_log table missing.
if (citationVerificationCertificate) {
setImmediate(async () => {
try {
await pool.query(
`INSERT INTO access_log (session_id, resource_type, resource_key, requester, purpose_code)
VALUES ($1, $2, $3, $4, $5)`,
[session.id, 'certificate', 'citation_verification_certificate',
req.user?.id || 'anonymous', req.query?.purpose || 'regulator_audit'],
);
} catch (err) {
// Silently ignore — access_log is best-effort, not a hot-path blocker.
void err;
}
});
}

// Format selection
if (req.query.format === 'csv') {
// Minimal CSV — flatten code_executions table only (richest single
Expand Down
Loading