diff --git a/__tests__/unit/ai-review-prompt.test.js b/__tests__/unit/ai-review-prompt.test.js index 727c338..0d5ee44 100644 --- a/__tests__/unit/ai-review-prompt.test.js +++ b/__tests__/unit/ai-review-prompt.test.js @@ -278,7 +278,7 @@ describe('renderReviewMarkdown', () => { }, 42, 'abc1234', meta ); - expect(out).toContain('AI Code Review for PR #42'); + expect(out).toContain('Automated review for PR #42'); expect(out).toContain('๐Ÿ’ฌ Comment'); expect(out).toContain('Adds X.'); expect(out).toContain('`a.js:5`'); @@ -300,6 +300,56 @@ describe('renderReviewMarkdown', () => { expect(out).not.toContain('โœ… Approve'); }); + it('renders the new "Automated review" header with the HTML supersede marker', () => { + const out = renderReviewMarkdown( + { + verdict: 'comment', + summary: 'x', + findings: [{ file: 'a.js', line: 1, quoted_line: '+x', claim: 'no test for x at a.js:1' }] + }, + 99, 'def5678', meta + ); + expect(out).toContain(''); + expect(out).toContain('## Automated review for PR #99'); + expect(out).not.toContain('## AI Code Review'); + }); + + it('shows finding-source breakdown footer (mechanical vs AI)', () => { + const out = renderReviewMarkdown( + { + verdict: 'comment', + summary: 'mixed.', + findings: [ + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'REQ-001', claim: 'REQ-001: foo' }, + { file: 'a.js', line: 1, quoted_line: '+x', claim: 'no test for x at a.js:1' } + ] + }, + 99, 'def5678', meta + ); + expect(out).toMatch(/_Findings: 1 mechanical \(rivet\) ยท 1 from local AI model\._/); + }); + + it('renders grouped oracle finding using artifact_ids array', () => { + const out = renderReviewMarkdown( + { + verdict: 'comment', + summary: 'group.', + findings: [ + { + source: 'oracle:rivet-validate', + severity: 'warning', + artifact_id: '3 artifacts', + artifact_ids: ['REQ-1', 'REQ-2', 'REQ-3'], + claim: 'no downstream โ€” affecting: REQ-1, REQ-2, REQ-3' + } + ] + }, + 99, 'def5678', meta + ); + expect(out).toContain('๐ŸŸก `3 artifacts`'); + expect(out).toContain('affecting: REQ-1, REQ-2, REQ-3'); + }); + it('renders oracle findings with severity badge and artifact_id (not file:line)', () => { const out = renderReviewMarkdown( { diff --git a/__tests__/unit/rivet-oracle.test.js b/__tests__/unit/rivet-oracle.test.js index 70efcd7..89b2120 100644 --- a/__tests__/unit/rivet-oracle.test.js +++ b/__tests__/unit/rivet-oracle.test.js @@ -10,7 +10,10 @@ import { isRivetProject, runRivetValidate, runRivetImpact, - runRivetOracle + runRivetOracle, + subtractFindings, + groupOracleFindings, + findingKey } from '../../src/rivet-oracle.js'; function makeRunnerOk(stdout) { @@ -201,6 +204,108 @@ describe('runRivetImpact', () => { }); }); +describe('findingKey', () => { + it('combines source, artifact_id, and message', () => { + const f = { + source: 'oracle:rivet-validate', + artifact_id: 'REQ-001', + message: 'missing field' + }; + expect(findingKey(f)).toBe('oracle:rivet-validate|REQ-001|missing field'); + }); + + it('falls back to claim when message is absent', () => { + const f = { source: 'oracle:rivet-validate', artifact_id: 'X', claim: 'X: foo' }; + expect(findingKey(f)).toContain('X: foo'); + }); + + it('returns empty string for nullish input', () => { + expect(findingKey(null)).toBe(''); + expect(findingKey(undefined)).toBe(''); + }); +}); + +describe('subtractFindings', () => { + const base = [ + { source: 'oracle:rivet-validate', artifact_id: 'REQ-001', message: 'no downstream', severity: 'warning' }, + { source: 'oracle:rivet-validate', artifact_id: 'REQ-002', message: 'no downstream', severity: 'warning' } + ]; + + it('drops findings already present at base', () => { + const head = [...base, { source: 'oracle:rivet-validate', artifact_id: 'REQ-003', message: 'no downstream', severity: 'warning' }]; + const delta = subtractFindings(head, base); + expect(delta).toHaveLength(1); + expect(delta[0].artifact_id).toBe('REQ-003'); + }); + + it('keeps everything when base is empty', () => { + expect(subtractFindings(base, [])).toEqual(base); + expect(subtractFindings(base, undefined)).toEqual(base); + }); + + it('returns [] when head โІ base (no new findings = correct silence)', () => { + expect(subtractFindings(base, base)).toEqual([]); + }); + + it('treats same artifact + same message but different severity as the same finding', () => { + const head = [{ source: 'oracle:rivet-validate', artifact_id: 'REQ-001', message: 'no downstream', severity: 'error' }]; + expect(subtractFindings(head, base)).toEqual([]); + }); +}); + +describe('groupOracleFindings', () => { + it('collapses identical messages into one finding with artifact_ids list', () => { + const findings = [ + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'REQ-001', message: 'no downstream' }, + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'REQ-002', message: 'no downstream' }, + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'REQ-003', message: 'no downstream' } + ]; + const grouped = groupOracleFindings(findings); + expect(grouped).toHaveLength(1); + expect(grouped[0].artifact_ids).toEqual(['REQ-001', 'REQ-002', 'REQ-003']); + expect(grouped[0].claim).toContain('REQ-001'); + expect(grouped[0].claim).toContain('REQ-002'); + expect(grouped[0].artifact_id).toBe('3 artifacts'); + }); + + it('keeps singletons in their original shape', () => { + const findings = [ + { source: 'oracle:rivet-validate', severity: 'error', artifact_id: 'REQ-001', message: 'broken ref' } + ]; + const grouped = groupOracleFindings(findings); + expect(grouped).toHaveLength(1); + expect(grouped[0].artifact_id).toBe('REQ-001'); + expect(grouped[0].artifact_ids).toBeUndefined(); + }); + + it('keeps different severities as separate groups even with same message', () => { + const findings = [ + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'A', message: 'foo' }, + { source: 'oracle:rivet-validate', severity: 'error', artifact_id: 'B', message: 'foo' } + ]; + expect(groupOracleFindings(findings)).toHaveLength(2); + }); + + it('passes through model findings without grouping', () => { + const findings = [ + { file: 'a.js', line: 1, quoted_line: '+x', claim: 'no test' }, + { file: 'b.js', line: 1, quoted_line: '+y', claim: 'no test' } + ]; + expect(groupOracleFindings(findings)).toHaveLength(2); + }); + + it('truncates the displayed id list at maxIdsShown', () => { + const ids = Array.from({ length: 25 }, (_, i) => `REQ-${String(i + 1).padStart(3, '0')}`); + const findings = ids.map((id) => ({ + source: 'oracle:rivet-validate', severity: 'warning', artifact_id: id, message: 'shared' + })); + const grouped = groupOracleFindings(findings, { maxIdsShown: 5 }); + expect(grouped).toHaveLength(1); + expect(grouped[0].artifact_ids).toHaveLength(25); + expect(grouped[0].claim).toMatch(/\(\+20 more\)/); + }); +}); + describe('runRivetOracle', () => { let tmpDir; beforeEach(() => { diff --git a/src/ai-review-prompt.js b/src/ai-review-prompt.js index b5d5807..50f3dcc 100644 --- a/src/ai-review-prompt.js +++ b/src/ai-review-prompt.js @@ -250,8 +250,16 @@ export function renderReviewMarkdown(parsed, prNumber, headSha, meta, opts = {}) request_changes: '๐Ÿ”ด Request changes' }[verdict]; + const oracleCount = findings.filter( + (f) => typeof f?.source === 'string' && f.source.startsWith('oracle:') + ).length; + const modelCount = findings.length - oracleCount; + const lines = []; - lines.push(`## AI Code Review for PR #${prNumber}`); + // HTML marker first so supersedePreviousReviews can identify our comments + // even if the visible header changes. + lines.push(''); + lines.push(`## Automated review for PR #${prNumber}`); if (meta?.headRepo && meta?.baseBranch && meta?.headBranch && meta?.baseRepo) { lines.push(`${meta.headRepo}:\`${meta.headBranch}\` โ†’ ${meta.baseRepo}:\`${meta.baseBranch}\``); } @@ -260,6 +268,10 @@ export function renderReviewMarkdown(parsed, prNumber, headSha, meta, opts = {}) lines.push(''); lines.push(`**Summary:** ${parsed.summary || '(none)'}`); lines.push(''); + lines.push( + `_Findings: ${oracleCount} mechanical (rivet) ยท ${modelCount} from local AI model._` + ); + lines.push(''); if (findings.length === 0) { lines.push('**Findings:** None.'); @@ -268,13 +280,18 @@ export function renderReviewMarkdown(parsed, prNumber, headSha, meta, opts = {}) lines.push(''); findings.forEach((f, i) => { if (typeof f.source === 'string' && f.source.startsWith('oracle:')) { - // Oracle finding: artifact-anchored, mechanically validated. + // Oracle finding: artifact-anchored, mechanically validated. When + // findings are grouped, `artifact_ids` is a multi-element array; + // when single, it falls back to `artifact_id`. const sevBadge = f.severity === 'error' ? '๐Ÿ”ด' : f.severity === 'warning' ? '๐ŸŸก' : 'โ„น๏ธ'; const oracleName = f.source.replace('oracle:', ''); - lines.push(`${i + 1}. ${sevBadge} \`${f.artifact_id || ''}\` _(${oracleName})_`); + const label = Array.isArray(f.artifact_ids) && f.artifact_ids.length > 1 + ? `${f.artifact_ids.length} artifacts` + : f.artifact_id || ''; + lines.push(`${i + 1}. ${sevBadge} \`${label}\` _(${oracleName})_`); lines.push(` ${f.claim}`); } else { // Model finding: file:line-anchored, post-validated. diff --git a/src/ai-review.js b/src/ai-review.js index 9f2d193..3307261 100644 --- a/src/ai-review.js +++ b/src/ai-review.js @@ -11,7 +11,12 @@ import { renderReviewMarkdown, buildResponseFormat } from './ai-review-prompt.js'; -import { runRivetOracle } from './rivet-oracle.js'; +import { + runRivetOracle, + runRivetValidate, + subtractFindings, + groupOracleFindings +} from './rivet-oracle.js'; import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js'; const _reviewTimestamps = new Map(); @@ -270,6 +275,11 @@ async function callLocalAI(endpoint, model, systemPrompt, userPrompt, options = } } +// Stable HTML marker for supersedePreviousReviews. Markdown renders it as +// nothing visible but a substring search finds it reliably regardless of +// header text changes. The legacy 'AI Code Review' string also matches old +// comments still in the wild from before the rename. +const REVIEW_MARKER = ''; const AI_REVIEW_SIGNATURE = 'AI Code Review'; function formatReviewComment(aiResponse, prNumber, headSha, meta) { @@ -323,7 +333,7 @@ async function supersedePreviousReviews(octokit, owner, repo, prNumber) { for (const comment of comments) { if ( comment.body && - comment.body.includes(AI_REVIEW_SIGNATURE) && + (comment.body.includes(REVIEW_MARKER) || comment.body.includes(AI_REVIEW_SIGNATURE)) && !comment.body.startsWith('>') ) { await octokit.request( @@ -415,12 +425,50 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) { timeout: rivetCfg.timeout_ms || 60000 }) ); - if (oracleSummary?.findings) { - oracleFindings = oracleSummary.findings; + let headOracleFindings = oracleSummary?.findings || []; + + // Delta filter: subtract findings already present at the PR base. + // Without this, the review surfaces the entire repo backlog every + // time (see spar#175 โ€” 91 pre-existing warnings drowning the diff). + let baseValidate = null; + if (baseSha && baseSha !== headSha) { + try { + baseValidate = await withTempRepoCheckout( + octokit, + owner, + repo, + baseSha, + (repoPath) => runRivetValidate(resolvedBinary, repoPath, { + timeout: rivetCfg.timeout_ms || 60000 + }) + ); + } catch (err) { + getLogger().warn( + { pr: prNumber, err: err.message }, + 'base-ref validate failed โ€” falling back to head-only oracle findings' + ); + } } + const droppedCount = headOracleFindings.length; + headOracleFindings = subtractFindings( + headOracleFindings, + baseValidate?.findings || [] + ); + const baseDelta = droppedCount - headOracleFindings.length; + + // Group identical messages so 50 "Every requirement should be + // satisfied by ..." diagnostics collapse into one finding listing + // the affected artifact ids. + oracleFindings = groupOracleFindings(headOracleFindings); + getLogger().info( - { pr: prNumber, oracleFindings: oracleFindings.length }, - 'Rivet oracle complete' + { + pr: prNumber, + oracleFindings: oracleFindings.length, + droppedAsPreExisting: baseDelta, + afterGrouping: oracleFindings.length + }, + 'Rivet oracle complete (delta + grouped)' ); } } catch (err) { diff --git a/src/rivet-oracle.js b/src/rivet-oracle.js index bf960dc..05e9281 100644 --- a/src/rivet-oracle.js +++ b/src/rivet-oracle.js @@ -67,6 +67,10 @@ function diagnosticToFinding(diag) { source: 'oracle:rivet-validate', severity: diag.severity, artifact_id: diag.artifact_id, + // `message` is stored separately so we can group findings by message text. + // `claim` is the rendered form used by the legacy code path that doesn't + // group (kept for back-compat โ€” tests assert on it). + message: diag.message, claim: `${diag.artifact_id}: ${diag.message}` }; } @@ -79,11 +83,13 @@ function gapToFinding(gap) { if (!gap || typeof gap !== 'object') return null; const id = gap.artifact_id || gap.id; if (!id) return null; + const reason = gap.reason || gap.message || 'lifecycle coverage gap: no downstream artifacts'; return { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: id, - claim: `${id} (${gap.type || 'artifact'}) โ€” ${gap.reason || gap.message || 'lifecycle coverage gap: no downstream artifacts'}` + message: reason, + claim: `${id} (${gap.type || 'artifact'}) โ€” ${reason}` }; } @@ -93,10 +99,12 @@ function gapToFinding(gap) { function brokenCrossRefToFinding(ref) { const from = ref?.from || ref?.source || 'unknown'; const to = ref?.to || ref?.target || 'unknown'; + const message = `broken cross-reference โ†’ ${to}${ref?.reason ? ` (${ref.reason})` : ''}`; return { source: 'oracle:rivet-validate', severity: 'error', artifact_id: from, + message, claim: `Broken cross-reference: ${from} โ†’ ${to}${ref?.reason ? ` (${ref.reason})` : ''}` }; } @@ -234,6 +242,93 @@ export async function runRivetImpact(binary, repoPath, baseRef, opts = {}) { }; } +/** + * Stable identity key for a finding. Used by `subtractFindings` so the + * "is this finding new?" question is decidable without false-positive churn. + * + * The key intentionally excludes claim text (which is rendered) and severity + * upgrades (a warning at base that becomes an error at head is treated as + * "the same finding", since it's the same artifact + same message). + */ +export function findingKey(f) { + if (!f) return ''; + return `${f.source || ''}|${f.artifact_id || ''}|${f.message || f.claim || ''}`; +} + +/** + * Set difference: keep only findings present at HEAD but not at BASE. + * The whole point of PR review is "what did this PR change", not "what's + * the static state of the project". Pre-existing diagnostics get filtered. + */ +export function subtractFindings(headFindings, baseFindings) { + if (!Array.isArray(baseFindings) || baseFindings.length === 0) return headFindings; + const baseKeys = new Set(baseFindings.map(findingKey)); + return headFindings.filter((f) => !baseKeys.has(findingKey(f))); +} + +/** + * Collapse oracle findings that share the same source + severity + message + * into a single finding listing all affected artifact_ids. Today's spar#175 + * review had 91 findings with 6 unique messages โ€” this turns that into 6 + * grouped findings with id lists. + * + * Non-oracle findings (model output) are passed through untouched. + */ +export function groupOracleFindings(findings, opts = {}) { + const { maxIdsShown = 10 } = opts; + if (!Array.isArray(findings) || findings.length === 0) return []; + + const groups = new Map(); + const out = []; + + for (const f of findings) { + if (!f.source || !f.source.startsWith('oracle:')) { + out.push(f); + continue; + } + if (!f.message) { + out.push(f); + continue; + } + const key = `${f.source}|${f.severity}|${f.message}`; + if (!groups.has(key)) { + groups.set(key, { + source: f.source, + severity: f.severity, + message: f.message, + artifact_ids: [] + }); + } + groups.get(key).artifact_ids.push(f.artifact_id); + } + + for (const g of groups.values()) { + if (g.artifact_ids.length === 1) { + out.push({ + source: g.source, + severity: g.severity, + artifact_id: g.artifact_ids[0], + message: g.message, + claim: `${g.artifact_ids[0]}: ${g.message}` + }); + } else { + const shown = g.artifact_ids.slice(0, maxIdsShown).join(', '); + const more = g.artifact_ids.length > maxIdsShown + ? ` (+${g.artifact_ids.length - maxIdsShown} more)` + : ''; + out.push({ + source: g.source, + severity: g.severity, + artifact_id: `${g.artifact_ids.length} artifacts`, + artifact_ids: g.artifact_ids, + message: g.message, + claim: `${g.message} โ€” affecting: ${shown}${more}` + }); + } + } + return out; +} + /** * One-shot helper: run both validate and impact (when baseRef given), merge * findings, return a single result. Errors from either are non-fatal โ€”