Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 147 additions & 31 deletions review/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,13 @@
import {
defineAgent,
encodeSegment,
listJsonFiles,
readJsonFile,
resolveMountRoot,
type IntegrationClientOptions,
type WorkforceCtx
} from '@agentworkforce/runtime';
import { githubClient, slackClient } from '@relayfile/relay-helpers';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';

const execFileAsync = promisify(execFile);

export interface Pr {
owner: string;
Expand Down Expand Up @@ -516,19 +513,10 @@ export function reviewHarnessPrompt(pr: { owner: string; repo: string; number: n

async function verifyReadyForHumanReview(ctx: WorkforceCtx, pr: Pr): Promise<boolean> {
try {
const { stdout } = await execFileAsync('gh', [
'pr',
'view',
String(pr.number),
'--repo',
`${pr.owner}/${pr.repo}`,
'--json',
'state,mergeable,mergeStateStatus,reviewDecision,statusCheckRollup,headRefOid',
], { cwd: ctx.sandbox.cwd, encoding: 'utf8', maxBuffer: 1024 * 1024 });
const state = parsePrReadyState(stdout);
// Populate the head SHA from GitHub's authoritative current head. Some
// webhook payloads (e.g. check_run.completed) don't carry it, and without a
// SHA the ready-announce dedupe can't key on the commit and would re-ping.
const state = await readPrReviewState(pr);
// Populate the head SHA from the adapter's projected head. Some webhook
// payloads (e.g. check_run.completed) don't carry it, and without a SHA the
// ready-announce dedupe can't key on the commit and would re-ping.
if (typeof state.headRefOid === 'string' && state.headRefOid.trim()) {
pr.headSha = state.headRefOid.trim();
}
Expand Down Expand Up @@ -567,22 +555,150 @@ interface PullRequestReadyState {
url?: unknown;
}

function parsePrReadyState(stdout: string): PullRequestReadyState {
const parsed = JSON.parse(stdout) as unknown;
return parsed && typeof parsed === 'object' ? parsed as PullRequestReadyState : {};
async function readMergeOnGreenState(_ctx: WorkforceCtx, pr: Pr): Promise<PullRequestReadyState> {
return readPrReviewState(pr);
}

// ── PR review state from the GitHub VFS (no gh CLI) ──────────────────────────
// `gh pr view` is unavailable here: the sandbox snapshot ships no gh binary, and
// the harness never shells to git/gh (the cloud writeback commits file edits).
// Rebuild the same PullRequestReadyState the gates consume from the GitHub
// adapter's VFS projection instead:
// • pulls/{n}/meta.json → state, draft, labels, head.sha
// • pulls/{n}/checks/_summary.json → the adapter's aggregated CI rollup
// • pulls/{n}/reviews/*.json → the latest review per author
// The adapter does NOT project mergeability (GitHub computes it asynchronously
// and it isn't ingested), so the merge-conflict check is delegated to the merge
// API: mergePullRequest() returns merged:false on a dirty PR and mergePr throws
// rather than land it. Every read fails CLOSED — a missing/empty projection
// yields a state the gates treat as not-ready / pending, never a green light.
async function readPrReviewState(pr: Pr): Promise<PullRequestReadyState> {
const [meta, checks, reviews] = await Promise.all([
loadPrMeta(pr),
loadCheckSummary(pr),
loadReviews(pr),
]);
const headSha = readMetaHeadSha(meta);
return {
state: typeof meta?.state === 'string' ? meta.state : undefined,
isDraft: meta?.draft === true,
// The VFS carries no mergeability field (the adapter doesn't project it), so
// default to MERGEABLE rather than gate on a value we can't read — otherwise
// the ready/merge gates could never pass at all. Safety rests on the merge
// API, not this field: a conflicted PR can NEVER be auto-merged, because
// mergePr throws when GitHub rejects the merge (merged:false). The residual
// is cosmetic — a conflicted PR may still be pinged as "ready for human
// review" (the ready path doesn't hit the merge API); a human resolving the
// conflict at review time absorbs that. True conflict-awareness needs the
// github adapter to project mergeable/mergeStateStatus (tracked alongside the
// cloud credential/projection work).
mergeable: 'MERGEABLE',

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep merge-conflict checks fail-closed

When the VFS state is used for a PR that currently has merge conflicts, this hard-coded MERGEABLE value makes both prReadyStateAllowsHumanReview and evaluateMergeOnGreenState treat the PR as conflict-free once labels/checks/reviews are otherwise green. The merge API may still reject merge-on-green later, but the READY path does not call that API, so a conflicted PR can incorrectly ping Slack as ready for human review; merge-on-green also attempts a merge instead of holding with a conflict reason.

Useful? React with 👍 / 👎.

// Drafts surface via isDraft; mirror onto mergeStateStatus too so the ready
// gate (which keys on DRAFT) still holds a draft back.
...(meta?.draft === true ? { mergeStateStatus: 'DRAFT' } : {}),
labels: meta?.labels,
statusCheckRollup: rollupFromCheckSummary(checks),
latestReviews: reviews,
reviewDecision: deriveReviewDecision(reviews),
...(headSha ? { headRefOid: headSha } : {}),
};
}

/** The adapter's per-PR aggregated check status at `…/pulls/{n}/checks/_summary.json`. */
interface CheckSummary {
total?: number;
passed?: number;
failed?: number;
pending?: number;
conclusion?: string;
}

async function readMergeOnGreenState(ctx: WorkforceCtx, pr: Pr): Promise<PullRequestReadyState> {
const { stdout } = await execFileAsync('gh', [
'pr',
'view',
String(pr.number),
'--repo',
`${pr.owner}/${pr.repo}`,
'--json',
'state,isDraft,labels,mergeable,mergeStateStatus,reviewRequests,latestReviews,statusCheckRollup,headRefOid,url',
], { cwd: ctx.sandbox.cwd, encoding: 'utf8', maxBuffer: 1024 * 1024 });
return parsePrReadyState(stdout);
async function loadCheckSummary(pr: Pr): Promise<CheckSummary | undefined> {
try {
return await readJsonFile<CheckSummary>(
vfsClient(),
'github',
'getChecks',
`/github/repos/${encodeSegment(pr.owner)}/${encodeSegment(pr.repo)}/pulls/${pr.number}/checks/_summary.json`
);
} catch {
return undefined;
}
}

async function loadReviews(pr: Pr): Promise<Array<Record<string, unknown>>> {
try {
const files = await listJsonFiles<Record<string, unknown>>(
vfsClient(),
'github',
'listReviews',
`/github/repos/${encodeSegment(pr.owner)}/${encodeSegment(pr.repo)}/pulls/${pr.number}/reviews`
);
// Guard each entry: a malformed file must not throw out of the map and blank
// the whole review set (the catch below would return []), which would drop an
// active CHANGES_REQUESTED review and could let a blocked PR merge.
return Array.isArray(files)
? files
.map((file) => file?.value)
.filter((v): v is Record<string, unknown> => v !== null && typeof v === 'object')
: [];
} catch {
return [];
}
}

/** The PR head SHA from the meta.json projection's `head: { sha }`. */
function readMetaHeadSha(meta: PrMeta | undefined): string | undefined {
const head = meta?.head;
if (head && typeof head === 'object' && typeof (head as { sha?: unknown }).sha === 'string') {
return ((head as { sha: string }).sha).trim() || undefined;
}
return undefined;
}

/**
* Turn the adapter's aggregated check counts into the statusCheckRollup shape
* the gate evaluators already understand. The decision keys on the counts, not
* on per-check files (which can linger from a prior head), so the gate matches
* GitHub's own "is everything done and green" verdict:
* • no checks ingested yet (total 0 / missing) → empty rollup → the evaluators
* fall through to mergeStateStatus, which we never report CLEAN → HOLD.
* • any failing check → a FAILURE entry → blocked.
* • any pending check → an IN_PROGRESS entry → pending.
* • all complete+passing → a single SUCCESS entry → green.
*/
export function rollupFromCheckSummary(summary: CheckSummary | undefined): Array<Record<string, unknown>> {
const failed = typeof summary?.failed === 'number' ? summary.failed : 0;
const pending = typeof summary?.pending === 'number' ? summary.pending : 0;
const passed = typeof summary?.passed === 'number' ? summary.passed : 0;
// Fall back to the component counts when `total` is missing/malformed, so a
// summary that carries failed/pending/passed but no `total` still reports the
// real check states instead of being treated as "no checks" and held generically.
const total = typeof summary?.total === 'number' ? summary.total : failed + pending + passed;
if (!summary || total === 0) return [];
const rollup: Array<Record<string, unknown>> = [];
if (failed > 0) {
rollup.push({ name: `${failed} failing check${failed === 1 ? '' : 's'}`, status: 'COMPLETED', conclusion: 'FAILURE' });
}
if (pending > 0) {
rollup.push({ name: `${pending} pending check${pending === 1 ? '' : 's'}`, status: 'IN_PROGRESS', conclusion: null });
}
if (failed === 0 && pending === 0) {
rollup.push({ name: `${passed} check${passed === 1 ? '' : 's'}`, status: 'COMPLETED', conclusion: 'SUCCESS' });
}
return rollup;
}

/**
* Derive gh's `reviewDecision` from the projected reviews: if any author's
* latest review requested changes, the PR isn't the human's turn. Mirrors the
* one reviewDecision value the ready gate keys on (CHANGES_REQUESTED).
*/
export function deriveReviewDecision(reviews: Array<Record<string, unknown>>): string | undefined {
for (const [, review] of latestReviewsByAuthor(reviews)) {
if (normalizeState(review.state) === 'CHANGES_REQUESTED') return 'CHANGES_REQUESTED';
}
return undefined;
}

/** Whether the PR is still open. A missing `state` is treated as open so the
Expand Down
60 changes: 60 additions & 0 deletions tests/review-agent.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { parseIntegrations } from '@agentworkforce/persona-kit';

import {
announceReadyOnce,
deriveReviewDecision,
evaluateMergeOnGreenState,
handleSlackMergeRequest,
labelNames,
Expand All @@ -15,6 +16,7 @@ import {
resolveAuthorLogin,
reviewHarnessPrompt,
reviewAuthorAllowlistDecision,
rollupFromCheckSummary,
} from '../.test-build/review/agent.js';

test('reviewAuthorAllowlistDecision lets configured authors through', () => {
Expand Down Expand Up @@ -199,6 +201,64 @@ test('evaluateMergeOnGreenState requires label, green checks, and requested bot
}).outcome, 'blocked');
});

test('rollupFromCheckSummary maps the adapter check summary to gate-ready rollups', () => {
// No checks ingested yet (missing / total 0) → empty rollup. The gates then
// fall through to mergeStateStatus (which the VFS path never reports CLEAN),
// so the PR HOLDS instead of going green on absent CI.
assert.deepEqual(rollupFromCheckSummary(undefined), []);
assert.deepEqual(rollupFromCheckSummary({ total: 0, passed: 0, failed: 0, pending: 0 }), []);

// All complete and passing → one SUCCESS entry the evaluators read as green.
const green = rollupFromCheckSummary({ total: 3, passed: 3, failed: 0, pending: 0 });
assert.equal(evaluateMergeOnGreenState({
state: 'OPEN', isDraft: false, mergeable: 'MERGEABLE',
labels: [{ name: 'merge-on-green' }], statusCheckRollup: green,
}).outcome, 'ready');

// A pending check → IN_PROGRESS → the merge-on-green gate stays pending.
const pending = rollupFromCheckSummary({ total: 2, passed: 1, failed: 0, pending: 1 });
assert.equal(evaluateMergeOnGreenState({
state: 'OPEN', isDraft: false, mergeable: 'MERGEABLE',
labels: [{ name: 'merge-on-green' }], statusCheckRollup: pending,
}).outcome, 'pending');

// A failing check → FAILURE → blocked.
const failing = rollupFromCheckSummary({ total: 2, passed: 1, failed: 1, pending: 0 });
assert.equal(evaluateMergeOnGreenState({
state: 'OPEN', isDraft: false, mergeable: 'MERGEABLE',
labels: [{ name: 'merge-on-green' }], statusCheckRollup: failing,
}).outcome, 'blocked');

// Green checks also satisfy the human-review ready gate.
assert.equal(prReadyStateAllowsHumanReview({
state: 'OPEN', mergeable: 'MERGEABLE', statusCheckRollup: green,
}), true);

// `total` missing but component counts present → derive total from the counts
// so a failing check still blocks (not treated as "no checks reported").
assert.equal(evaluateMergeOnGreenState({
state: 'OPEN', isDraft: false, mergeable: 'MERGEABLE', labels: [{ name: 'merge-on-green' }],
statusCheckRollup: rollupFromCheckSummary({ failed: 1, pending: 0, passed: 2 }),
}).outcome, 'blocked');
});

test('deriveReviewDecision flags CHANGES_REQUESTED from the latest review per author', () => {
// No reviews → undefined (not blocking).
assert.equal(deriveReviewDecision([]), undefined);

// A later APPROVED supersedes an earlier CHANGES_REQUESTED from the same author.
assert.equal(deriveReviewDecision([
{ author: { login: 'coderabbitai[bot]' }, state: 'CHANGES_REQUESTED', submitted_at: '2026-06-10T00:00:00Z' },
{ author: { login: 'coderabbitai[bot]' }, state: 'APPROVED', submitted_at: '2026-06-11T00:00:00Z' },
]), undefined);

// An outstanding CHANGES_REQUESTED (the author's latest) blocks.
assert.equal(deriveReviewDecision([
{ author: { login: 'willwashburn' }, state: 'APPROVED', submitted_at: '2026-06-10T00:00:00Z' },
{ author: { login: 'coderabbitai[bot]' }, state: 'CHANGES_REQUESTED', submitted_at: '2026-06-11T00:00:00Z' },
]), 'CHANGES_REQUESTED');
});

test('parseSlackMergeRequest extracts a GitHub PR URL from merge requests', () => {
assert.deepEqual(parseSlackMergeRequest('<@Ubot> please merge https://github.com/AgentWorkforce/relayfile-adapters/pull/158'), {
pr: {
Expand Down