Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 90 additions & 4 deletions review/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import {
type WorkforceCtx
} from '@agentworkforce/runtime';
import { githubClient, slackClient } from '@relayfile/relay-helpers';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';

const execFileAsync = promisify(execFile);

export interface Pr {
owner: string;
Expand Down Expand Up @@ -216,14 +220,15 @@ async function reviewAndFix(ctx: WorkforceCtx, pr: Pr): Promise<void> {
// READY sentinel (it's the slack/ready signal, not a review-body line) and
// post whatever's left as a PR comment via the github VFS.
const raw = (run.output ?? '').trimEnd();
const ready = lastLine(raw) === 'READY';
const body = ready ? stripLastLine(raw).trimEnd() : raw;
const harnessReady = lastLine(raw) === 'READY';
const body = harnessReady ? stripLastLine(raw).trimEnd() : raw;
if (!body) {
await failReviewRun(ctx, pr, 'The review harness produced no review output.');
}
if (body) {
await githubClient().comment({ owner: pr.owner, repo: pr.repo, number: pr.number }, body);
}
const ready = harnessReady ? await verifyReadyForHumanReview(ctx, pr) : false;

// Only ping Slack when the PR is actually a human's turn: checks green, all
// bot/reviewer comments resolved, nothing left for the agent to fix (the
Expand Down Expand Up @@ -264,11 +269,92 @@ export function reviewHarnessPrompt(pr: { owner: string; repo: string; number: n
`advisory text in your review instead. Anything left in the working tree is committed and pushed to the PR after`,
`you exit — an unverified push is worse than no push.`,
`Only end your output with READY on its own last line when the PR genuinely needs a human now — meaning you have`,
`resolved or addressed every bot and reviewer comment, there are no failing checks left that you could fix, and the`,
`remaining decision requires human judgment. If anything is still red, unresolved, or in-progress, do NOT print READY.`
`resolved or addressed every bot and reviewer comment, every required CI check has completed (none are pending`,
`or in-progress) and all are passing, the PR has no merge conflicts (GitHub reports it as mergeable), and the`,
`remaining decision requires human judgment. If any check is still pending, in-progress, or failed, or if the PR`,
`has merge conflicts, do NOT print READY.`
].join('\n');
}

async function verifyReadyForHumanReview(ctx: WorkforceCtx, pr: Pr): Promise<boolean> {
try {
const { stdout } = await execFileAsync('gh', [
'pr',
'view',
String(pr.number),
'--repo',
`${pr.owner}/${pr.repo}`,
'--json',
'mergeable,statusCheckRollup',
], { cwd: ctx.sandbox.cwd, encoding: 'utf8', maxBuffer: 1024 * 1024 });
const state = parsePrReadyState(stdout);
const ready = prReadyStateAllowsHumanReview(state);
if (!ready) {
ctx.log?.('warn', 'pr-reviewer.ready-sentinel.downgraded', {
owner: pr.owner,
repo: pr.repo,
number: pr.number,
reason: describeNotReadyState(state),
});
}
return ready;
} catch (error) {
ctx.log?.('warn', 'pr-reviewer.ready-sentinel.verification-failed', {
owner: pr.owner,
repo: pr.repo,
number: pr.number,
error: error instanceof Error ? error.message : String(error),
});
return false;
}
}

interface PullRequestReadyState {
mergeable?: unknown;
statusCheckRollup?: unknown;
}

function parsePrReadyState(stdout: string): PullRequestReadyState {
const parsed = JSON.parse(stdout) as unknown;
return parsed && typeof parsed === 'object' ? parsed as PullRequestReadyState : {};
}

export function prReadyStateAllowsHumanReview(state: PullRequestReadyState): boolean {
if (state.mergeable !== 'MERGEABLE') return false;
const checks = Array.isArray(state.statusCheckRollup) ? state.statusCheckRollup : [];
return checks.every(checkPassedAndComplete);
Comment on lines +324 to +325

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: statusCheckRollup is treated as optional and silently coerced to an empty array when missing or in an unexpected shape, and every() on an empty array returns true. That can incorrectly mark a PR as human-ready and send Slack READY notifications even when check data is unavailable/unknown. Treat missing or non-array check data as not-ready instead of ready. [incorrect condition logic]

Severity Level: Major ⚠️
- ⚠️ Slack channel pings even when CI status data missing.
- ⚠️ Humans may review before confirming all required checks passed.
Steps of Reproduction ✅
1. Trigger any GitHub event handled by the agent (e.g., `pull_request.synchronize`) so the
default handler in `review/agent.ts:66-108` calls `reviewAndFix(ctx, pr)` at
`review/agent.ts:208-246` for an open PR.

2. Let the review harness complete successfully and emit output whose last line is exactly
`READY`, so `reviewAndFix` sets `harnessReady` to true and calls
`verifyReadyForHumanReview(ctx, pr)` at `review/agent.ts:223-232`.

3. Ensure the runtime's `gh pr view --json mergeable,statusCheckRollup` invocation in
`verifyReadyForHumanReview` (`review/agent.ts:279-289`) returns JSON where `mergeable` is
`"MERGEABLE"` but `statusCheckRollup` is missing or `null` (e.g., due to an older `gh`
version or an API shape change), so `parsePrReadyState` (`review/agent.ts:317-320`)
produces `{ mergeable: 'MERGEABLE' }` with `statusCheckRollup` undefined.

4. Observe `prReadyStateAllowsHumanReview` at `review/agent.ts:322-325` treat the missing
`statusCheckRollup` as an empty array (`checks = []`), so
`checks.every(checkPassedAndComplete)` returns `true`; `verifyReadyForHumanReview` returns
`true`, causing `reviewAndFix` to set `ready` to `true` and send the READY Slack
notification at `review/agent.ts:233-245`, even though the code had no usable check-rollup
data to confirm CI status.

Fix in Cursor | Fix in VSCode Claude

(Use Cmd/Ctrl + Click for best experience)

Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** review/agent.ts
**Line:** 324:325
**Comment:**
	*Incorrect Condition Logic: `statusCheckRollup` is treated as optional and silently coerced to an empty array when missing or in an unexpected shape, and `every()` on an empty array returns `true`. That can incorrectly mark a PR as human-ready and send Slack READY notifications even when check data is unavailable/unknown. Treat missing or non-array check data as not-ready instead of ready.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix
👍 | 👎

}

function checkPassedAndComplete(check: unknown): boolean {
if (!check || typeof check !== 'object') return false;
const record = check as Record<string, unknown>;
const state = normalizeState(record.state);
if (state) return state === 'SUCCESS' || state === 'NEUTRAL';
const status = normalizeState(record.status);
const conclusion = normalizeState(record.conclusion);
return status === 'COMPLETED' && (conclusion === 'SUCCESS' || conclusion === 'NEUTRAL');

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Check runs with conclusion: SKIPPED are currently treated as failing because only SUCCESS and NEUTRAL are accepted. In GitHub check semantics, skipped checks are non-failing (and this file already treats skipped as non-failure in ciFailed), so this logic can wrongly downgrade READY and suppress valid human-review notifications. [api mismatch]

Severity Level: Critical 🚨
- ❌ READY Slack notification suppressed when checks finish with SKIPPED.
- ⚠️ Humans may not be notified about review-ready pull requests.
Steps of Reproduction ✅
1. Configure a GitHub check or workflow that can legitimately finish with `conclusion:
"skipped"` (e.g., via a conditional job), so GitHub's `statusCheckRollup` for a PR can
contain entries with `status: "COMPLETED"` and `conclusion: "SKIPPED"`.

2. For that PR, trigger the agent (e.g., with `pull_request.opened` or
`pull_request.synchronize`) so the handler in `review/agent.ts:66-108` calls
`reviewAndFix(ctx, pr)` at `review/agent.ts:208-246`, and ensure the harness emits output
ending in `READY` so `verifyReadyForHumanReview(ctx, pr)` is invoked
(`review/agent.ts:223-232`).

3. When `verifyReadyForHumanReview` runs `gh pr view --json mergeable,statusCheckRollup`
and parses the result via `parsePrReadyState` (`review/agent.ts:279-320`), it passes the
`statusCheckRollup` list to `prReadyStateAllowsHumanReview` (`review/agent.ts:322-325`),
which calls `checkPassedAndComplete` for each check at `review/agent.ts:328-335`; for a
`COMPLETED`/`SKIPPED` check, `normalizeState` (`review/agent.ts:338-340`) yields `status
=== 'COMPLETED'` and `conclusion === 'SKIPPED'`, causing `checkPassedAndComplete` to
return `false` because it only accepts `SUCCESS` or `NEUTRAL`.

4. Because at least one check returns `false` from `checkPassedAndComplete`,
`prReadyStateAllowsHumanReview` returns `false`, `verifyReadyForHumanReview` logs a
`pr-reviewer.ready-sentinel.downgraded` warning at `review/agent.ts:292-299`, and
`reviewAndFix` sets `ready` to `false` at `review/agent.ts:231-245`; as a result, even
though all checks are non-failing (consistent with `ciFailed` treating `skipped` as
non-failure at `review/agent.ts:525-528`), the READY Slack notification is never sent for
this PR.

Fix in Cursor | Fix in VSCode Claude

(Use Cmd/Ctrl + Click for best experience)

Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** review/agent.ts
**Line:** 335:335
**Comment:**
	*Api Mismatch: Check runs with `conclusion: SKIPPED` are currently treated as failing because only `SUCCESS` and `NEUTRAL` are accepted. In GitHub check semantics, skipped checks are non-failing (and this file already treats skipped as non-failure in `ciFailed`), so this logic can wrongly downgrade READY and suppress valid human-review notifications.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix
👍 | 👎

}

function normalizeState(value: unknown): string | undefined {
return typeof value === 'string' ? value.trim().toUpperCase() : undefined;
}

function describeNotReadyState(state: PullRequestReadyState): string {
if (state.mergeable !== 'MERGEABLE') {
return `mergeable=${String(state.mergeable ?? 'missing')}`;
}
const checks = Array.isArray(state.statusCheckRollup) ? state.statusCheckRollup : [];
const blocked = checks.find((check) => !checkPassedAndComplete(check));
if (!blocked || typeof blocked !== 'object') {
return 'statusCheckRollup contains a non-passing check';
}
const record = blocked as Record<string, unknown>;
const name = record.name ?? record.context ?? record.workflowName ?? 'unknown';
const stateText = record.state ?? record.status ?? 'missing';
const conclusionText = record.conclusion ?? 'missing';
return `check=${String(name)} state=${String(stateText)} conclusion=${String(conclusionText)}`;
}

async function failReviewRun(ctx: WorkforceCtx, pr: Pr, reason: string): Promise<never> {
const message = [
`pr-reviewer could not complete review for #${pr.number} in ${pr.owner}/${pr.repo}.`,
Expand Down
38 changes: 38 additions & 0 deletions tests/review-agent.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { parseIntegrations } from '@agentworkforce/persona-kit';
import {
labelNames,
postSlackPrUpdate,
prReadyStateAllowsHumanReview,
readPr,
resolveAuthorLogin,
reviewHarnessPrompt,
Expand Down Expand Up @@ -114,6 +115,43 @@ test('reviewHarnessPrompt forbids git except the explicit restore-only carve-out
assert.doesNotMatch(prompt, /\bgit\s+(checkout|reset|clean|commit|push|add|fetch|pull|rebase|merge|stash)\b/);
});

test('reviewHarnessPrompt only allows READY after checks complete, pass, and the PR is mergeable', () => {
const prompt = reviewHarnessPrompt({ owner: 'AgentWorkforce', repo: 'agents', number: 100 });
assert.match(prompt, /every required CI check has completed/);
assert.match(prompt, /none are pending\s+or in-progress/);
assert.match(prompt, /all are passing/);
assert.match(prompt, /GitHub reports it as mergeable/);
assert.match(prompt, /If any check is still pending, in-progress, or failed, or if the PR\s+has merge conflicts, do NOT print READY/);
assert.doesNotMatch(prompt, /there are no failing checks left/);
});

test('prReadyStateAllowsHumanReview downgrades READY while a check is pending', () => {
assert.equal(prReadyStateAllowsHumanReview({
mergeable: 'MERGEABLE',
statusCheckRollup: [
{ __typename: 'CheckRun', name: 'unit', status: 'COMPLETED', conclusion: 'SUCCESS' },
{ __typename: 'StatusContext', context: 'deploy-preview', state: 'PENDING' },
],
}), false);
});

test('prReadyStateAllowsHumanReview requires mergeable PRs with only completed passing checks', () => {
assert.equal(prReadyStateAllowsHumanReview({
mergeable: 'MERGEABLE',
statusCheckRollup: [
{ __typename: 'CheckRun', name: 'unit', status: 'COMPLETED', conclusion: 'SUCCESS' },
{ __typename: 'StatusContext', context: 'lint', state: 'NEUTRAL' },
],
}), true);

assert.equal(prReadyStateAllowsHumanReview({
mergeable: 'CONFLICTING',
statusCheckRollup: [
{ __typename: 'CheckRun', name: 'unit', status: 'COMPLETED', conclusion: 'SUCCESS' },
],
}), false);
});

// Cloud only mounts an integration's relayfile subtree from its `scope` (or
// from triggers — and this persona has github triggers only). A scope-less
// `slack: {}` mounts nothing, so every slackClient() post was written to
Expand Down