AgentWorkforce · khaliqgant · Jun 13, 2026 · Jun 13, 2026
diff --git a/review/README.md b/review/README.md
@@ -7,8 +7,9 @@ Instantly launch this agent on Agent Relay
 
 [![Launch Agent](https://agentrelay.com/launch-agent_small.svg)](https://agentrelay.com/cloud/deploy?persona=https://github.com/AgentWorkforce/agents/blob/main/review/persona.ts)
 
-A proactive agent that when a PR is opened up posts a multi agent review. If
-the review finds items that needs to be changed it proactively fixes the issues both
-from its own review but also other bot reviews. If there are failing CI checks
-or merge conflicts it proactively resolves it. It sends a message on Slack to let
-you know when a PR is ready for your review or can merge the PR if you approve.
+A conservative PR reviewer that posts a multi-agent review when a PR opens.
+It may auto-apply only lint, formatting, typo, import-order, and other
+mechanical non-semantic fixes. Logic changes, safety-sensitive code, lifecycle
+or termination paths, and test changes are suggestion/comment-only so a human
+author owns them. It sends a message on Slack when a PR is ready for your
+review or can merge the PR if you approve.
diff --git a/review/agent.ts b/review/agent.ts
@@ -1,14 +1,15 @@
 /**
- * pr-reviewer handler — review, auto-fix, and shepherd a PR to the finish line.
+ * pr-reviewer handler — review, apply mechanical safe fixes, and shepherd a PR
+ * to the finish line.
  *
  *   an authorized approval (pull_request_review.submitted) → merge the PR.
  *   a check run that finished green (check_run.completed)   → nothing to do.
  *   anything else — opened, new commits (synchronize), a
  *   review comment, failed CI, changes requested            → (re)review and fix.
  *
  * The PR's repo is materialized into ctx.sandbox.cwd by cloud before the
- * harness runs. The agent fixes by editing files there; cloud commits and
- * pushes those edits after the harness exits — no git/gh in the harness.
+ * harness runs. The agent may leave only mechanical fixes there; cloud commits
+ * and pushes those edits after the harness exits — no git/gh in the harness.
  *
  * Slack policy: the channel only hears about a PR when it's a human's turn —
  * checks green, every bot/reviewer comment resolved, nothing left for the agent
@@ -93,7 +94,7 @@ export default defineAgent({
   // A check run that finished without failing needs no action.
   if (event.type === 'github.check_run.completed' && !ciFailed(data)) return;
 
-  // Everything else is a reason to (re)review and push fixes.
+  // Everything else is a reason to (re)review and apply safe mechanical fixes.
   const pr = readPr(data);
   if (pr) {
     const skip = await shouldSkipReview(ctx, pr);
@@ -373,14 +374,24 @@ export function reviewHarnessPrompt(pr: { owner: string; repo: string; number: n
     `Focus on the actual PR changes: read .workforce/pr.diff first, then .workforce/changed-files.txt and .workforce/context.json.`,
     `Use the checked-out repo to trace the impact of this diff across callers, types, tests, config, and related files.`,
     `Flag and fix breakage even when the affected file is outside the changed-file set, but do not do an unrelated full-repo audit.`,
-    `Then proactively FIX everything that needs changing — your own findings and any other bot reviews on the PR —`,
-    `and resolve failing CI checks and merge conflicts by editing the code. Don't use git or the gh CLI; cloud commits`,
+    `Auto-edit only lint, formatting, spelling, typo, import-order, or other mechanical non-semantic changes.`,
+    `Do not auto-edit semantic or safety-critical logic. For behavior changes, architecture changes, and any reviewer`,
+    `request that needs human judgment, leave a clear suggestion or review comment instead of changing files.`,
+    `If the PR already has a human review or approval, switch to suggestion/comment-only for everything except`,
+    `obvious mechanical cleanup that cannot change runtime behavior.`,
+    `Resolve failing CI checks by editing the code only when the fix is mechanical and non-semantic. Don't use git or the gh CLI; cloud commits`,
     `and pushes your file edits to the PR after this run. In your output, do not claim that fixes were pushed,`,
     `a GitHub review was submitted, or CI was verified; those are post-harness actions that cloud reports separately.`,
     `Validate every finding — yours or another bot's — against the CURRENT checkout before editing: review comments`,
     `are often stale (already fixed by a later push). Reproduce the problem in the code as it is now, or skip it.`,
     `Make the smallest fix that addresses a demonstrated problem. Do not rewrite, restructure, or "harden" working`,
     `code beyond what the finding requires.`,
+    `Never change semantic or safety defaults: do not turn fail-closed states into fail-open states such as`,
+    `"timeout", "pending", throw, or undefined becoming "acked", true, {}, or another success/default path; do not`,
+    `swap truthiness checks for presence checks; do not edit guard default values. If a reviewer asks for one of`,
+    `these changes, explain the risk in your review and leave the code unchanged.`,
+    `Never touch lifecycle, termination, reaper, in-flight, dispatch, broker ownership, or process-cleanup code. Those`,
+    `areas are safety-critical; raise findings as comments for a human-authored patch instead.`,
     `Stay within this PR's purpose (.workforce/pr.diff is the change; use .workforce/context.json for available PR`,
     `metadata). A reviewer suggestion that changes files or behavior unrelated to`,
     `that purpose — refactoring a module`,
@@ -403,6 +414,8 @@ export function reviewHarnessPrompt(pr: { owner: string; repo: string; number: n
     `ships — the working tree must pass the full command with your edits in place. When you change code that`,
     `GENERATES commands, scripts, or queries, also execute a sample of the generated output against a throwaway`,
     `fixture — tests that only assert on the generated string prove nothing about its behavior.`,
+    `Never add or modify tests to make your own change pass. If a change needs a new or updated test, that is a`,
+    `human decision; describe the needed test in your review and leave the working tree unchanged.`,
     `Never make a check pass by weakening the test: do not delete it, skip it, loosen an assertion, narrow its`,
     `inputs, or replace a real assertion with a trivially-true one. A test that no longer fails when the behavior it`,
     `guards regresses is worse than no test, and it passes CI while hiding the bug. When an edit makes a test fail,`,

diff --git a/review/persona.ts b/review/persona.ts
@@ -1,15 +1,15 @@
 import { definePersona } from '@agentworkforce/persona-kit';
 
 /**
- * Review Agent — reviews every new PR, fixes the issues it (and other bots)
- * find, resolves failing CI and merge conflicts, pings you on Slack when the PR
- * is ready, and merges it once you approve.
+ * Review Agent — reviews every new PR, applies only mechanical safe fixes,
+ * comments on logic/safety findings, pings you on Slack when the PR is ready,
+ * and merges it once you approve.
  */
 export default definePersona({
   id: 'pr-reviewer',
   intent: 'review',
   tags: ['review'],
-  description: 'Reviews new PRs, fixes the issues found (its own + other bots\'), resolves failing CI and merge conflicts, pings you on Slack when ready, and merges once you approve.',
+  description: 'Reviews new PRs, applies only lint/format/typo fixes, comments on logic or safety findings, pings you on Slack when ready, and merges once you approve.',
   cloud: true,
 
   integrations: {
@@ -42,7 +42,7 @@ export default definePersona({
       picker: { provider: 'github', resource: 'users' }
     },
     REVIEW_AUTHORS: {
-      description: 'Only review and auto-fix PRs opened by these GitHub logins (comma-separated). If unset, every author is reviewed.',
+      description: 'Only review and mechanically auto-fix PRs opened by these GitHub logins (comma-separated). If unset, every author is reviewed.',
       env: 'REVIEW_AUTHORS',
       optional: true,
       picker: { provider: 'github', resource: 'users' }
@@ -56,7 +56,7 @@ export default definePersona({
 
   harness: 'codex',
   model: 'gpt-5.5',
-  systemPrompt: 'You are a rigorous senior reviewer. Review PRs, fix what you find, keep CI green, and only hand back when the PR is genuinely ready.',
+  systemPrompt: 'You are a rigorous senior reviewer. Review PRs, auto-apply only lint/format/typo fixes, leave logic and safety changes as comments, keep CI honest, and only hand back when the PR is genuinely ready.',
   harnessSettings: {
     reasoning: 'high',
     timeoutSeconds: 2400,

diff --git a/tests/review-agent.test.mjs b/tests/review-agent.test.mjs
@@ -163,6 +163,32 @@ test('reviewHarnessPrompt keeps fixes within the PR scope and verifies CI-deep',
   assert.match(prompt, /only change a test's EXPECTATION when the test encoded the OLD/);
 });
 
+test('reviewHarnessPrompt limits auto-edits to mechanical changes', () => {
+  const prompt = reviewHarnessPrompt({ owner: 'AgentWorkforce', repo: 'agents', number: 266 });
+  assert.match(prompt, /Auto-edit only lint, formatting, spelling, typo, import-order, or other mechanical non-semantic changes/);
+  assert.match(prompt, /Do not auto-edit semantic or safety-critical logic/);
+  assert.match(prompt, /leave a clear suggestion or review comment instead of changing files/);
+  assert.match(prompt, /PR already has a human review or approval/);
+  assert.match(prompt, /suggestion\/comment-only/);
+});
+
+test('reviewHarnessPrompt forbids safety-default and lifecycle edits', () => {
+  const prompt = reviewHarnessPrompt({ owner: 'AgentWorkforce', repo: 'factory-sdk', number: 264 });
+  assert.match(prompt, /Never change semantic or safety defaults/);
+  assert.match(prompt, /fail-closed states into fail-open states/);
+  assert.match(prompt, /"timeout", "pending", throw, or undefined becoming "acked", true, \{\}/);
+  assert.match(prompt, /swap truthiness checks for presence checks/);
+  assert.match(prompt, /guard default values/);
+  assert.match(prompt, /Never touch lifecycle, termination, reaper, in-flight, dispatch, broker ownership, or process-cleanup code/);
+});
+
+test('reviewHarnessPrompt forbids self-justifying test edits', () => {
+  const prompt = reviewHarnessPrompt({ owner: 'AgentWorkforce', repo: 'agents', number: 243 });
+  assert.match(prompt, /Never add or modify tests to make your own change pass/);
+  assert.match(prompt, /If a change needs a new or updated test, that is a\s+human decision/);
+  assert.match(prompt, /describe the needed test in your review and leave the working tree unchanged/);
+});
+
 test('reviewHarnessPrompt only allows READY after checks complete, pass, and the PR is mergeable', () => {
   const prompt = reviewHarnessPrompt({ owner: 'AgentWorkforce', repo: 'agents', number: 100 });
   assert.match(prompt, /every required CI check has completed/);