AgentWorkforce · khaliqgant · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/README.md b/README.md
@@ -105,6 +105,7 @@ factory factory run-once --config ./factory.config.json --dry-run
 | `factory status` | Print current factory status as JSON. |
 | `factory triage <KEY\|path>` | Triage one issue and print the decision. |
 | `factory dispatch <KEY\|path>` | Triage + dispatch one issue. Honors `--dry-run`. |
+| `factory canary <KEY\|path>` | Assert that a known "Ready for Agent" issue is still dispatch-ready via the real dry-run triage path. Prints `{ok,issue,status,reason}`; exits non-zero (with the skip reason) if it isn't. |
 
 Global options work anywhere in the args: `--config <path>`, `--dry-run`,
 `--backend <internal|relay>`. The internal backend reuses a relay broker that's
@@ -113,6 +114,20 @@ already running for your workspace, and starts one if none is.
 (There are a few more operational commands — `loop-status`, `kill-loop`,
 `reap-orphans`, `close-probe` — for running the daemon in production.)
 
+### Scheduled sync-fidelity canary
+
+`factory canary` is the regression detector for upstream sync drift: if a synced
+issue stops carrying enough state to be dispatchable (e.g. the Linear sync
+regresses to records without `state.id`), a known-good issue flips from
+dispatch-ready to skipped. Run it on a schedule against a standing "Ready for
+Agent" canary issue and alert on failure.
+
+`scripts/factory-canary.sh` wraps the command for cron/launchd: it runs from your
+deployment dir (reusing the running relay broker), bounds a hung run with
+`timeout`/`gtimeout` (required — on macOS, install coreutils), and posts a Slack
+alert via `FACTORY_CANARY_SLACK_WEBHOOK` on failure. See
+`scripts/com.agentrelay.factory-canary.plist.example` for an every-6h launchd template.
+
 ## Tell it what to work on
 
 Two ways to hand the factory an issue — both are just labeling/titling, nothing

diff --git a/package.json b/package.json
@@ -20,6 +20,8 @@
   "files": [
     "dist",
     "bin/factory.mjs",
+    "scripts/factory-canary.sh",
+    "scripts/com.agentrelay.factory-canary.plist.example",
     "package.json",
     "README.md"
   ],

diff --git a/scripts/com.agentrelay.factory-canary.plist.example b/scripts/com.agentrelay.factory-canary.plist.example
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  launchd template for the factory sync-fidelity canary (see factory-canary.sh).
+
+  Install (macOS):
+    1. Replace __FACTORY_WORKDIR__ with your factory deployment dir (the one with
+       factory.config.json), and __FACTORY_BIN__ with the path to factory.mjs
+       (e.g. node_modules/@agent-relay/factory/bin/factory.mjs). Set the issue key
+       and, optionally, FACTORY_CANARY_SLACK_WEBHOOK.
+    2. cp scripts/com.agentrelay.factory-canary.plist.example \
+         ~/Library/LaunchAgents/com.agentrelay.factory-canary.plist
+    3. launchctl load ~/Library/LaunchAgents/com.agentrelay.factory-canary.plist
+    4. launchctl start com.agentrelay.factory-canary   # run once now to verify
+
+  Runs every 6h (StartInterval 21600). Logs to <workdir>/.factory-canary.log.
+  Run from the deployment dir so the canary reuses the running relay broker.
+
+  launchd jobs do not inherit your shell PATH, so PATH is set explicitly below;
+  adjust it so `node` and `timeout`/`gtimeout` (coreutils) resolve on this machine.
+-->
+<plist version="1.0">
+<dict>
+  <key>Label</key>
+  <string>com.agentrelay.factory-canary</string>
+
+  <key>ProgramArguments</key>
+  <array>
+    <string>/bin/bash</string>
+    <string>__FACTORY_WORKDIR__/scripts/factory-canary.sh</string>
+  </array>
+
+  <key>WorkingDirectory</key>
+  <string>__FACTORY_WORKDIR__</string>
+
+  <key>EnvironmentVariables</key>
+  <dict>
+    <key>PATH</key>
+    <string>/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
+    <key>FACTORY_CANARY_ISSUE</key>
+    <string>AR-305</string>
+    <key>FACTORY_WORKDIR</key>
+    <string>__FACTORY_WORKDIR__</string>
+    <key>FACTORY_BIN</key>
+    <string>__FACTORY_BIN__</string>
+    <key>FACTORY_CANARY_SLACK_WEBHOOK</key>
+    <string></string>
+  </dict>
+
+  <key>StartInterval</key>
+  <integer>21600</integer>
+  <key>RunAtLoad</key>
+  <true/>
+  <key>StandardOutPath</key>
+  <string>__FACTORY_WORKDIR__/.factory-canary.log</string>
+  <key>StandardErrorPath</key>
+  <string>__FACTORY_WORKDIR__/.factory-canary.log</string>
+</dict>
+</plist>
diff --git a/scripts/factory-canary.sh b/scripts/factory-canary.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+#
+# factory-canary.sh — scheduled sync-fidelity regression detector.
+#
+# Runs `factory canary <issue>` against the LIVE relayfile mount and asserts a
+# known "Ready for Agent" issue is still classified dispatch-ready by the real
+# triage path. If it ever flips to "skipped" (e.g. the Linear sync regresses to
+# sparse records with no state.id), this exits non-zero and alerts — catching the
+# regression before it silently blocks every factory dispatch.
+#
+# Run it on a schedule (cron/launchd) from your factory deployment directory —
+# the one holding factory.config.json, where the relayfile mount + relay broker
+# already live (so the canary reuses the running broker rather than spawning one).
+# See scripts/com.agentrelay.factory-canary.plist.example for a launchd template.
+#
+# Config (env vars):
+#   FACTORY_CANARY_ISSUE   Linear issue key to check (default: the first arg)
+#   FACTORY_WORKDIR        deployment dir with factory.config.json (default: cwd)
+#   FACTORY_CONFIG         config path, relative to FACTORY_WORKDIR (default: factory.config.json)
+#   FACTORY_BIN            path to factory.mjs; a relative path is resolved against
+#                          FACTORY_WORKDIR (default: this repo's bin/factory.mjs)
+#   FACTORY_BACKEND        --backend value (default: internal)
+#   FACTORY_CANARY_TIMEOUT seconds before the canary is considered hung (default: 180)
+#   FACTORY_CANARY_SLACK_WEBHOOK  optional Slack incoming-webhook URL for failure alerts
+#
+# Requires `node` and `timeout` (or coreutils' `gtimeout`) on PATH. Schedulers
+# such as launchd/cron run with a minimal PATH, so set PATH in the job definition.
+#
+# Exit codes: 0 = dispatch-ready (healthy); 1 = NOT ready / error / hung.
+
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ISSUE="${FACTORY_CANARY_ISSUE:-${1:-}}"
+WORKDIR="${FACTORY_WORKDIR:-$PWD}"
+CONFIG="${FACTORY_CONFIG:-factory.config.json}"
+BIN="${FACTORY_BIN:-$SCRIPT_DIR/../bin/factory.mjs}"
+BACKEND="${FACTORY_BACKEND:-internal}"
+TIMEOUT="${FACTORY_CANARY_TIMEOUT:-180}"
+# Grace period (seconds) between the SIGTERM at the deadline and the SIGKILL.
+KILL_GRACE=10
+TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+if [[ -z "$ISSUE" ]]; then
+  echo "[$TS] factory-canary: no issue key (set FACTORY_CANARY_ISSUE or pass an arg)" >&2
+  exit 1
+fi
+# node runs the CLI and also parses the verdict / builds the Slack payload, so a
+# missing node would otherwise surface only as an empty, undiagnosable failure.
+if ! command -v node >/dev/null 2>&1; then
+  echo "[$TS] factory-canary: node not found on PATH (PATH=$PATH)" >&2
+  exit 1
+fi
+# Enter the deployment dir BEFORE validating the bin, so a relative FACTORY_BIN is
+# checked against the same directory it is later executed from.
+cd "$WORKDIR" || { echo "[$TS] factory-canary: cannot cd to $WORKDIR" >&2; exit 1; }
+if [[ ! -f "$BIN" ]]; then
+  echo "[$TS] factory-canary: factory bin not found at $BIN" >&2
+  exit 1
+fi
+
+# The canary runs the real dry-run triage path (no agents spawned) and prints a
+# JSON verdict {ok,issue,status,reason}; exit code mirrors ok. A hung run
+# (broker/mount wedge) is bounded by FACTORY_CANARY_TIMEOUT.
+RUN=(node "$BIN" factory canary "$ISSUE" --config "$CONFIG" --backend "$BACKEND")
+# A hung run (broker/mount wedge) MUST be bounded — an unbounded canary on a
+# scheduler (launchd/cron) can wedge the slot forever and suppress later alerts.
+# macOS has no `timeout` by default; coreutils ships it as `gtimeout`. If neither
+# is present, fail closed rather than run without a deadline.
+TIMEOUT_BIN=""
+if command -v timeout >/dev/null 2>&1; then
+  TIMEOUT_BIN="timeout"
+elif command -v gtimeout >/dev/null 2>&1; then
+  TIMEOUT_BIN="gtimeout"
+fi
+if [[ -z "$TIMEOUT_BIN" ]]; then
+  echo "[$TS] factory-canary: no timeout utility found (install coreutils for 'timeout'/'gtimeout'); refusing to run unbounded" >&2
+  exit 1
+fi
+# -k: a process that traps or ignores the SIGTERM sent at the deadline would
+# still run unbounded, so follow up with SIGKILL after KILL_GRACE seconds.
+# timeout exits 124 when SIGTERM sufficed and 137 (128+9) when SIGKILL was needed.
+OUT="$("$TIMEOUT_BIN" -k "$KILL_GRACE" "$TIMEOUT" "${RUN[@]}" 2>/dev/null)"
+CODE=$?
+
+echo "[$TS] factory-canary $ISSUE -> exit $CODE"
+[[ $CODE -eq 0 ]] && exit 0
+
+# Pick the failure reason. A timeout produces no verdict at all, so report it as
+# such instead of as an unparseable verdict / sync regression.
+case "$CODE" in
+  124)
+    REASON="timeout: no verdict within ${TIMEOUT}s"
+    HINT="The relay broker or relayfile mount may be wedged."
+    ;;
+  137)
+    REASON="killed: canary received SIGKILL (ignored SIGTERM past the ${TIMEOUT}s deadline, or was killed externally)"
+    HINT="The relay broker or relayfile mount may be wedged."
+    ;;
+  *)
+    # The CLI prints a pretty-printed (multi-line) JSON verdict, so parse the whole
+    # output — not just the last line (which is only the closing `}`).
+    REASON="$(printf '%s' "$OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{const v=JSON.parse(s);console.log(`${v.status||"error"}: ${v.reason||"unknown"}`)}catch{console.log("unparseable verdict")}})' 2>/dev/null)"
+    HINT="Sync fidelity may have regressed (issue no longer dispatch-ready)."
+    ;;
+esac
+MSG=":rotating_light: factory canary FAILED for ${ISSUE} — ${REASON}. ${HINT}"
+echo "[$TS] $MSG" >&2
+
+if [[ -n "${FACTORY_CANARY_SLACK_WEBHOOK:-}" ]]; then
+  # -f: make curl exit non-zero on an HTTP error status, so a rejected webhook
+  # (4xx/5xx) is logged as a failed post rather than as a delivered alert.
+  if curl -fsS -m 15 -X POST -H 'Content-type: application/json' \
+      --data "$(node -e 'process.stdout.write(JSON.stringify({text:process.argv[1]}))' "$MSG")" \
+      "$FACTORY_CANARY_SLACK_WEBHOOK" >/dev/null 2>&1; then
+    echo "[$TS] factory-canary: posted Slack alert" >&2
+  else
+    echo "[$TS] factory-canary: Slack alert post failed" >&2
+  fi
+fi
+
+exit 1
diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts
@@ -42,7 +42,18 @@ describe('FactoryConfigSchema', () => {
     // No hardcoded state defaults: omitted stateIds resolve to {} and are filled
     // at runtime from linear.states (by name) or explicit stateIds.
     expect(parsed.stateIds).toEqual({})
-    expect(parsed.linear).toEqual({ states: {}, statesByTeam: {} })
+    // The factory ships its workflow-state NAME conventions as defaults (so a
+    // consumer needn't configure them); statesByTeam stays empty.
+    expect(parsed.linear).toEqual({
+      states: {
+        readyForAgent: 'Ready for Agent',
+        agentImplementing: 'Agent Implementing',
+        done: 'Done',
+        inPlanning: 'In Planning',
+        humanReview: 'In Human Review',
+      },
+      statesByTeam: {},
+    })
     expect(parsed.safety).toEqual({
       requireTitlePrefix: '[factory-e2e]',
       requireLabel: 'factory',

diff --git a/src/config/schema.ts b/src/config/schema.ts
@@ -98,10 +98,22 @@ const babysitterSchema = z.object({
   enabled: z.boolean().default(false),
 }).default({})
 
+// Factory-owned workflow-state NAMEs, applied as the `linear.states` default so
+// consumers (e.g. pear) need not configure them; they let a role resolve from a
+// synced record with state.name but no state.id (sparse-sync fallback).
+// NOTE(review): overridable via config; confirm a partial override keeps the rest.
+const DEFAULT_LINEAR_STATE_NAMES = {
+  readyForAgent: 'Ready for Agent',
+  agentImplementing: 'Agent Implementing',
+  done: 'Done',
+  inPlanning: 'In Planning',
+  humanReview: 'In Human Review',
+}
+
 const linearSchema = z.object({
-  states: linearRoleNamesSchema,
+  states: linearRoleNamesSchema.default(DEFAULT_LINEAR_STATE_NAMES),
   statesByTeam: z.record(z.string(), linearRoleNamesSchema).default({}),
-}).default({})
+}).default({ states: DEFAULT_LINEAR_STATE_NAMES, statesByTeam: {} })
 
 const stateIdsSchema = z.object({
   readyForAgent: z.string().optional(),

diff --git a/src/dispatch/templates.test.ts b/src/dispatch/templates.test.ts
@@ -52,7 +52,7 @@ describe('renderAgentTask', () => {
 
     expect(task).toContain('GitHub repo: AgentWorkforce/pear')
     expect(task).toContain('Wait for a DM from the implementer(s): ar-123-impl-ui, ar-123-impl-broker.')
-    expect(task).toContain('Read the PR diff via `.integrations/github/repos`.')
+    expect(task).toContain('Read the PR diff via .integrations/github/repos.')
     expect(task).toContain('Post review comments via the GitHub writeback path.')
     expect(task).toContain('DM the implementer with specific feedback if changes needed, or approve if good.')
     expect(task).toContain('DM `broker` when the review cycle is complete.')
@@ -67,7 +67,7 @@ describe('renderAgentTask', () => {
       reviewerName: 'ar-123-review',
       implementerNames: ['ar-123-impl'],
       pr: { number: 482, url: 'https://github.com/AgentWorkforce/pear/pull/482' },
-      slackDispatchThread: { channel: 'C123', threadId: '170.000' },
+      slackDispatchThread: { channel: 'C123', threadId: '170.000', mountRoot: '/work/.integrations' },
     })
 
     // Carries the original spec (definition of done) and the open PR ref.
@@ -195,12 +195,12 @@ describe('renderAgentTask', () => {
       role: 'implementer',
       config: baseConfig,
       reviewerName: 'ar-123-review',
-      slackDispatchThread: { channel: 'C123', threadId: '169.000' },
+      slackDispatchThread: { channel: 'C123', threadId: '169.000', mountRoot: '/work/.integrations' },
     })
 
     // The Slack-thread writeback replaces the old relay DM pattern.
     expect(task).toContain('write your question to this issue\'s Slack dispatch thread via the .integrations mount')
-    expect(task).toContain('Write path: .integrations/slack/channels/C123/messages/169_000/replies/question.json')
+    expect(task).toContain('Write path: /work/.integrations/slack/channels/C123/messages/169_000/replies/question.json')
     expect(task).toContain('Write a JSON object with a "text" field')
     expect(task).toContain('Continue with safe reversible work while waiting for a reply.')
     // No relay DM or legacy patterns.

diff --git a/src/dispatch/templates.ts b/src/dispatch/templates.ts
@@ -30,13 +30,30 @@ export interface RenderAgentTaskInput {
   slackDispatchThread?: {
     channel: string
     threadId: string
+    /**
+     * Absolute path to the .integrations mount root the agent can write to. The
+     * agent runs in its repo clonePath, NOT the daemon's cwd where .integrations
+     * lives, so a bare relative `.integrations/...` path is unreachable — the
+     * writeback path must be absolute.
+     */
+    mountRoot: string
   }
   /** Pre-rendered writeback instructions for connected integrations. */
   integrationInstructions?: string
+  /**
+   * Absolute path to the .integrations mount root. The agent runs in its repo
+   * clonePath, not the daemon cwd where .integrations lives, so every
+   * `.integrations/...` reference (github reads, slack writes) must be absolute.
+   * Falls back to the bare relative root when absent (e.g. tests).
+   */
+  integrationsMountRoot?: string
 }
 
 export function renderAgentTask(input: RenderAgentTaskInput): string {
   const repo = normalizeRepo(input.route.repo)
+  // Absolute mount root for every .integrations reference (the agent's cwd is
+  // its repo clone, where a relative .integrations/... does not resolve).
+  const mountRoot = input.integrationsMountRoot ?? '.integrations'
   const cloneInstruction = input.route.clonePath
     ? `Repo path: ${input.route.clonePath}`
     : `Clone/worktree: clone AgentWorkforce/${repo} and work in your own isolated git worktree before editing.`
@@ -68,8 +85,11 @@ export function renderAgentTask(input: RenderAgentTaskInput): string {
     ? [
         '',
         'If you are blocked or need a human answer mid-task, write your question to this issue\'s Slack dispatch thread via the .integrations mount.',
-        `Write path: .integrations/slack/channels/${input.slackDispatchThread.channel}/messages/${input.slackDispatchThread.threadId.replace('.', '_')}/replies/question.json`,
+        // Absolute path: the agent runs in its repo clone, not the daemon cwd
+        // where .integrations lives, so a relative path would be unreachable.
+        `Write path: ${input.slackDispatchThread.mountRoot}/slack/channels/${input.slackDispatchThread.channel}/messages/${input.slackDispatchThread.threadId.replaceAll('.', '_')}/replies/question.json`,
         'Write a JSON object with a "text" field containing your question.',
+        'The human\'s reply will be delivered to you as an `<integration-event>` system message injected into your session — wait for it, do not poll.',
         'Continue with safe reversible work while waiting for a reply.',
       ]
     : []
@@ -79,7 +99,7 @@ export function renderAgentTask(input: RenderAgentTaskInput): string {
       ? `PR #${input.pr.number}${input.pr.url ? ` (${input.pr.url})` : ''}`
       : 'the open PR for this issue'
     const chatLine = input.slackDispatchThread
-      ? 'You can also use this issue\'s Slack dispatch thread to discuss the PR with the human (status, trade-offs, open questions) — proactively write via .integrations/slack if it would help.'
+      ? `You can also use this issue's Slack dispatch thread to discuss the PR with the human (status, trade-offs, open questions) — proactively write via ${mountRoot}/slack if it would help.`
       : 'If a human can be reached, proactively offer to discuss the PR (status, trade-offs, open questions) via the .integrations writeback path.'
     // Match the prompt to where the issue actually lands so the babysitter is not
     // told to "stop at Human Review" while the factory is configured to finish at
@@ -100,7 +120,7 @@ export function renderAgentTask(input: RenderAgentTaskInput): string {
       `You are the PR babysitter for ${input.issue.key}. A PR is already open: ${prRef}.`,
       jobLine,
       'Unlike a conservative reviewer, you SHOULD fix things directly and aggressively — you hold the original issue spec as the definition of done, and you have the rest of the dispatched team to draw on.',
-      'Read the PR diff, CI checks, and review threads via `.integrations/github/repos`.',
+      `Read the PR diff, CI checks, and review threads via ${mountRoot}/github/repos.`,
       'Address every review comment for real — make substantive code changes when the feedback calls for it, not just lint/format touch-ups.',
       'Resolve any merge conflicts: rebase onto the base branch and reconcile using judgment anchored in the issue spec; never weaken tests or flip safety defaults just to force a merge.',
       'Fix failing CI — change the code and tests as needed until the checks pass. A red check is not done.',
@@ -122,7 +142,7 @@ export function renderAgentTask(input: RenderAgentTaskInput): string {
       ...questionInstructions,
       '',
       `Wait for a DM from the implementer(s): ${implementers}.`,
-      'Read the PR diff via `.integrations/github/repos`.',
+      `Read the PR diff via ${mountRoot}/github/repos.`,
       'Post review comments via the GitHub writeback path.',
       'DM the implementer with specific feedback if changes needed, or approve if good.',
       'DM `broker` when the review cycle is complete.',

diff --git a/src/fleet/ensure-relay-broker.test.ts b/src/fleet/ensure-relay-broker.test.ts
@@ -42,11 +42,11 @@ describe('ensureRelayBroker', () => {
     })
     const spawn = vi.fn(async () => spawned)
 
-    const handle = await ensureRelayBroker({ cwd: '/work', connect, spawn })
+    const handle = await ensureRelayBroker({ cwd: '/work', connect, spawn, env: {} })
 
     expect(handle.client).toBe(spawned)
     expect(handle.started).toBe(true)
-    expect(spawn).toHaveBeenCalledWith({ cwd: '/work' })
+    expect(spawn).toHaveBeenCalledWith({ cwd: '/work', workspaceKey: undefined })
   })
 
   it('surfaces the connect error without spawning when autoStart is false', async () => {
@@ -71,7 +71,26 @@ describe('ensureRelayBroker', () => {
       },
       spawn: async () => fakeClient('spawned'),
       logger: { info },
+      env: {},
     })
-    expect(info).toHaveBeenCalledWith('[factory] no relay broker running; starting one', { reason: 'boom' })
+    expect(info).toHaveBeenCalledWith('[factory] no relay broker running; starting one', { reason: 'boom', joiningWorkspace: false })
+  })
+
+  it('threads a workspace key (env or option) into spawn so the broker JOINS', async () => {
+    const spawn = vi.fn(async () => fakeClient('spawned'))
+    await ensureRelayBroker({ connect: () => { throw new Error('no broker') }, spawn, env: { RELAY_WORKSPACE_KEY: 'rk_live_test' } })
+    expect(spawn).toHaveBeenCalledWith(expect.objectContaining({ workspaceKey: 'rk_live_test' }))
+
+    spawn.mockClear()
+    await ensureRelayBroker({ connect: () => { throw new Error('no broker') }, spawn, workspaceKey: 'rk_live_explicit', env: {} })
+    expect(spawn).toHaveBeenCalledWith(expect.objectContaining({ workspaceKey: 'rk_live_explicit' }))
+  })
+
+  it('fails with actionable guidance when there is no broker and no workspace key', async () => {
+    await expect(ensureRelayBroker({
+      connect: () => { throw new Error('no broker') },
+      spawn: async () => { throw new Error('insert into workspaces failed') },
+      env: {},
+    })).rejects.toThrow(/RELAY_WORKSPACE_KEY/u)
   })
 })