import { afterEach, describe, expect, test } from "bun:test";
import { PLACEHOLDER_TIPS, getPlaceholderTip } from "./placeholderTips";

/** Shape of the global object once the Storybook pin flag has been attached. */
interface StorybookGlobal {
  __MUX_STORYBOOK__?: boolean;
}

/** Length of one rotation bucket as exercised by these tests (20 minutes). */
const TWENTY_MIN_MS = 20 * 60 * 1000;

/** A timestamp sitting exactly on a bucket boundary; the multiplier is arbitrary. */
const ALIGNED_ANCHOR_MS = TWENTY_MIN_MS * 100;

/** Typed handle on `globalThis` used to set and clear the Storybook flag. */
const storybookGlobal = globalThis as StorybookGlobal;

describe("getPlaceholderTip", () => {
  afterEach(() => {
    // Reset pin-mode after every test so it cannot leak into the rotation
    // assertions of whichever test runs next.
    delete storybookGlobal.__MUX_STORYBOOK__;
  });

  test("returns the same tip for every call inside a single 20-minute bucket", () => {
    // Starting from an aligned anchor, every millisecond in the following
    // 20 minutes has to resolve to one tip. Anything else would mean a
    // re-render or workspace switch inside the bucket reshuffles the tip.
    const expected = getPlaceholderTip(ALIGNED_ANCHOR_MS);
    for (const offsetMs of [1, TWENTY_MIN_MS - 1]) {
      expect(getPlaceholderTip(ALIGNED_ANCHOR_MS + offsetMs)).toBe(expected);
    }
  });

  test("advances to the next tip when the bucket boundary crosses", () => {
    // One full bucket later the carousel must have moved on; a carousel that
    // never rotates defeats the discoverability goal.
    const tipBefore = getPlaceholderTip(ALIGNED_ANCHOR_MS);
    const tipAfter = getPlaceholderTip(ALIGNED_ANCHOR_MS + TWENTY_MIN_MS);
    expect(tipAfter).not.toBe(tipBefore);
  });

  test("wraps with modulo so long-running clocks never lose the placeholder", () => {
    // A timestamp several full cycles out must still land on a real tip
    // instead of indexing past the end of the list.
    const bucketsAhead = PLACEHOLDER_TIPS.length * 5 + 3;
    const farFutureMs = TWENTY_MIN_MS * bucketsAhead;
    expect(PLACEHOLDER_TIPS).toContain(getPlaceholderTip(farFutureMs));
  });

  test("falls back to the lead tip on non-finite or negative inputs", () => {
    // Broken clocks, mocked timers, and stray sentinel values must neither
    // throw nor yield undefined.
    const leadTip = PLACEHOLDER_TIPS[0];
    for (const degenerateMs of [-1, Number.NaN, Number.POSITIVE_INFINITY]) {
      expect(getPlaceholderTip(degenerateMs)).toBe(leadTip);
    }
  });

  test("pins the default-arg call to the lead tip when running under Storybook", () => {
    // Many Storybook/Chromatic stories render ChatInput. If the tip followed
    // the wall clock there, any reorder of or insertion into PLACEHOLDER_TIPS
    // would change the rendered tip and force a baseline re-accept on each of
    // them. The runtime flag short-circuits the carousel to slot 0 instead.
    storybookGlobal.__MUX_STORYBOOK__ = true;

    // No argument: pinned, whatever the wall clock says.
    expect(getPlaceholderTip()).toBe(PLACEHOLDER_TIPS[0]);

    // Explicit timestamp: rotation still applies with the flag set, so
    // rotation tests cannot silently turn into no-ops if the flag lingers.
    const tipBefore = getPlaceholderTip(ALIGNED_ANCHOR_MS);
    const tipAfter = getPlaceholderTip(ALIGNED_ANCHOR_MS + TWENTY_MIN_MS);
    expect(tipAfter).not.toBe(tipBefore);
  });

  test("leads with the /orchestrate tip so the pinned Storybook slot promotes the new skill", () => {
    // The lead slot is what users see on the degenerate-timer fallback and
    // what every pinned Storybook story renders, so the position (not merely
    // the presence) of the /orchestrate tip is asserted here.
    const leadTip = PLACEHOLDER_TIPS[0];
    expect(leadTip).toMatch(/\/orchestrate\b/);
  });
});
/**
 * Tip carousel for the ChatInput placeholder.
 *
 * The workspace ChatInput shows one of these strings as its placeholder so
 * users who never read docs still get passive exposure to slash commands.
 *
 * The tip rotates on a wall-clock bucket (not per-message, not per-workspace),
 * so switching between chats never reshuffles the visible tip. Two tabs open
 * to two workspaces show the same tip; closing and re-opening the app inside
 * the same bucket shows the same tip. Only a bucket boundary advances the
 * carousel.
 *
 * Every tip in this list must be wired up as a real slash command (registry
 * or built-in skill) AND ungated by experiments (no `experimentGate` on the
 * command definition). Advertising an unimplemented or feature-flag-locked
 * command sends the user into an unknown-command / experiment-required dead
 * end the moment they follow the suggestion. When adding a tip, grep
 * `src/browser/utils/slashCommands/registry.ts` for `experimentGate` to make
 * sure the command you're surfacing isn't gated.
 */

/** Bucket length for tip rotation. */
const TIP_ROTATION_INTERVAL_MS = 20 * 60 * 1000; // 20 minutes

/** Slot of the lead tip, used for degenerate timestamps. */
const LEAD_TIP_INDEX = 0;

/**
 * Tip index pinned for Storybook/Chromatic snapshots.
 *
 * Without pinning, every story that renders ChatInput would resolve a tip via
 * `floor(NOW / 20min) mod PLACEHOLDER_TIPS.length`, so any reorder of or
 * insertion into PLACEHOLDER_TIPS would shift the displayed tip and cascade
 * into a Chromatic baseline diff on every ChatInput story.
 *
 * Pinning to the lead slot means tip-list edits only affect snapshots when
 * the lead tip's text itself changes.
 */
const STORYBOOK_PINNED_TIP_INDEX = LEAD_TIP_INDEX;

/**
 * Plain placeholder returned when no tip can be resolved (empty tip list or
 * an out-of-range index). Keeps the `string` return contract honest instead
 * of leaking `undefined` into the input's placeholder.
 */
const FALLBACK_PLACEHOLDER = "Type a message...";

export const PLACEHOLDER_TIPS: readonly string[] = [
  "Try /orchestrate to coordinate sub-agents and integrate their patches",
  "Try /haiku to send just this message on a different model",
  "Try /+high to crank up reasoning for this message only",
  "Try /compact to summarize the conversation when context gets tight",
  "Try /fork to branch this chat into a new workspace",
  "Try /plan to view or edit the current plan inline",
  "Try /clear --soft to reset context while keeping the chat visible",
  "Try /new to start a fresh workspace from the trunk branch",
  "Try /vim to toggle vim keybindings in the chat input",
  "Try /truncate 50 to drop the oldest half of the conversation",
];

/**
 * Detect Storybook runtime via a global flag set by `.storybook/preview.tsx`.
 *
 * A plain runtime flag is used instead of `import.meta.env` because this
 * module is also loaded by test runners that cannot parse `import.meta`.
 * Only an exact `true` counts; any other value is treated as "not Storybook".
 */
function isStorybookRuntime(): boolean {
  return (globalThis as { __MUX_STORYBOOK__?: boolean }).__MUX_STORYBOOK__ === true;
}

/**
 * Look up a tip by slot, falling back to the plain placeholder when the slot
 * does not exist (empty list, `NaN` from a modulo by zero, stale pinned index).
 */
function tipAt(index: number): string {
  return PLACEHOLDER_TIPS[index] ?? FALLBACK_PLACEHOLDER;
}

/**
 * Return the tip for the current wall-clock bucket.
 *
 * The bucket index is `floor(now / 20min)` modulo the tip list, so every
 * caller in the same 20-minute window sees the same tip regardless of
 * workspace, tab, or user-message count.
 *
 * @param nowMs Timestamp in milliseconds. Exposed for testing; production
 *   callers should omit it so it defaults to `Date.now()`.
 * @returns The tip for the bucket containing `nowMs`. Non-finite or negative
 *   timestamps yield the lead tip. Under Storybook, calls without `nowMs`
 *   yield the pinned tip so visual baselines are insulated from tip-list
 *   reordering; explicit `nowMs` arguments always rotate. The result is
 *   always a string: if no tip exists at the resolved slot, the plain
 *   "Type a message..." placeholder is returned.
 */
export function getPlaceholderTip(nowMs?: number): string {
  // Only the default-arg path is pinned; explicit timestamps keep rotating so
  // rotation tests stay meaningful even when the flag is set.
  if (nowMs === undefined && isStorybookRuntime()) {
    return tipAt(STORYBOOK_PINNED_TIP_INDEX);
  }

  const ts = nowMs ?? Date.now();
  if (!Number.isFinite(ts) || ts < 0) {
    return tipAt(LEAD_TIP_INDEX);
  }

  const bucket = Math.floor(ts / TIP_ROTATION_INTERVAL_MS);
  // With an empty list the modulo is NaN; tipAt() turns that into the fallback.
  return tipAt(bucket % PLACEHOLDER_TIPS.length);
}
Even though your `file_edit_*` tools are available, treat them as off-limits for this workflow. +- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt to preserve your context window for coordination. +- **Trust `explore` sub-agent reports as authoritative for repo facts** (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if a report is ambiguous or contradicts other evidence. For correctness claims, an `explore` report counts as having read the referenced files. +- **`bash` is for orchestration only:** `git` / `gh` repo coordination, targeted post-apply verification, and waiting on PR review/CI. Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration. If a direct verification check fails due to a code issue, delegate the fix to `exec`/`plan` instead of patching it yourself. +- **Never read or scan session storage** (`~/.mux/sessions/**`, `~/.mux/sessions/subagent-patches/**`). Treat session storage as internal. Access patches only through `task_apply_git_patch`. +- **Do not call `propose_plan`** from this workflow. If planning is needed, delegate to a `plan` sub-agent. + +## When a plan is present + +If an accepted plan exists in this workspace: + +- Treat it as the source of truth. Paths/symbols/structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths, one targeted `explore` to sanity-check critical paths is acceptable. +- Spawning `explore` for _additional_ context beyond the plan (existing helpers, test locations, patterns to match) is encouraged — this produces better implementation task briefs. +- Do not spawn `explore` just to verify a planner-generated plan; that was the planner's job. +- Convert the plan into concrete implementation subtasks and start delegation. 
+ +## Delegation guide + +- **`explore`** — narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). Avoid "scan the repo" prompts. +- **`exec`** — straightforward, low-complexity implementation where the path is obvious from the brief. Good fit: single-file edits, localized wiring to existing helpers, narrowly scoped follow-ups with clear acceptance. +- **`plan`** — higher-complexity subtasks that touch multiple files, require non-trivial investigation, or have an unclear approach. Default to `plan` when a subtask needs coordinated updates across multiple locations unless the edits are mechanical and fully specified. Plan subtasks automatically hand off to implementation after a successful `propose_plan`. +- **`desktop`** — GUI-heavy desktop automation requiring repeated screenshot → act → verify loops. + +## Task brief template (Orchestrate → Exec) + +- Task: +- Background (why this matters): + - +- Scope / non-goals: + - Scope: + - Non-goals: +- Starting points: +- Dependencies / assumptions: + - Assumes: + - If unmet: stop and report back; do not expand scope to create prerequisites. +- Acceptance: +- Deliverables: + - Commits: + - Verification: +- Constraints: + - Do not expand scope. + - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation. Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory. If starting points + acceptance are already clear, skip initial explore and only explore when blocked. + - Create one or more git commits before `agent_report`. + +For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions. + +## Dependency analysis (required before spawning implementation tasks) + +For each candidate subtask, write: + +- **Outputs:** files/targets/artifacts introduced/renamed/generated. 
+- **Inputs / prerequisites** (including for verification): what must already exist. + +A subtask is "independent" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch. + +**Parallelism is the default.** Maximize the size of each independent batch and run it in parallel. Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs. + +If task B depends on outputs from task A: + +- Do not spawn B until A has completed **and A's patch is applied** in the parent workspace. +- If the dependency chain is tight (download → generate → wire-up), prefer one `exec` task rather than splitting. + +Example dependency chain (schema download → generation): + +- Task A outputs: a new download target + new schema files. +- Task B inputs: those schema files; verifies by running generation. +- Therefore: run Task A (await + apply patch) before spawning Task B. + +## Patch integration loop (default) + +1. Identify a batch of independent subtasks. +2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity). +3. Await the batch via `task_await`. +4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches **one at a time**: + - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`. + - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results. + - Dry-run apply: `task_apply_git_patch` with `dry_run: true`. + - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`. + - Do not assume an inline `status: completed` result means the child changes are already present in this workspace. 
+ - If dry-run fails, treat it as a patch conflict and delegate reconciliation: + 1. Do not attempt a real apply for that patch in this workspace. + 2. Spawn a dedicated `exec` task. In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly. + - If real apply fails unexpectedly: + 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue. + 2. Then follow the same delegated reconciliation flow above. +5. Verify + review: + - Run focused verification directly with `bash` when practical (targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely. + - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks. + - PASS: summary-only (no long logs). + - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify. + +## Sequential protocol (only for dependency chains) + +1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`. +2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application. +3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress). 
import { describe, expect, test } from "bun:test";

import { SkillNameSchema } from "@/common/orpc/schemas";
import { getBuiltInSkillByName, getBuiltInSkillDescriptors } from "./builtInSkillDefinitions";

describe("built-in orchestrate skill", () => {
  const skillName = SkillNameSchema.parse("orchestrate");

  /** Fetch the orchestrate descriptor from the built-in descriptor list, if present. */
  const findDescriptor = () =>
    getBuiltInSkillDescriptors().find((candidate) => candidate.name === skillName);

  test("is registered as a built-in skill", () => {
    const descriptor = findDescriptor();
    expect(descriptor).toBeDefined();
    expect(descriptor!.scope).toBe("built-in");
  });

  test("is unadvertised so it stays out of the system-prompt skill index", () => {
    // The skill's frontmatter sets `advertise: false`: it stays reachable by
    // name (e.g. via `/orchestrate`) without being listed in the advertised
    // skill index.
    const descriptor = findDescriptor();
    expect(descriptor?.advertise).toBe(false);
  });

  test("body documents the delegate-first orchestration contract", () => {
    // Spot-check the load-bearing directives by substance rather than exact
    // prose: wording may drift, but gutting the playbook must fail this test.
    const skillPackage = getBuiltInSkillByName(skillName);
    expect(skillPackage).toBeDefined();

    const body = skillPackage!.body;
    const requiredDirectives = [
      /delegate-first/i,
      /task_apply_git_patch/,
      /dry[\s_-]*run/i,
      /Max Task Nesting Depth/i,
    ];
    for (const directive of requiredDirectives) {
      expect(body).toMatch(directive);
    }
  });
});
It teaches the **delegate-first** playbook that the former Orchestrator agent used: spawn sub-agents to do the work, integrate their patches, verify, and report.", + "", + "This is a workflow skill, not an agent: the skill cannot remove tools from the calling agent. The constraints below are rules of the workflow — follow them even though the underlying tools remain available.", + "", + "## Mission", + "", + "Coordinate implementation by delegating investigation + coding to sub-agents, then integrating their patches into this workspace.", + "", + "## Hard rules (delegate-first)", + "", + "- **Do not implement features/bugfixes directly in this workspace.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end. Even though your `file_edit_*` tools are available, treat them as off-limits for this workflow.", + "- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt to preserve your context window for coordination.", + "- **Trust `explore` sub-agent reports as authoritative for repo facts** (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if a report is ambiguous or contradicts other evidence. For correctness claims, an `explore` report counts as having read the referenced files.", + "- **`bash` is for orchestration only:** `git` / `gh` repo coordination, targeted post-apply verification, and waiting on PR review/CI. Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration. If a direct verification check fails due to a code issue, delegate the fix to `exec`/`plan` instead of patching it yourself.", + "- **Never read or scan session storage** (`~/.mux/sessions/**`, `~/.mux/sessions/subagent-patches/**`). Treat session storage as internal. Access patches only through `task_apply_git_patch`.", + "- **Do not call `propose_plan`** from this workflow. 
If planning is needed, delegate to a `plan` sub-agent.", + "", + "## When a plan is present", + "", + "If an accepted plan exists in this workspace:", + "", + "- Treat it as the source of truth. Paths/symbols/structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths, one targeted `explore` to sanity-check critical paths is acceptable.", + "- Spawning `explore` for _additional_ context beyond the plan (existing helpers, test locations, patterns to match) is encouraged — this produces better implementation task briefs.", + "- Do not spawn `explore` just to verify a planner-generated plan; that was the planner's job.", + "- Convert the plan into concrete implementation subtasks and start delegation.", + "", + "## Delegation guide", + "", + '- **`explore`** — narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). Avoid "scan the repo" prompts.', + "- **`exec`** — straightforward, low-complexity implementation where the path is obvious from the brief. Good fit: single-file edits, localized wiring to existing helpers, narrowly scoped follow-ups with clear acceptance.", + "- **`plan`** — higher-complexity subtasks that touch multiple files, require non-trivial investigation, or have an unclear approach. Default to `plan` when a subtask needs coordinated updates across multiple locations unless the edits are mechanical and fully specified. 
Plan subtasks automatically hand off to implementation after a successful `propose_plan`.", + "- **`desktop`** — GUI-heavy desktop automation requiring repeated screenshot → act → verify loops.", + "", + "## Task brief template (Orchestrate → Exec)", + "", + "- Task: ", + "- Background (why this matters):", + " - ", + "- Scope / non-goals:", + " - Scope: ", + " - Non-goals: ", + "- Starting points: ", + "- Dependencies / assumptions:", + " - Assumes: ", + " - If unmet: stop and report back; do not expand scope to create prerequisites.", + "- Acceptance: ", + "- Deliverables:", + " - Commits: ", + " - Verification: ", + "- Constraints:", + " - Do not expand scope.", + " - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation. Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory. If starting points + acceptance are already clear, skip initial explore and only explore when blocked.", + " - Create one or more git commits before `agent_report`.", + "", + "For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions.", + "", + "## Dependency analysis (required before spawning implementation tasks)", + "", + "For each candidate subtask, write:", + "", + "- **Outputs:** files/targets/artifacts introduced/renamed/generated.", + "- **Inputs / prerequisites** (including for verification): what must already exist.", + "", + 'A subtask is "independent" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch.', + "", + "**Parallelism is the default.** Maximize the size of each independent batch and run it in parallel. 
Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs.", + "", + "If task B depends on outputs from task A:", + "", + "- Do not spawn B until A has completed **and A's patch is applied** in the parent workspace.", + "- If the dependency chain is tight (download → generate → wire-up), prefer one `exec` task rather than splitting.", + "", + "Example dependency chain (schema download → generation):", + "", + "- Task A outputs: a new download target + new schema files.", + "- Task B inputs: those schema files; verifies by running generation.", + "- Therefore: run Task A (await + apply patch) before spawning Task B.", + "", + "## Patch integration loop (default)", + "", + "1. Identify a batch of independent subtasks.", + "2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity).", + "3. Await the batch via `task_await`.", + "4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches **one at a time**:", + " - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`.", + " - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results.", + " - Dry-run apply: `task_apply_git_patch` with `dry_run: true`.", + " - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`.", + " - Do not assume an inline `status: completed` result means the child changes are already present in this workspace.", + " - If dry-run fails, treat it as a patch conflict and delegate reconciliation:", + " 1. Do not attempt a real apply for that patch in this workspace.", + " 2. Spawn a dedicated `exec` task. 
In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly.", + " - If real apply fails unexpectedly:", + " 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue.", + " 2. Then follow the same delegated reconciliation flow above.", + "5. Verify + review:", + " - Run focused verification directly with `bash` when practical (targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely.", + " - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks.", + " - PASS: summary-only (no long logs).", + " - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify.", + "", + "## Sequential protocol (only for dependency chains)", + "", + "1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`.", + "2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application.", + "3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress).", + "4. 
Only then spawn the dependent task.", + "", + "## Prerequisites", + "", + "- **Max Task Nesting Depth must be ≥ 1** (Settings → Agents → Task Settings). Without it, `task` calls will fail and orchestration cannot proceed; surface that as the blocker rather than reverting to direct edits.", + "", + ].join("\n"), + }, };