-
Notifications
You must be signed in to change notification settings - Fork 106
π€ feat: restore Orchestrator as hidden /orchestrate skill #3295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ammar-agent
wants to merge
2
commits into
main
Choose a base branch
from
orchestrate-skill
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| import { describe, expect, test } from "bun:test"; | ||
| import { PLACEHOLDER_TIPS, getPlaceholderTip } from "./placeholderTips"; | ||
|
|
||
| const TWENTY_MIN_MS = 20 * 60 * 1000; | ||
|
|
||
| describe("getPlaceholderTip", () => { | ||
| test("returns the same tip for every call inside a single 20-minute bucket", () => { | ||
| // Anchor at a bucket boundary so any ms within the next 20 min must hash | ||
| // to the same tip. If they don't, switching workspaces / re-rendering | ||
| // inside the same bucket would reshuffle the tip β which is the exact | ||
| // flicker we're trying to prevent. | ||
| const bucketStart = TWENTY_MIN_MS * 100; // arbitrary aligned anchor | ||
| const tip = getPlaceholderTip(bucketStart); | ||
| expect(getPlaceholderTip(bucketStart + 1)).toBe(tip); | ||
| expect(getPlaceholderTip(bucketStart + TWENTY_MIN_MS - 1)).toBe(tip); | ||
| }); | ||
|
|
||
| test("advances to the next tip when the bucket boundary crosses", () => { | ||
| // Crossing the boundary must rotate β otherwise the carousel is silently | ||
| // stuck and the discoverability rationale is broken. | ||
| const bucketStart = TWENTY_MIN_MS * 100; | ||
| const before = getPlaceholderTip(bucketStart); | ||
| const after = getPlaceholderTip(bucketStart + TWENTY_MIN_MS); | ||
| expect(after).not.toBe(before); | ||
| }); | ||
|
|
||
| test("wraps with modulo so long-running clocks never lose the placeholder", () => { | ||
| // Far-future timestamps should still resolve to a tip rather than | ||
| // undefined / out-of-bounds. | ||
| const bigFuture = TWENTY_MIN_MS * PLACEHOLDER_TIPS.length * 5 + TWENTY_MIN_MS * 3; | ||
| expect(PLACEHOLDER_TIPS).toContain(getPlaceholderTip(bigFuture)); | ||
| }); | ||
|
|
||
| test("falls back to the lead tip on non-finite or negative inputs", () => { | ||
| // Defensive: mocked timers, broken clocks, or accidentally-passed | ||
| // sentinels should never produce undefined or throw. | ||
| expect(getPlaceholderTip(-1)).toBe(PLACEHOLDER_TIPS[0]); | ||
| expect(getPlaceholderTip(Number.NaN)).toBe(PLACEHOLDER_TIPS[0]); | ||
| expect(getPlaceholderTip(Number.POSITIVE_INFINITY)).toBe(PLACEHOLDER_TIPS[0]); | ||
| }); | ||
|
|
||
| test("includes a tip surfacing the /orchestrate skill", () => { | ||
| // /orchestrate is unadvertised in the system-prompt skill index, so the | ||
| // tip carousel is one of the few discovery surfaces users will see it | ||
| // on. If this tip disappears the skill becomes effectively invisible. | ||
| expect(PLACEHOLDER_TIPS.some((tip) => tip.includes("/orchestrate"))).toBe(true); | ||
| }); | ||
| }); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| /** | ||
| * Tip carousel for the ChatInput placeholder. | ||
| * | ||
| * The workspace ChatInput uses these strings as a rotating "Type a message..." | ||
| * placeholder so users who never read docs still get passive exposure to | ||
| * slash commands they probably don't know about β /btw above all. | ||
| * | ||
| * The tip rotates on a wall-clock bucket (not per-message, not per-workspace) | ||
| * so switching between chats never reshuffles the visible tip. Two tabs open | ||
| * to two workspaces show the same tip; close and re-open the app inside the | ||
| * same bucket and you still see the same tip. The bucket boundary is the | ||
| * only thing that advances the carousel. | ||
| */ | ||
|
|
||
| /** Bucket length for tip rotation. */ | ||
| const TIP_ROTATION_INTERVAL_MS = 20 * 60 * 1000; // 20 minutes | ||
|
|
||
| export const PLACEHOLDER_TIPS: readonly string[] = [ | ||
| "Try /btw <question> to ask a side question without nudging the agent", | ||
| "Try /haiku <msg> to send just this message on a different model", | ||
| "Try /+high <msg> to crank up reasoning for this message only", | ||
| "Try /compact to summarize the conversation when context gets tight", | ||
| "Try /fork <start> to branch this chat into a new workspace", | ||
| "Try /plan to view or edit the current plan inline", | ||
| "Try /orchestrate to coordinate sub-agents and integrate their patches", | ||
| "Try /goal <objective> to set a workspace goal with an optional budget", | ||
| "Try /clear --soft to reset context while keeping the chat visible", | ||
| "Try /new <start> to start a fresh workspace from the trunk branch", | ||
| "Try /vim to toggle vim keybindings in the chat input", | ||
| "Try /truncate 50 to drop the oldest half of the conversation", | ||
| ]; | ||
|
|
||
| /** | ||
| * Return the tip for the current wall-clock bucket. | ||
| * | ||
| * The bucket index is `floor(now / 20min)` modulo the tip list, so every | ||
| * caller in the same 20-minute window sees the same tip regardless of | ||
| * workspace, tab, or user-message count. `nowMs` is exposed for testing | ||
| * β production callers should let it default to `Date.now()`. | ||
| * | ||
| * Non-finite or negative inputs fall back to the lead tip so /btw stays | ||
| * the first thing a user sees in degenerate states (clock skew, mocked | ||
| * timers returning weird values, etc.). | ||
| */ | ||
| export function getPlaceholderTip(nowMs: number = Date.now()): string { | ||
| if (!Number.isFinite(nowMs) || nowMs < 0) { | ||
| return PLACEHOLDER_TIPS[0]; | ||
| } | ||
| const bucket = Math.floor(nowMs / TIP_ROTATION_INTERVAL_MS); | ||
| const index = bucket % PLACEHOLDER_TIPS.length; | ||
| return PLACEHOLDER_TIPS[index]; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| --- | ||
| name: orchestrate | ||
| description: Coordinate sub-agent implementation and apply patches (delegate-first orchestration playbook) | ||
| advertise: false | ||
|
ammar-agent marked this conversation as resolved.
|
||
| --- | ||
|
|
||
| # Orchestrate | ||
|
|
||
| Use this skill when the user invokes `/orchestrate` (or asks you to coordinate, orchestrate, or delegate a multi-step implementation). It teaches the **delegate-first** playbook that the former Orchestrator agent used: spawn sub-agents to do the work, integrate their patches, verify, and report. | ||
|
|
||
| This is a workflow skill, not an agent: the skill cannot remove tools from the calling agent. The constraints below are rules of the workflow β follow them even though the underlying tools remain available. | ||
|
|
||
| ## Mission | ||
|
|
||
| Coordinate implementation by delegating investigation + coding to sub-agents, then integrating their patches into this workspace. | ||
|
|
||
| ## Hard rules (delegate-first) | ||
|
|
||
| - **Do not implement features/bugfixes directly in this workspace.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end. Even though your `file_edit_*` tools are available, treat them as off-limits for this workflow. | ||
| - **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt to preserve your context window for coordination. | ||
| - **Trust `explore` sub-agent reports as authoritative for repo facts** (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if a report is ambiguous or contradicts other evidence. For correctness claims, an `explore` report counts as having read the referenced files. | ||
| - **`bash` is for orchestration only:** `git` / `gh` repo coordination, targeted post-apply verification, and waiting on PR review/CI. Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration. If a direct verification check fails due to a code issue, delegate the fix to `exec`/`plan` instead of patching it yourself. | ||
| - **Never read or scan session storage** (`~/.mux/sessions/**`, `~/.mux/sessions/subagent-patches/**`). Treat session storage as internal. Access patches only through `task_apply_git_patch`. | ||
| - **Do not call `propose_plan`** from this workflow. If planning is needed, delegate to a `plan` sub-agent. | ||
|
|
||
| ## When a plan is present | ||
|
|
||
| If an accepted plan exists in this workspace: | ||
|
|
||
| - Treat it as the source of truth. Paths/symbols/structure were validated during planning β do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths, one targeted `explore` to sanity-check critical paths is acceptable. | ||
| - Spawning `explore` for _additional_ context beyond the plan (existing helpers, test locations, patterns to match) is encouraged β this produces better implementation task briefs. | ||
| - Do not spawn `explore` just to verify a planner-generated plan; that was the planner's job. | ||
| - Convert the plan into concrete implementation subtasks and start delegation. | ||
|
|
||
| ## Delegation guide | ||
|
|
||
| - **`explore`** β narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). Avoid "scan the repo" prompts. | ||
| - **`exec`** β straightforward, low-complexity implementation where the path is obvious from the brief. Good fit: single-file edits, localized wiring to existing helpers, narrowly scoped follow-ups with clear acceptance. | ||
| - **`plan`** β higher-complexity subtasks that touch multiple files, require non-trivial investigation, or have an unclear approach. Default to `plan` when a subtask needs coordinated updates across multiple locations unless the edits are mechanical and fully specified. Plan subtasks automatically hand off to implementation after a successful `propose_plan`. | ||
| - **`desktop`** β GUI-heavy desktop automation requiring repeated screenshot β act β verify loops. | ||
|
|
||
| ## Task brief template (Orchestrate β Exec) | ||
|
|
||
| - Task: <one sentence> | ||
| - Background (why this matters): | ||
| - <bullet> | ||
| - Scope / non-goals: | ||
| - Scope: <what to change> | ||
| - Non-goals: <explicitly out of scope> | ||
| - Starting points: <paths / symbols / callsites> | ||
| - Dependencies / assumptions: | ||
| - Assumes: <prereq patch(es) already applied in parent workspace, or required files/targets already exist> | ||
| - If unmet: stop and report back; do not expand scope to create prerequisites. | ||
| - Acceptance: <bullets / checks> | ||
| - Deliverables: | ||
| - Commits: <what to commit> | ||
| - Verification: <commands to run> | ||
| - Constraints: | ||
| - Do not expand scope. | ||
| - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation. Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory. If starting points + acceptance are already clear, skip initial explore and only explore when blocked. | ||
| - Create one or more git commits before `agent_report`. | ||
|
|
||
| For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions. | ||
|
|
||
| ## Dependency analysis (required before spawning implementation tasks) | ||
|
|
||
| For each candidate subtask, write: | ||
|
|
||
| - **Outputs:** files/targets/artifacts introduced/renamed/generated. | ||
| - **Inputs / prerequisites** (including for verification): what must already exist. | ||
|
|
||
| A subtask is "independent" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch. | ||
|
|
||
| **Parallelism is the default.** Maximize the size of each independent batch and run it in parallel. Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs. | ||
|
|
||
| If task B depends on outputs from task A: | ||
|
|
||
| - Do not spawn B until A has completed **and A's patch is applied** in the parent workspace. | ||
| - If the dependency chain is tight (download β generate β wire-up), prefer one `exec` task rather than splitting. | ||
|
|
||
| Example dependency chain (schema download β generation): | ||
|
|
||
| - Task A outputs: a new download target + new schema files. | ||
| - Task B inputs: those schema files; verifies by running generation. | ||
| - Therefore: run Task A (await + apply patch) before spawning Task B. | ||
|
|
||
| ## Patch integration loop (default) | ||
|
|
||
| 1. Identify a batch of independent subtasks. | ||
| 2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity). | ||
| 3. Await the batch via `task_await`. | ||
| 4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches **one at a time**: | ||
| - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`. | ||
| - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results. | ||
| - Dry-run apply: `task_apply_git_patch` with `dry_run: true`. | ||
| - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`. | ||
| - Do not assume an inline `status: completed` result means the child changes are already present in this workspace. | ||
| - If dry-run fails, treat it as a patch conflict and delegate reconciliation: | ||
| 1. Do not attempt a real apply for that patch in this workspace. | ||
| 2. Spawn a dedicated `exec` task. In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly. | ||
| - If real apply fails unexpectedly: | ||
| 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue. | ||
| 2. Then follow the same delegated reconciliation flow above. | ||
| 5. Verify + review: | ||
| - Run focused verification directly with `bash` when practical (targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely. | ||
| - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks. | ||
| - PASS: summary-only (no long logs). | ||
| - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify. | ||
|
|
||
| ## Sequential protocol (only for dependency chains) | ||
|
|
||
| 1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`. | ||
| 2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application. | ||
| 3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress). | ||
| 4. Only then spawn the dependent task. | ||
|
|
||
| ## Prerequisites | ||
|
|
||
| - **Max Task Nesting Depth must be β₯ 1** (Settings β Agents β Task Settings). Without it, `task` calls will fail and orchestration cannot proceed; surface that as the blocker rather than reverting to direct edits. | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
src/node/services/agentSkills/builtInOrchestrateSkill.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| import { describe, expect, test } from "bun:test"; | ||
|
|
||
| import { SkillNameSchema } from "@/common/orpc/schemas"; | ||
| import { getBuiltInSkillByName, getBuiltInSkillDescriptors } from "./builtInSkillDefinitions"; | ||
|
|
||
| describe("built-in orchestrate skill", () => { | ||
| const name = SkillNameSchema.parse("orchestrate"); | ||
|
|
||
| test("is registered as a built-in skill", () => { | ||
| const descriptor = getBuiltInSkillDescriptors().find((d) => d.name === name); | ||
| expect(descriptor).toBeDefined(); | ||
| expect(descriptor!.scope).toBe("built-in"); | ||
| }); | ||
|
|
||
| test("is unadvertised so it stays out of the system-prompt skill index", () => { | ||
| // The skill is reachable via `/orchestrate` or `agent_skill_read({ name: "orchestrate" })` | ||
| // but does not appear in the advertised skill list that primes the model. | ||
| // This keeps the default UX uncluttered while preserving the orchestration workflow | ||
| // for users who explicitly want it (see RFC: restore Orchestrator as a hidden skill). | ||
| const descriptor = getBuiltInSkillDescriptors().find((d) => d.name === name); | ||
| expect(descriptor?.advertise).toBe(false); | ||
| }); | ||
|
|
||
| test("body documents the delegate-first orchestration contract", () => { | ||
| // Spot-check load-bearing directives β these are the rules a calling agent must | ||
| // follow when /orchestrate is invoked. We assert their substance (not exact prose) | ||
| // so wording can drift without breaking the test, but a wholesale gutting of the | ||
| // playbook would still fail. | ||
| const pkg = getBuiltInSkillByName(name); | ||
| expect(pkg).toBeDefined(); | ||
|
|
||
| const body = pkg!.body; | ||
| expect(body).toMatch(/delegate-first/i); | ||
| expect(body).toMatch(/task_apply_git_patch/); | ||
| expect(body).toMatch(/dry[\s_-]*run/i); | ||
| expect(body).toMatch(/Max Task Nesting Depth/i); | ||
| }); | ||
| }); |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.