coder · ammario · May 9, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx
@@ -552,16 +552,6 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
 
 </details>
 
-<details>
-<summary>propose_name (2)</summary>
-
-| Env var                | JSON path | Type   | Description                                                                                        |
-| ---------------------- | --------- | ------ | -------------------------------------------------------------------------------------------------- |
-| `MUX_TOOL_INPUT_NAME`  | `name`    | string | Codebase area (1-2 words, max 15 chars): lowercase, hyphens only, e.g. 'sidebar', 'auth', 'config' |
-| `MUX_TOOL_INPUT_TITLE` | `title`   | string | Human-readable title (2-5 words): verb-noun format like 'Fix plan mode'                            |
-
-</details>
-
 <details>
 <summary>skills_catalog_read (3)</summary>
 

diff --git a/scripts/gen_docs.ts b/scripts/gen_docs.ts
@@ -675,6 +675,9 @@ function generateToolHookEnvVarsBlock(): string {
   const tools = Object.entries(TOOL_DEFINITIONS).sort(([a], [b]) => a.localeCompare(b));
 
   for (const [toolName, def] of tools) {
+    // Skip internal/bespoke tools (e.g. propose_name, propose_status) — users
+    // can't write hooks for them, so listing their env vars is misleading.
+    if ((def as { internal?: boolean }).internal) continue;
     const vars = collectToolHookEnvVarsFromZodSchema(def.schema);
     if (vars.length === 0) continue;
 

diff --git a/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx b/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx
@@ -62,6 +62,10 @@ const EMOJI_TO_ICON: Record<string, LucideIcon> = {
 
   // Tool-ish / app-ish
   "🔧": Wrench,
+  // 🛠 (hammer-and-wrench) is what small models pick most often for a generic
+  // "fixing / building" sidebar status, so map it to Wrench alongside 🔧 to avoid the
+  // Sparkles fallback. NOTE(review): confirm the lookup also matches the U+FE0F-suffixed form.
+  "🛠": Wrench,
   "🔔": Bell,
   "🌐": Globe,
   "📖": BookOpen,

diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it, beforeEach, afterEach, mock, type Mock } from "bun:test";
+import { describe, expect, it, beforeEach, afterEach, mock, spyOn, type Mock } from "bun:test";
 import type { DisplayedMessage } from "@/common/types/message";
 import type { FrontendWorkspaceMetadata } from "@/common/types/workspace";
 import type { StreamStartEvent, ToolCallStartEvent } from "@/common/types/stream";
@@ -2591,6 +2591,60 @@ describe("WorkspaceStore", () => {
       expect(state.agentStatus).toEqual({ emoji: "🔄", message: "Run typecheck" });
     });
 
+    it("live todo derivation wins over aggregator getAgentStatus (status_set/heartbeat) for active workspaces", () => {
+      // Regression guard: aggregator.getAgentStatus() conflates status_set and
+      // muxMeta.displayStatus into one field. A status_set value persisted
+      // from a previous turn could mask a fresh todo_write in the current
+      // turn. Live todo must win.
+      const workspaceId = "active-live-todo-beats-aggregator-status";
+      createAndAddWorkspace(store, workspaceId);
+      seedPinnedTodos(store, workspaceId, [{ content: "Run typecheck", status: "in_progress" }]);
+
+      // Stub a non-empty getAgentStatus() on this workspace's aggregator
+      // (e.g. an old status_set from a previous turn). The precedence chain
+      // must ignore it because the live todo derivation is fresher.
+      const aggregator = store.getAggregator(workspaceId);
+      if (!aggregator) throw new Error("expected aggregator");
+      spyOn(aggregator, "getAgentStatus").mockReturnValue({
+        emoji: "🔍",
+        message: "Investigating crash",
+      });
+
+      const state = store.getWorkspaceState(workspaceId);
+      expect(state.agentStatus).toEqual({ emoji: "🔄", message: "Run typecheck" });
+    });
+
+    it("falls back to persisted AI status for active workspaces with no live todos", async () => {
+      // Live aggregator todos are the freshest signal for "what is the
+      // agent doing right now" because `todo_write` is processed
+      // synchronously, before the async setTodoStatus + activity-emit round
+      // trip. So when the workspace has live todos we prefer those (see
+      // the existing "derives active workspace status from the current todo
+      // list" test). When there are NO live todos, the AI-generated
+      // todoStatus from AgentStatusService still has to surface — that's
+      // the common "free-form chat without a todo list" case.
+      const workspaceId = "active-ai-no-live-todos";
+      const activitySnapshot: WorkspaceActivitySnapshot = {
+        recency: new Date("2024-01-04T13:00:00.000Z").getTime(),
+        streaming: true,
+        lastModel: "claude-sonnet-4",
+        lastThinkingLevel: null,
+        todoStatus: { emoji: "🛠️", message: "AI-generated summary" },
+      };
+
+      mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot });
+      recreateStore();
+      await tick(0);
+
+      createAndAddWorkspace(store, workspaceId);
+      // Intentionally no seedPinnedTodos: with no aggregator todos the live
+      // derivation yields undefined, so the persisted AI status must surface
+      // (assumes the fresh aggregator's getAgentStatus() is also empty).
+
+      const state = store.getWorkspaceState(workspaceId);
+      expect(state.agentStatus).toEqual(activitySnapshot.todoStatus ?? undefined);
+    });
+
     it("prefers todo-derived activity status for inactive workspaces", async () => {
       const workspaceId = "activity-fallback-todo-status-workspace";
       const activitySnapshot: WorkspaceActivitySnapshot = {
@@ -2634,6 +2688,57 @@ describe("WorkspaceStore", () => {
       expect(state.agentStatus).toEqual(activitySnapshot.displayStatus ?? undefined);
     });
 
+    it("uses todoStatus from the activity snapshot for inactive workspaces", async () => {
+      // todoStatus is the persistent sidebar slot, written by both the small-model
+      // AgentStatusService and the todo-derivation path. Inactive workspaces don't run the
+      // aggregator, so the snapshot's todoStatus is what the sidebar must show.
+      // NOTE(review): workspaceId below matches the preceding test's id; confirm intended.
+      const workspaceId = "activity-fallback-todo-status-workspace";
+      const activitySnapshot: WorkspaceActivitySnapshot = {
+        recency: new Date("2024-01-04T16:00:00.000Z").getTime(),
+        streaming: false,
+        lastModel: "claude-sonnet-4",
+        lastThinkingLevel: null,
+        todoStatus: { emoji: "🛠️", message: "Wiring sidebar precedence" },
+        hasTodos: true,
+      };
+
+      mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot });
+      recreateStore();
+      await tick(0);
+
+      createAndAddWorkspace(store, workspaceId, { createdAt: "2020-01-01T00:00:00.000Z" }, false);
+
+      const state = store.getWorkspaceState(workspaceId);
+      expect(state.agentStatus).toEqual(activitySnapshot.todoStatus ?? undefined);
+    });
+
+    it("keeps displayStatus precedence over todoStatus so explicit system status still wins", async () => {
+      // displayStatus is a deliberate, system-driven signal (e.g. "Compacting
+      // idle workspace…"). The snapshot carries both statuses, so the assertion
+      // only passes if displayStatus outranks todoStatus; otherwise a periodic
+      // small-model rewrite of todoStatus would mask the backend's explicit message.
+      const workspaceId = "activity-fallback-display-over-todo";
+      const activitySnapshot: WorkspaceActivitySnapshot = {
+        recency: new Date("2024-01-04T17:00:00.000Z").getTime(),
+        streaming: false,
+        lastModel: "claude-sonnet-4",
+        lastThinkingLevel: null,
+        displayStatus: { emoji: "💤", message: "Compacting idle workspace" },
+        todoStatus: { emoji: "🛠️", message: "Wiring sidebar precedence" },
+        hasTodos: false,
+      };
+
+      mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot });
+      recreateStore();
+      await tick(0);
+
+      createAndAddWorkspace(store, workspaceId, { createdAt: "2020-01-01T00:00:00.000Z" }, false);
+
+      const state = store.getWorkspaceState(workspaceId);
+      expect(state.agentStatus).toEqual(activitySnapshot.displayStatus ?? undefined);
+    });
+
     it("suppresses stale legacy status fallback when activity says the todo list is empty", async () => {
       const workspaceId = "activity-fallback-empty-todo-status";
       const activitySnapshot: WorkspaceActivitySnapshot = {

diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts
@@ -1745,13 +1745,40 @@ export class WorkspaceStore {
         !transient.caughtUp &&
         !hasRunningInitMessage;
       const aggregatorTodos = aggregator.getCurrentTodos();
+      // Sidebar status precedence, split into four tiers so each signal
+      // wins exactly when it should. Active and inactive workspaces draw
+      // from different sources but resolve through the same priority.
+      //
+      //   1. displayStatus (inactive only): system-driven transient status
+      //      from disk, e.g. "Compacting idle workspace…". Always wins.
+      //   2. liveTodoStatus (active only): the agent's most recent
+      //      `todo_write`, processed synchronously by the aggregator.
+      //      Beats the aggregator's persisted status_set value because
+      //      todo_write is the freshest explicit signal; beats persisted
+      //      todoStatus because the live aggregator state is ahead of
+      //      the async setTodoStatus + activity-emit round-trip.
+      //   3. fallbackAgentStatus (active only): aggregator.getAgentStatus()
+      //      — a blend of heartbeat / idle-compaction / background-turn
+      //      `displayStatus` events (genuinely transient) and the agent's
+      //      own `status_set` tool result (a pinned high-level intent).
+      //      Wins over persisted todoStatus so an AI-generated summary
+      //      doesn't mask an explicit system or agent-set message.
+      //   4. persistedTodoStatus: activity.todoStatus from disk. Either
+      //      a stale todo derivation or an AgentStatusService AI summary —
+      //      both writers target the same slot, last write wins. The
+      //      lowest tier so a newer in-memory signal always preempts.
+      //      For inactive workspaces, `hasTodos === false` blocks the
+      //      legacy aggregator-derive fallback so a freshly cleared todo
+      //      list doesn't briefly resurrect the stale derivation.
       const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined);
-      const todoStatus = useAggregatorState
-        ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined)
+      const liveTodoStatus = useAggregatorState ? deriveTodoStatus(aggregatorTodos) : undefined;
+      const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined;
+      const persistedTodoStatus = useAggregatorState
+        ? (activity?.todoStatus ?? undefined)
         : (activity?.todoStatus ??
           (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos)));
-      const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined;
-      const agentStatus = displayStatus ?? todoStatus ?? fallbackAgentStatus;
+      const agentStatus =
+        displayStatus ?? liveTodoStatus ?? fallbackAgentStatus ?? persistedTodoStatus;
 
       return {
         name: metadata?.name ?? workspaceId, // Fall back to ID if metadata missing

diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts
@@ -209,7 +209,7 @@ export const WorkspaceActivitySnapshotSchema = z.object({
   }),
   todoStatus: WorkspaceAgentStatusSchema.nullable().optional().meta({
     description:
-      "Status derived from the current todo list (preferred background progress surface in the sidebar).",
+      "Persistent sidebar status. Set by the small-model AgentStatusService when available, with a todo-derived fallback.",
   }),
   hasTodos: z.boolean().optional().meta({
     description: "Whether the workspace still had todos when streaming last stopped",

diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts
@@ -831,6 +831,27 @@ export const ProposeNameToolArgsSchema = z.object({
     .describe("Human-readable title (2-5 words): verb-noun format like 'Fix plan mode'"),
 });
 
+// -----------------------------------------------------------------------------
+// propose_status (sidebar agent status generation)
+// -----------------------------------------------------------------------------
+
+export const ProposeStatusToolArgsSchema = z.object({
+  emoji: z
+    .string()
+    .min(1)
+    .max(8) // length counts UTF-16 code units, not glyphs; long ZWJ sequences can exceed it
+    .describe(
+      "A single emoji that represents the agent's current activity (e.g. '🔍', '🛠️', '🧪', '📝')"
+    ),
+  message: z
+    .string()
+    .min(2)
+    .max(60) // hard length cap; the "2-6 words" guidance below is prompt text, not validated
+    .describe(
+      "A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status')"
+    ),
+});
+
 const MuxConfigFileSchema = z.enum(["providers", "config"]);
 
 /**
@@ -1320,11 +1341,23 @@ export const TOOL_DEFINITIONS = {
       "Each question must include 2–4 options; an 'Other' choice is provided automatically.",
     schema: AskUserQuestionToolArgsSchema,
   },
+  // `internal` tools are excluded from user-facing tool docs (hooks/tools.mdx
+  // env-var tables) because users can't write hooks for them — they run via
+  // bespoke streamText paths in their own services, not the standard tool
+  // execution pipeline. See gen_docs.ts.
   propose_name: {
     description:
       "Propose a workspace name and title. You MUST call this tool exactly once with your chosen name and title. " +
       "Do not emit a text response; call this tool immediately.",
     schema: ProposeNameToolArgsSchema,
+    internal: true,
+  },
+  propose_status: {
+    description:
+      "Propose a short sidebar status (emoji + 2-6 word verb-led phrase) summarizing what the agent is currently doing. " +
+      "You MUST call this tool exactly once. Do not emit a text response; call this tool immediately.",
+    schema: ProposeStatusToolArgsSchema,
+    internal: true,
   },
   propose_plan: {
     description:

diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
@@ -431,10 +431,12 @@ export async function getToolsForModel(
     ...(config.advisorRuntime ? { advisor: createAdvisorTool(config) } : {}),
     ask_user_question: createAskUserQuestionTool(config),
     propose_plan: createProposePlanTool(config),
-    // propose_name is intentionally NOT registered here — it's only used by
-    // the internal workspace-naming path (workspaceTitleGenerator.ts) which
-    // creates the tool inline. Exposing it in the default toolset would let
-    // exec-derived agents see its "call me immediately" description.
+    // propose_name and propose_status are intentionally NOT registered here —
+    // they are only used by the internal workspace-naming path
+    // (workspaceTitleGenerator.ts) and the sidebar agent-status path
+    // (workspaceStatusGenerator.ts), which create the tool inline. Exposing
+    // them in the default toolset would let exec-derived agents see their
+    // "call me immediately" descriptions.
     ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}),
     switch_agent: createSwitchAgentTool(config),
     todo_write: createTodoWriteTool(config),

diff --git a/src/constants/agentStatus.ts b/src/constants/agentStatus.ts
@@ -0,0 +1,47 @@
+/**
+ * Constants controlling the AI-generated sidebar agent status.
+ *
+ * The status is produced by the same "small model" path used for workspace
+ * title generation. We feed only a trailing window of chat (capped by both
+ * message count and token budget) and skip regeneration whenever the input
+ * is byte-for-byte unchanged.
+ */
+
+/**
+ * Per-workspace regeneration intervals, split four ways by (streaming, focus).
+ * Streaming (active) workspaces refresh much faster so the user can follow
+ * the agent in real time; idle workspaces (no active stream) back off
+ * because the chat isn't moving anyway. Either case backs off further when
+ * the desktop window is blurred. This comment covers the four constants below.
+ */
+export const AGENT_STATUS_ACTIVE_FOCUSED_INTERVAL_MS = 10 * 1000; // 10 seconds
+export const AGENT_STATUS_ACTIVE_UNFOCUSED_INTERVAL_MS = 30 * 1000; // 30 seconds
+export const AGENT_STATUS_IDLE_FOCUSED_INTERVAL_MS = 30 * 1000; // 30 seconds
+export const AGENT_STATUS_IDLE_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; // 2 minutes
+
+/**
+ * How often the scheduler wakes up to scan workspaces. Per-workspace cadence is
+ * enforced separately, so this can be small enough to make focus transitions feel
+ * snappy without driving redundant work. With AGENT_STATUS_MAX_CONCURRENT=1 the
+ * per-tick dispatch smooths load across workspaces, so no startup delay is needed.
+ * NOTE(review): same value as the active+focused interval; confirm ticks can't skip a refresh.
+ */
+export const AGENT_STATUS_TICK_INTERVAL_MS = 10 * 1000; // 10 seconds
+
+/** Token budget for the trailing chat-transcript window we feed the model. */
+export const AGENT_STATUS_MAX_TRANSCRIPT_TOKENS = 8000;
+
+/** Cap on the number of trailing messages we pull off disk before token trimming. */
+export const AGENT_STATUS_MAX_TRAILING_MESSAGES = 80;
+
+/**
+ * Cap on per-message text length before tokenization. Bounds pathological
+ * single messages (huge tool outputs) that would otherwise burn the budget.
+ */
+export const AGENT_STATUS_MAX_MESSAGE_CHARS = 4000; // characters, per the name
+
+/**
+ * Maximum concurrent model invocations across all workspaces. Keep small so
+ * a multi-workspace sweep doesn't spike provider bills or rate limits.
+ */
+export const AGENT_STATUS_MAX_CONCURRENT = 1;