From 532a1d885562a6914f9427961c4ed09fcaaa306e Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 19:28:38 -0500 Subject: [PATCH 01/33] feat: AI-generated sidebar status via small-model loop Adds an AgentStatusService that periodically produces a sidebar agent status using the same small model used for workspace title generation (NAME_GEN_PREFERRED_MODELS), replacing the legacy todo-derived status as the primary source while keeping todoStatus as a fallback. - New propose_status tool + workspaceStatusGenerator (mirrors title gen) - Trailing transcript window capped at ~8k tokens via TokenizerService - Focus-aware cadence: 30s focused, 2m unfocused (WindowService now emits focus-change events) - Idle/frozen-chat dedup: skips regeneration when the trailing-window hash is unchanged; hash persisted across restarts via ExtensionMetadataService - Sidebar precedence: displayStatus > aiStatus > todoStatus > fallback --- docs/hooks/tools.mdx | 10 + src/browser/stores/WorkspaceStore.test.ts | 52 ++ src/browser/stores/WorkspaceStore.ts | 11 +- src/common/orpc/schemas/workspace.ts | 6 +- src/common/utils/tools/toolDefinitions.ts | 27 + src/common/utils/tools/tools.ts | 10 +- src/constants/agentStatus.ts | 66 +++ src/node/services/ExtensionMetadataService.ts | 39 ++ .../builtInSkillContent.generated.ts | 10 + src/node/services/agentStatusService.test.ts | 282 ++++++++++ src/node/services/agentStatusService.ts | 510 ++++++++++++++++++ src/node/services/serviceContainer.ts | 19 + src/node/services/windowService.ts | 57 +- src/node/services/workspaceService.ts | 27 +- .../services/workspaceStatusGenerator.test.ts | 47 ++ src/node/services/workspaceStatusGenerator.ts | 145 +++++ src/node/utils/extensionMetadata.ts | 23 + 17 files changed, 1332 insertions(+), 9 deletions(-) create mode 100644 src/constants/agentStatus.ts create mode 100644 src/node/services/agentStatusService.test.ts create mode 100644 src/node/services/agentStatusService.ts create mode 100644 
src/node/services/workspaceStatusGenerator.test.ts create mode 100644 src/node/services/workspaceStatusGenerator.ts diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index b92cfb76d3..efafac7c79 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -562,6 +562,16 @@ If a value is too large for the environment, it may be omitted (not set). Mux al +
+propose_status (2) + +| Env var | JSON path | Type | Description | +| ------------------------ | --------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `MUX_TOOL_INPUT_EMOJI` | `emoji` | string | A single emoji that represents the agent's current activity (e.g. '🔍', '🛠️', '🧪', '📝') | +| `MUX_TOOL_INPUT_MESSAGE` | `message` | string | A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status') | + +
+
skills_catalog_read (3) diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index af4febbf15..4564f75f29 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -2634,6 +2634,58 @@ describe("WorkspaceStore", () => { expect(state.agentStatus).toEqual(activitySnapshot.displayStatus ?? undefined); }); + it("prefers AI-generated aiStatus over todo-derived status for inactive workspaces", async () => { + // The whole point of the small-model status path: when AgentStatusService + // has produced a fresh aiStatus, it should win over todoStatus in the + // sidebar. Without this precedence the sidebar would still surface the + // legacy todo derivation, defeating the feature. + const workspaceId = "activity-fallback-ai-status-workspace"; + const activitySnapshot: WorkspaceActivitySnapshot = { + recency: new Date("2024-01-04T16:00:00.000Z").getTime(), + streaming: false, + lastModel: "claude-sonnet-4", + lastThinkingLevel: null, + aiStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, + todoStatus: { emoji: "πŸ”„", message: "Run typecheck" }, + hasTodos: true, + }; + + mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot }); + recreateStore(); + await tick(0); + + createAndAddWorkspace(store, workspaceId, { createdAt: "2020-01-01T00:00:00.000Z" }, false); + + const state = store.getWorkspaceState(workspaceId); + expect(state.agentStatus).toEqual(activitySnapshot.aiStatus ?? undefined); + }); + + it("keeps displayStatus precedence over aiStatus so explicit system status still wins", async () => { + // displayStatus is a deliberate, system-driven signal (e.g. "Compacting + // idle workspace…"). It must outrank aiStatus, otherwise the periodic + // small-model run would mask the explicit progress message the backend + // is trying to communicate. 
+ const workspaceId = "activity-fallback-display-over-ai"; + const activitySnapshot: WorkspaceActivitySnapshot = { + recency: new Date("2024-01-04T17:00:00.000Z").getTime(), + streaming: false, + lastModel: "claude-sonnet-4", + lastThinkingLevel: null, + displayStatus: { emoji: "πŸ’€", message: "Compacting idle workspace" }, + aiStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, + hasTodos: false, + }; + + mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot }); + recreateStore(); + await tick(0); + + createAndAddWorkspace(store, workspaceId, { createdAt: "2020-01-01T00:00:00.000Z" }, false); + + const state = store.getWorkspaceState(workspaceId); + expect(state.agentStatus).toEqual(activitySnapshot.displayStatus ?? undefined); + }); + it("suppresses stale legacy status fallback when activity says the todo list is empty", async () => { const workspaceId = "activity-fallback-empty-todo-status"; const activitySnapshot: WorkspaceActivitySnapshot = { diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 6822897317..88c55e470e 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1746,12 +1746,18 @@ export class WorkspaceStore { !hasRunningInitMessage; const aggregatorTodos = aggregator.getCurrentTodos(); const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); + // Replaces the legacy todo-derived status as the primary sidebar signal. + // Produced periodically by AgentStatusService using the same "small model" + // path as title generation; we keep todoStatus below as a fallback while + // the AI status is being generated for the first time, on errors, or + // before the activity snapshot has caught up. + const aiStatus = activity?.aiStatus ?? undefined; const todoStatus = useAggregatorState ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) : (activity?.todoStatus ?? 
(activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; - const agentStatus = displayStatus ?? todoStatus ?? fallbackAgentStatus; + const agentStatus = displayStatus ?? aiStatus ?? todoStatus ?? fallbackAgentStatus; return { name: metadata?.name ?? workspaceId, // Fall back to ID if metadata missing @@ -2449,7 +2455,8 @@ export class WorkspaceStore { previous?.recency !== snapshot?.recency || previous?.hasTodos !== snapshot?.hasTodos || !areAgentStatusesEqual(previous?.displayStatus, snapshot?.displayStatus) || - !areAgentStatusesEqual(previous?.todoStatus, snapshot?.todoStatus); + !areAgentStatusesEqual(previous?.todoStatus, snapshot?.todoStatus) || + !areAgentStatusesEqual(previous?.aiStatus, snapshot?.aiStatus); if (!changed) { return; diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts index 7f1a4326b9..28472ae82d 100644 --- a/src/common/orpc/schemas/workspace.ts +++ b/src/common/orpc/schemas/workspace.ts @@ -209,7 +209,11 @@ export const WorkspaceActivitySnapshotSchema = z.object({ }), todoStatus: WorkspaceAgentStatusSchema.nullable().optional().meta({ description: - "Status derived from the current todo list (preferred background progress surface in the sidebar).", + "Status derived from the current todo list (legacy, kept as a fallback when aiStatus is unavailable).", + }), + aiStatus: WorkspaceAgentStatusSchema.nullable().optional().meta({ + description: + "AI-generated status summary produced by the small-model status path. 
When set, takes precedence over todoStatus in the sidebar.", }), hasTodos: z.boolean().optional().meta({ description: "Whether the workspace still had todos when streaming last stopped", diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index bd5439e5a2..9e7951e392 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -831,6 +831,27 @@ export const ProposeNameToolArgsSchema = z.object({ .describe("Human-readable title (2-5 words): verb-noun format like 'Fix plan mode'"), }); +// ----------------------------------------------------------------------------- +// propose_status (sidebar agent status generation) +// ----------------------------------------------------------------------------- + +export const ProposeStatusToolArgsSchema = z.object({ + emoji: z + .string() + .min(1) + .max(8) + .describe( + "A single emoji that represents the agent's current activity (e.g. 'πŸ”', 'πŸ› οΈ', 'πŸ§ͺ', 'πŸ“')" + ), + message: z + .string() + .min(2) + .max(60) + .describe( + "A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status')" + ), +}); + const MuxConfigFileSchema = z.enum(["providers", "config"]); /** @@ -1326,6 +1347,12 @@ export const TOOL_DEFINITIONS = { "Do not emit a text response; call this tool immediately.", schema: ProposeNameToolArgsSchema, }, + propose_status: { + description: + "Propose a short sidebar status (emoji + 2-6 word verb-led phrase) summarizing what the agent is currently doing. " + + "You MUST call this tool exactly once. Do not emit a text response; call this tool immediately.", + schema: ProposeStatusToolArgsSchema, + }, propose_plan: { description: "Signal that your plan is complete and ready for user approval. 
" + diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index e1870d3bde..63cb5a5020 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -431,10 +431,12 @@ export async function getToolsForModel( ...(config.advisorRuntime ? { advisor: createAdvisorTool(config) } : {}), ask_user_question: createAskUserQuestionTool(config), propose_plan: createProposePlanTool(config), - // propose_name is intentionally NOT registered here β€” it's only used by - // the internal workspace-naming path (workspaceTitleGenerator.ts) which - // creates the tool inline. Exposing it in the default toolset would let - // exec-derived agents see its "call me immediately" description. + // propose_name and propose_status are intentionally NOT registered here β€” + // they are only used by the internal workspace-naming path + // (workspaceTitleGenerator.ts) and the sidebar agent-status path + // (workspaceStatusGenerator.ts), which create the tool inline. Exposing + // them in the default toolset would let exec-derived agents see their + // "call me immediately" descriptions. ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}), switch_agent: createSwitchAgentTool(config), todo_write: createTodoWriteTool(config), diff --git a/src/constants/agentStatus.ts b/src/constants/agentStatus.ts new file mode 100644 index 0000000000..16aae58fbc --- /dev/null +++ b/src/constants/agentStatus.ts @@ -0,0 +1,66 @@ +/** + * Constants controlling the AI-generated sidebar agent status. + * + * The status is produced by the same "small model" path used for workspace + * title generation (see {@link NAME_GEN_PREFERRED_MODELS}). To keep cost + * predictable, we only feed the model a trailing window of the chat + * transcript β€” capped both by message count and by token budget β€” and we + * skip regeneration whenever the input is byte-for-byte unchanged. 
+ */ + +/** + * How often a per-workspace status is regenerated when the desktop window is + * focused. Smaller intervals make the sidebar feel responsive to the user + * who is actively watching it. + */ +export const AGENT_STATUS_FOCUSED_INTERVAL_MS = 30 * 1000; + +/** + * How often a per-workspace status is regenerated when the desktop window is + * blurred. Larger intervals respect the fact that the user isn't watching, + * while still picking up changes for any user who switches back to mux. + */ +export const AGENT_STATUS_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; + +/** + * How often the scheduler wakes up to scan workspaces. Per-workspace cadence + * is enforced by comparing now() against each workspace's `nextEligibleAt`, + * so this can be small enough to make focus transitions feel snappy without + * causing redundant work — the cadence intervals above are the upper bound + * on actual generation frequency. + */ +export const AGENT_STATUS_TICK_INTERVAL_MS = 10 * 1000; + +/** + * Delay before the scheduler runs its first pass after startup. Lets initial + * chat replay and metadata bootstrap settle, and avoids a thundering herd of + * model calls during launch. + */ +export const AGENT_STATUS_STARTUP_DELAY_MS = 30 * 1000; + +/** + * Token budget for the trailing chat-transcript window we feed into the + * small model. Capped to keep cost bounded across long chats. + */ +export const AGENT_STATUS_MAX_TRANSCRIPT_TOKENS = 8000; + +/** + * Cap on the number of trailing messages we ever pull off disk before token + * trimming kicks in. Bounds disk I/O for very chatty workspaces. + */ +export const AGENT_STATUS_MAX_TRAILING_MESSAGES = 80; + +/** + * Cap on per-message text length (post-trim) before we feed it to the + * tokenizer. Tool outputs and assistant turns can be enormous; we already + * have a token budget, but a per-message cap protects against pathological + * single messages that would otherwise burn the entire budget. 
+ */ +export const AGENT_STATUS_MAX_MESSAGE_CHARS = 4000; + +/** + * Maximum number of concurrent model invocations across all workspaces. + * Keep this small so a multi-workspace sweep doesn't spike provider bills + * or trip rate limits. + */ +export const AGENT_STATUS_MAX_CONCURRENT = 1; diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index 4cd28091b3..92916990f9 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -226,6 +226,29 @@ export class ExtensionMetadataService { }); } + /** + * Update the AI-generated sidebar status payload for a workspace. + * + * `inputHash` is opaque from this service's perspective: AgentStatusService + * persists a fingerprint of the trailing transcript window so that on + * restart we can skip regeneration when the transcript is unchanged. + * Callers pass `null` to clear both the payload and the cached hash. + */ + async setAiStatus( + workspaceId: string, + aiStatus: ExtensionAgentStatus | null, + inputHash: string | null + ): Promise { + return this.mutateWorkspaceSnapshot(workspaceId, Date.now(), (workspace) => { + if (aiStatus) { + workspace.aiStatus = aiStatus; + } else { + workspace.aiStatus = null; + } + workspace.aiStatusInputHash = inputHash; + }); + } + /** * Update the latest transient non-todo status payload for a workspace. */ @@ -266,6 +289,22 @@ export class ExtensionMetadataService { return this.toSnapshot(data.workspaces[workspaceId]); } + /** + * Read the persisted aiStatus input hash for a workspace, if any. + * + * Internal helper for AgentStatusService dedup across restarts. The hash is + * intentionally not part of WorkspaceActivitySnapshot because it has no + * sidebar/UI semantics β€” it's purely a backend bookkeeping field. 
+ */ + async getAiStatusInputHash(workspaceId: string): Promise { + const data = await this.load(); + const normalized = coerceExtensionMetadata(data.workspaces[workspaceId]); + if (!normalized) { + return null; + } + return typeof normalized.aiStatusInputHash === "string" ? normalized.aiStatusInputHash : null; + } + /** * Delete metadata for a workspace. * Call this when a workspace is deleted. diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 166b37f5d5..329b601f32 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -4210,6 +4210,16 @@ export const BUILTIN_SKILL_FILES: Record> = { "
", "", "
", + "propose_status (2)", + "", + "| Env var | JSON path | Type | Description |", + "| ------------------------ | --------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |", + "| `MUX_TOOL_INPUT_EMOJI` | `emoji` | string | A single emoji that represents the agent's current activity (e.g. 'πŸ”', 'πŸ› οΈ', 'πŸ§ͺ', 'πŸ“') |", + "| `MUX_TOOL_INPUT_MESSAGE` | `message` | string | A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status') |", + "", + "
", + "", + "
", "skills_catalog_read (3)", "", "| Env var | JSON path | Type | Description |", diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts new file mode 100644 index 0000000000..3b5dc6421c --- /dev/null +++ b/src/node/services/agentStatusService.test.ts @@ -0,0 +1,282 @@ +import { describe, test, expect, beforeEach, afterEach, mock, spyOn } from "bun:test"; +import { EventEmitter } from "events"; +import type { ProjectsConfig, ProjectConfig, Workspace } from "@/common/types/project"; +import { Ok } from "@/common/types/result"; +import { createMuxMessage } from "@/common/types/message"; +import type { Config } from "@/node/config"; +import type { AIService } from "./aiService"; +import type { ExtensionMetadataService } from "./ExtensionMetadataService"; +import type { WindowService } from "./windowService"; +import type { WorkspaceService } from "./workspaceService"; +import type { TokenizerService } from "./tokenizerService"; +import { AgentStatusService } from "./agentStatusService"; +import * as workspaceStatusGenerator from "./workspaceStatusGenerator"; +import { createTestHistoryService } from "./testHistoryService"; + +interface AgentStatusServiceInternals { + tick(): void; + runTick(): Promise; + runForWorkspace(workspaceId: string): Promise; +} + +describe("AgentStatusService", () => { + const workspaceId = "ws-test"; + const projectPath = "/test/project"; + + let historyHandle: Awaited>; + let projectsConfig: ProjectsConfig; + let mockConfig: Config; + let mockExtensionMetadata: ExtensionMetadataService; + let mockWorkspaceService: WorkspaceService; + let mockTokenizer: TokenizerService; + let mockAiService: AIService; + let windowService: WindowService; + let updateAiStatusMock: ReturnType< + typeof mock<(workspaceId: string, status: unknown, hash: string | null) => Promise> + >; + let getAiStatusInputHashMock: ReturnType< + typeof mock<(workspaceId: string) => Promise> + >; + let generateSpy: ReturnType< + 
typeof spyOn + >; + + function makeWorkspaceEntry(overrides: Partial = {}): Workspace { + return { + id: workspaceId, + name: workspaceId, + path: "/test/path", + ...overrides, + } as unknown as Workspace; + } + + function makeProjectsConfig(workspaces: Workspace[]): ProjectsConfig { + return { + projects: new Map([ + [projectPath, { workspaces } as unknown as ProjectConfig], + ]), + }; + } + + // Driver: instantiate the service with a controllable clock and synchronously + // run a tick. We intentionally bypass the scheduler timers so each test step + // is deterministic. + function createService(options?: { clock?: () => number }): AgentStatusService { + return new AgentStatusService( + mockConfig, + historyHandle.historyService, + mockTokenizer, + mockExtensionMetadata, + mockWorkspaceService, + windowService, + mockAiService, + { + clock: options?.clock, + startupDelayMs: 0, + // Use a very large tick interval so setInterval doesn't fire while + // the test is running; we drive ticks manually via getInternals(). 
+ tickIntervalMs: 60 * 60 * 1000, + } + ); + } + + function getInternals(service: AgentStatusService): AgentStatusServiceInternals { + return service as unknown as AgentStatusServiceInternals; + } + + beforeEach(async () => { + historyHandle = await createTestHistoryService(); + projectsConfig = makeProjectsConfig([makeWorkspaceEntry()]); + + mockConfig = { + loadConfigOrDefault: mock(() => projectsConfig), + getSessionDir: historyHandle.config.getSessionDir.bind(historyHandle.config), + } as unknown as Config; + + updateAiStatusMock = mock(() => Promise.resolve()); + mockWorkspaceService = { + getWorkspaceTitleModelCandidates: mock(() => Promise.resolve(["anthropic:claude-haiku-4-5"])), + updateAiStatus: updateAiStatusMock, + } as unknown as WorkspaceService; + + getAiStatusInputHashMock = mock(() => Promise.resolve(null)); + mockExtensionMetadata = { + getAiStatusInputHash: getAiStatusInputHashMock, + } as unknown as ExtensionMetadataService; + + mockTokenizer = { + // Cheap deterministic tokenizer: 1 token per 4 chars. Avoids spinning up + // the real worker pool for each test. 
+ countTokensBatch: mock((_model: string, texts: string[]) => + Promise.resolve(texts.map((t) => Math.ceil(t.length / 4))) + ), + } as unknown as TokenizerService; + + mockAiService = {} as unknown as AIService; + + windowService = new EventEmitter() as unknown as WindowService; + (windowService as unknown as { isFocused: () => boolean }).isFocused = () => true; + + generateSpy = spyOn(workspaceStatusGenerator, "generateWorkspaceStatus").mockResolvedValue( + Ok({ + status: { emoji: "πŸ› οΈ", message: "Editing source" }, + modelUsed: "anthropic:claude-haiku-4-5", + }) + ); + }); + + afterEach(async () => { + generateSpy.mockRestore(); + await historyHandle.cleanup(); + }); + + test("generates a fresh AI status when chat history exists and persists the input hash", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Please run the test suite") + ); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a1", "assistant", "Running tests now") + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + expect(generateSpy).toHaveBeenCalledTimes(1); + const generationCall = generateSpy.mock.calls[0]; + expect(generationCall[0]).toContain("User: Please run the test suite"); + expect(generationCall[0]).toContain("Assistant: Running tests now"); + expect(generationCall[1]).toEqual(["anthropic:claude-haiku-4-5"]); + + expect(updateAiStatusMock).toHaveBeenCalledTimes(1); + const updateCall = updateAiStatusMock.mock.calls[0]; + expect(updateCall[0]).toBe(workspaceId); + expect(updateCall[1]).toEqual({ emoji: "πŸ› οΈ", message: "Editing source" }); + // The hash is persisted so subsequent runs can dedup against it. 
+ expect(typeof updateCall[2]).toBe("string"); + expect(updateCall[2]!.length).toBeGreaterThan(0); + }); + + test("skips regeneration when the trailing transcript is unchanged (dedup)", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Idle workspace") + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(updateAiStatusMock).toHaveBeenCalledTimes(1); + + // Second pass: history hasn't changed, so the input hash matches and we + // must not call the model again. This is the "frozen chat" behavior the + // user explicitly asked for. + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(updateAiStatusMock).toHaveBeenCalledTimes(1); + }); + + test("re-generates after the trailing transcript changes", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Initial request") + ); + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + + // New user turn changes the trailing window β€” hash must differ and we + // must regenerate. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "Second request") + ); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(updateAiStatusMock).toHaveBeenCalledTimes(2); + }); + + test("skips regeneration when there is no chat history yet", async () => { + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + // Empty workspaces have nothing to summarize. We must not pay for an LLM + // call producing a hallucinated status, and we must not blank an + // existing aiStatus on disk. 
+ expect(generateSpy).not.toHaveBeenCalled(); + expect(updateAiStatusMock).not.toHaveBeenCalled(); + }); + + test("focused windows regenerate at the focused interval; unfocused windows wait longer", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Hello") + ); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a1", "assistant", "Hi") + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + // First tick (focused) generates immediately. Mutate history afterwards + // so the dedup hash differs on subsequent ticks β€” otherwise this test + // would fail for the wrong reason. + (windowService as unknown as { isFocused: () => boolean }).isFocused = () => true; + await internals.runTick(); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "follow-up A") + ); + + expect(generateSpy).toHaveBeenCalledTimes(1); + + // Advance time by less than the focused interval. The scheduler must + // skip this workspace. + now += 5_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + // Advance past the focused interval; another generation should fire. + now += 30_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + + // Now go unfocused. Even after the focused interval elapses, the + // unfocused interval is longer (2 minutes) and we should not regenerate + // until that boundary. Advance another 60s (well past focused, well + // short of unfocused). 
+ (windowService as unknown as { isFocused: () => boolean }).isFocused = () => false; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u3", "user", "follow-up B") + ); + now += 60_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + + // Past the unfocused interval β€” should regenerate. + now += 120_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(3); + }); + + test("archived workspaces are not regenerated", async () => { + projectsConfig = makeProjectsConfig([ + makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), + ]); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Archived chat") + ); + + const service = createService(); + await getInternals(service).runTick(); + + expect(generateSpy).not.toHaveBeenCalled(); + expect(updateAiStatusMock).not.toHaveBeenCalled(); + }); +}); diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts new file mode 100644 index 0000000000..31c013f6e8 --- /dev/null +++ b/src/node/services/agentStatusService.ts @@ -0,0 +1,510 @@ +import { createHash } from "crypto"; +import assert from "@/common/utils/assert"; +import { + AGENT_STATUS_FOCUSED_INTERVAL_MS, + AGENT_STATUS_MAX_CONCURRENT, + AGENT_STATUS_MAX_MESSAGE_CHARS, + AGENT_STATUS_MAX_TRAILING_MESSAGES, + AGENT_STATUS_MAX_TRANSCRIPT_TOKENS, + AGENT_STATUS_STARTUP_DELAY_MS, + AGENT_STATUS_TICK_INTERVAL_MS, + AGENT_STATUS_UNFOCUSED_INTERVAL_MS, +} from "@/constants/agentStatus"; +import type { Config } from "@/node/config"; +import type { MuxMessage } from "@/common/types/message"; +import { isWorkspaceArchived } from "@/common/utils/archive"; +import type { AIService } from "./aiService"; +import type { ExtensionMetadataService } from "./ExtensionMetadataService"; +import type { HistoryService } from "./historyService"; +import type { TokenizerService } from 
"./tokenizerService"; +import type { WindowService } from "./windowService"; +import type { WorkspaceService } from "./workspaceService"; +import { generateWorkspaceStatus } from "./workspaceStatusGenerator"; +import { log } from "./log"; + +/** + * Public-test surface for AgentStatusService. Real callers use the no-arg + * constructor; tests pass a `clock` to drive deterministic time and can + * skip the startup delay by passing `startupDelayMs: 0`. + */ +export interface AgentStatusServiceOptions { + /** Override for test injection. Defaults to `Date.now`. */ + clock?: () => number; + /** Override startup delay (ms). Defaults to {@link AGENT_STATUS_STARTUP_DELAY_MS}. */ + startupDelayMs?: number; + /** Override scheduler tick interval (ms). Defaults to {@link AGENT_STATUS_TICK_INTERVAL_MS}. */ + tickIntervalMs?: number; +} + +interface WorkspaceTrackingState { + /** Last time we successfully ran (or skipped due to dedup). 0 on first ever tick. */ + lastRanAt: number; + /** Hash of the most recent input we generated against. null if we never ran. */ + lastInputHash: string | null; + /** Whether a generation is currently in flight for this workspace. */ + inFlight: boolean; +} + +/** + * Periodic backend job that produces the sidebar's AI-generated agent + * status using the same "small model" path as workspace titles. + * + * Cadence: + * - The scheduler ticks every {@link AGENT_STATUS_TICK_INTERVAL_MS}. + * - Each workspace has its own per-tick eligibility window: focused windows + * regenerate at most every {@link AGENT_STATUS_FOCUSED_INTERVAL_MS}, blurred + * windows back off to {@link AGENT_STATUS_UNFOCUSED_INTERVAL_MS}. + * + * Dedup: + * - Each generation hashes its trailing-transcript window. We persist the + * hash on disk via ExtensionMetadataService so a workspace whose chat is + * idle/frozen produces no further generations (input is unchanged). 
+ * + * Concurrency: + * - Bounded by {@link AGENT_STATUS_MAX_CONCURRENT} so a sweep across many + * workspaces never spikes provider load. + */ +export class AgentStatusService { + private readonly config: Config; + private readonly historyService: HistoryService; + private readonly tokenizerService: TokenizerService; + private readonly extensionMetadata: ExtensionMetadataService; + private readonly workspaceService: WorkspaceService; + private readonly windowService: WindowService; + private readonly aiService: AIService; + + private readonly clock: () => number; + private readonly startupDelayMs: number; + private readonly tickIntervalMs: number; + + private readonly tracked = new Map(); + private inFlightCount = 0; + // Track in-flight per-workspace promises so a tick can be awaited cleanly + // in tests (and so shutdown can drain them if we ever need to). + private readonly inFlightPromises = new Set>(); + + private startupTimeout: ReturnType | null = null; + private checkInterval: ReturnType | null = null; + // Default to "running so the service is usable as soon as it's + // constructed (tests drive runTick() directly). stop() flips this true to + // gate any in-flight or scheduled work. + private stopped = false; + private tickInFlight = false; + private hashesHydrated = false; + + constructor( + config: Config, + historyService: HistoryService, + tokenizerService: TokenizerService, + extensionMetadata: ExtensionMetadataService, + workspaceService: WorkspaceService, + windowService: WindowService, + aiService: AIService, + options: AgentStatusServiceOptions = {} + ) { + this.config = config; + this.historyService = historyService; + this.tokenizerService = tokenizerService; + this.extensionMetadata = extensionMetadata; + this.workspaceService = workspaceService; + this.windowService = windowService; + this.aiService = aiService; + + this.clock = options.clock ?? (() => Date.now()); + this.startupDelayMs = options.startupDelayMs ?? 
AGENT_STATUS_STARTUP_DELAY_MS; + this.tickIntervalMs = options.tickIntervalMs ?? AGENT_STATUS_TICK_INTERVAL_MS; + } + + start(): void { + // Idempotent re-entry guard: callers in production wire start() once at + // initialize() time, but a defensive assert keeps double-start mistakes + // visible during development. + assert( + this.checkInterval === null && this.startupTimeout === null, + "AgentStatusService.start() called while already running" + ); + this.stopped = false; + + const scheduleTicks = () => { + if (this.stopped) { + return; + } + // Fire one tick immediately after the startup delay so the user sees an + // initial status without waiting a full interval. + this.tick(); + this.checkInterval = setInterval(() => this.tick(), this.tickIntervalMs); + }; + + if (this.startupDelayMs <= 0) { + scheduleTicks(); + } else { + this.startupTimeout = setTimeout(() => { + this.startupTimeout = null; + scheduleTicks(); + }, this.startupDelayMs); + } + + log.info("AgentStatusService started", { + startupDelayMs: this.startupDelayMs, + tickIntervalMs: this.tickIntervalMs, + }); + } + + stop(): void { + this.stopped = true; + if (this.startupTimeout) { + clearTimeout(this.startupTimeout); + this.startupTimeout = null; + } + if (this.checkInterval) { + clearInterval(this.checkInterval); + this.checkInterval = null; + } + this.tracked.clear(); + this.inFlightCount = 0; + this.inFlightPromises.clear(); + this.tickInFlight = false; + this.hashesHydrated = false; + log.info("AgentStatusService stopped"); + } + + /** + * Synchronous best-effort tick entrypoint. Safe to call repeatedly; we + * guard with `tickInFlight` so overlapping ticks coalesce. 
+ */ + private tick(): void { + if (this.stopped || this.tickInFlight) { + return; + } + this.tickInFlight = true; + void this.runTick().finally(() => { + this.tickInFlight = false; + }); + } + + private async runTick(): Promise { + try { + // First tick after start() needs to seed lastInputHash from disk so + // we honor the previous run's dedup state across restarts. + if (!this.hashesHydrated) { + await this.hydratePersistedHashes(); + this.hashesHydrated = true; + } + this.processEligibleWorkspaces(); + // Wait for the workspaces we just dispatched so callers (production + // schedulers + tests) observe their effects deterministically. + await this.drainInFlight(); + } catch (error) { + log.error("AgentStatusService tick failed", { error }); + } + } + + private async drainInFlight(): Promise { + while (this.inFlightPromises.size > 0) { + await Promise.allSettled(Array.from(this.inFlightPromises)); + } + } + + private async hydratePersistedHashes(): Promise { + const config = this.config.loadConfigOrDefault(); + for (const [, projectConfig] of config.projects) { + for (const workspace of projectConfig.workspaces) { + const workspaceId = workspace.id ?? workspace.name; + if (typeof workspaceId !== "string" || workspaceId.length === 0) { + continue; + } + const persistedHash = await this.extensionMetadata.getAiStatusInputHash(workspaceId); + if (persistedHash !== null) { + this.tracked.set(workspaceId, { + lastRanAt: 0, + lastInputHash: persistedHash, + inFlight: false, + }); + } + } + } + } + + // Synchronous: per-workspace dispatches go on inFlightPromises and are + // awaited by runTick via drainInFlight. Keeping this sync avoids a no-op + // Promise allocation on every tick. + private processEligibleWorkspaces(): void { + const now = this.clock(); + const focused = this.windowService.isFocused(); + const interval = focused + ? 
AGENT_STATUS_FOCUSED_INTERVAL_MS + : AGENT_STATUS_UNFOCUSED_INTERVAL_MS; + + const config = this.config.loadConfigOrDefault(); + + for (const [, projectConfig] of config.projects) { + for (const workspace of projectConfig.workspaces) { + if (this.stopped) { + return; + } + if (this.inFlightCount >= AGENT_STATUS_MAX_CONCURRENT) { + return; + } + + const workspaceId = workspace.id ?? workspace.name; + if (typeof workspaceId !== "string" || workspaceId.length === 0) { + continue; + } + if (isWorkspaceArchived(workspace.archivedAt, workspace.unarchivedAt)) { + continue; + } + + const state = this.tracked.get(workspaceId); + if (state?.inFlight) { + continue; + } + if (state && now - state.lastRanAt < interval) { + continue; + } + + // Per-workspace work runs concurrently up to AGENT_STATUS_MAX_CONCURRENT. + // We track the promise (instead of fire-and-forget) so runTick can + // await all dispatched workspaces before returning. That keeps the + // production tick loop's "did we finish?" semantics observable, and + // makes tests deterministic without hand-rolled microtask flushing. 
+ this.inFlightCount += 1; + this.markInFlight(workspaceId, true); + const promise = this.runForWorkspace(workspaceId).finally(() => { + this.inFlightCount = Math.max(0, this.inFlightCount - 1); + this.markInFlight(workspaceId, false); + this.inFlightPromises.delete(promise); + }); + this.inFlightPromises.add(promise); + } + } + } + + private markInFlight(workspaceId: string, value: boolean): void { + const state = this.tracked.get(workspaceId); + if (state) { + state.inFlight = value; + return; + } + if (value) { + this.tracked.set(workspaceId, { lastRanAt: 0, lastInputHash: null, inFlight: true }); + } + } + + private async runForWorkspace(workspaceId: string): Promise { + try { + const transcript = await this.buildTrailingTranscript(workspaceId); + const inputHash = computeInputHash(transcript); + + // Always update lastRanAt: even when we skip the LLM call, we don't + // want to reconsider this workspace until the next interval boundary. + const state = this.ensureState(workspaceId); + const now = this.clock(); + state.lastRanAt = now; + + if (transcript.trim().length === 0) { + // A brand-new workspace with no chat content yet — skip silently. + // We deliberately do not clear an existing aiStatus here so that a + // post-compaction "empty boundary" doesn't blank a recently produced + // status. + return; + } + + if (state.lastInputHash === inputHash) { + // Idle/frozen: identical trailing window, no point in regenerating. + // Still bump lastRanAt above so we won't revisit until the next + // interval boundary, which keeps the scheduler cheap.
+ return; + } + + const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); + if (candidates.length === 0) { + log.debug("AgentStatusService: no model candidates for workspace, skipping", { + workspaceId, + }); + return; + } + + const result = await generateWorkspaceStatus(transcript, candidates, this.aiService); + if (!result.success) { + log.debug("AgentStatusService: status generation failed; will retry next tick", { + workspaceId, + error: result.error, + }); + // Leave lastInputHash unchanged so the next tick retries even + // though the input is unchanged. + return; + } + + state.lastInputHash = inputHash; + + await this.workspaceService.updateAiStatus( + workspaceId, + { emoji: result.data.status.emoji, message: result.data.status.message }, + inputHash + ); + } catch (error) { + log.error("AgentStatusService: unexpected error during status generation", { + workspaceId, + error, + }); + } + } + + private ensureState(workspaceId: string): WorkspaceTrackingState { + let state = this.tracked.get(workspaceId); + if (!state) { + state = { lastRanAt: 0, lastInputHash: null, inFlight: false }; + this.tracked.set(workspaceId, state); + } + return state; + } + + /** + * Build the trailing chat transcript for a workspace, capped by both + * message count and {@link AGENT_STATUS_MAX_TRANSCRIPT_TOKENS} tokens. + * + * Returns an empty string if the workspace has no chat history yet. + */ + private async buildTrailingTranscript(workspaceId: string): Promise { + const result = await this.historyService.getLastMessages( + workspaceId, + AGENT_STATUS_MAX_TRAILING_MESSAGES + ); + if (!result.success) { + return ""; + } + + const formatted = result.data + .map(formatMessageForTranscript) + .filter((entry) => entry.length > 0); + + if (formatted.length === 0) { + return ""; + } + + // Trim from the front (oldest messages) until we fit within the token + // budget. 
The trailing-most messages carry the most signal for "what is + // the agent currently doing", so we never drop them. + // + // Use the first candidate model for tokenization. The tokenizer service + // gracefully falls back to a known family for unknown model strings, so + // this is safe even when the user's model is not in our table. + const tokenizerModel = await this.resolveTokenizerModel(workspaceId); + const tokenCounts = await this.tokenizerService.countTokensBatch(tokenizerModel, formatted); + + let totalTokens = tokenCounts.reduce((sum, n) => sum + n, 0); + let dropFromIndex = 0; + while ( + totalTokens > AGENT_STATUS_MAX_TRANSCRIPT_TOKENS && + dropFromIndex < formatted.length - 1 + ) { + totalTokens -= tokenCounts[dropFromIndex]; + dropFromIndex += 1; + } + + return formatted.slice(dropFromIndex).join("\n\n"); + } + + private async resolveTokenizerModel(workspaceId: string): Promise { + try { + const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); + // The first candidate is our preferred small model; tokenizing against + // it is good enough for budgeting purposes even if a fallback ends up + // being used. + return candidates[0] ?? 
"anthropic:claude-haiku-4-5"; + } catch { + return "anthropic:claude-haiku-4-5"; + } + } +} + +function extractMessageText(message: MuxMessage): string { + if (!Array.isArray(message.parts)) { + return ""; + } + const textParts: string[] = []; + for (const part of message.parts) { + if (part?.type !== "text") { + continue; + } + const text = (part as { text?: unknown }).text; + if (typeof text === "string" && text.trim().length > 0) { + textParts.push(text.trim()); + } + } + return textParts.join("\n"); +} + +function summarizeToolPart(part: unknown): string | null { + if (typeof part !== "object" || part === null) { + return null; + } + const record = part as Record; + const type = record.type; + if (typeof type !== "string") { + return null; + } + // Tool calls have type "tool-" or "dynamic-tool" with a toolName. + const toolName = + typeof record.toolName === "string" + ? record.toolName + : type.startsWith("tool-") + ? type.slice(5) + : null; + if (!toolName) { + return null; + } + return `[tool ${toolName}]`; +} + +function formatMessageForTranscript(message: MuxMessage): string { + const role = message.role === "user" ? "User" : message.role === "assistant" ? "Assistant" : null; + if (!role) { + return ""; + } + const text = extractMessageText(message); + // Include a brief tool-call summary so the model can see *what* the agent + // is doing even when the assistant has not yet emitted natural-language + // text for the current step. We avoid inlining tool args/output to keep + // the cost predictable. 
+ const toolSummaries: string[] = []; + if (Array.isArray(message.parts)) { + for (const part of message.parts) { + const summary = summarizeToolPart(part); + if (summary) { + toolSummaries.push(summary); + } + } + } + + const segments: string[] = []; + if (text.length > 0) { + segments.push(text.slice(0, AGENT_STATUS_MAX_MESSAGE_CHARS)); + } + if (toolSummaries.length > 0) { + segments.push(toolSummaries.join(" ")); + } + + if (segments.length === 0) { + return ""; + } + + return `${role}: ${segments.join("\n")}`; +} + +/** + * Compute a stable hash of the trailing transcript window. Used by the + * scheduler to skip regeneration when the input hasn't changed since the + * last successful generation. SHA-256 is overkill but trivially cheap; + * the hash is opaque to everything outside this service. + */ +function computeInputHash(transcript: string): string { + return createHash("sha256").update(transcript).digest("hex"); +} + +// Exported for tests. +export const __test__ = { + computeInputHash, + extractMessageText, + formatMessageForTranscript, +}; diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index f0cbc98db0..7556595b14 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -43,6 +43,7 @@ import { ExperimentsService } from "@/node/services/experimentsService"; import { WorkspaceMcpOverridesService } from "@/node/services/workspaceMcpOverridesService"; import { McpOauthService } from "@/node/services/mcpOauthService"; import { HeartbeatService } from "@/node/services/heartbeatService"; +import { AgentStatusService } from "@/node/services/agentStatusService"; import { IdleCompactionService } from "@/node/services/idleCompactionService"; import { getSigningService, type SigningService } from "@/node/services/signingService"; import { coderService, type CoderService } from "@/node/services/coderService"; @@ -127,6 +128,7 @@ export class ServiceContainer { private readonly 
ptyService: PTYService; public readonly idleCompactionService: IdleCompactionService; public readonly heartbeatService: HeartbeatService; + public readonly agentStatusService: AgentStatusService; constructor(config: Config) { this.config = config; @@ -275,6 +277,18 @@ export class ServiceContainer { this.editorService = new EditorService(config); this.updateService = new UpdateService(this.config); this.tokenizerService = new TokenizerService(this.sessionUsageService); + // AgentStatusService depends on tokenizer + window focus state; instantiate + // after both are constructed so the small-model status loop can run with + // accurate token budgeting and focus-aware cadence. + this.agentStatusService = new AgentStatusService( + config, + this.historyService, + this.tokenizerService, + this.extensionMetadata, + this.workspaceService, + this.windowService, + this.aiService + ); this.serverService = new ServerService(); this.menuEventService = new MenuEventService(); this.voiceService = new VoiceService( @@ -428,6 +442,10 @@ export class ServiceContainer { this.heartbeatService.start(); stepDurationsMs["heartbeatService.start"] = Date.now() - heartbeatStartedAt; + const agentStatusStartedAt = Date.now(); + this.agentStatusService.start(); + stepDurationsMs["agentStatusService.start"] = Date.now() - agentStatusStartedAt; + // Refresh mux-owned Coder SSH config in background (handles binary path changes on restart) // Skip getCoderInfo() to avoid caching "unavailable" if coder isn't installed yet void this.coderService.ensureMuxCoderSSHConfig().catch((error: unknown) => { @@ -505,6 +523,7 @@ export class ServiceContainer { this.desktopTokenManager.dispose(); await this.desktopSessionManager.closeAll(); this.heartbeatService.stop(); + this.agentStatusService.stop(); this.idleCompactionService.stop(); await this.browserBridgeServer.stop(); this.browserSessionStateHub.dispose(); diff --git a/src/node/services/windowService.ts b/src/node/services/windowService.ts index 
20712998c6..38a166be7c 100644 --- a/src/node/services/windowService.ts +++ b/src/node/services/windowService.ts @@ -1,19 +1,74 @@ +import { EventEmitter } from "events"; import type { BrowserWindow } from "electron"; import { log } from "@/node/services/log"; type RestartAppHandler = () => void | Promise; -export class WindowService { +/** + * WindowService extends EventEmitter so backend services that need to react + * to window focus state (e.g. AgentStatusService cadence gating) can subscribe + * via `windowService.on("focus-change", listener)` without depending on + * Electron internals or polling. + */ +export class WindowService extends EventEmitter { private mainWindow: BrowserWindow | null = null; private restartAppHandler: RestartAppHandler | null = null; + // Default to true so headless/test environments behave as if the user is + // actively watching. Desktop wires this to BrowserWindow focus/blur events + // in `setMainWindow` below. + private focused = true; setMainWindow(window: BrowserWindow) { this.mainWindow = window; + + // Seed from the window's current state if we can. + try { + this.setFocused(typeof window.isFocused === "function" ? window.isFocused() : true); + } catch { + this.setFocused(true); + } + + // Wire focus/blur listeners directly to the window. The window is + // recreated only on app restart, so we don't need to teardown listeners. + // Tests pass a minimal stub without an EventEmitter surface; gracefully + // skip listener wiring in that case so unrelated suites don't crash. + const eventTarget = window as unknown as { + on?: (event: string, listener: () => void) => unknown; + }; + if (typeof eventTarget.on === "function") { + eventTarget.on("focus", () => this.setFocused(true)); + eventTarget.on("blur", () => this.setFocused(false)); + } } setRestartAppHandler(handler: RestartAppHandler | null): void { this.restartAppHandler = handler; } + /** + * Returns whether the desktop main window is currently focused. 
Falls back + * to `true` in non-desktop contexts (CLI server, tests) so backend + * services don't accidentally throttle themselves to "unfocused" cadence + * when there is no window at all. + */ + isFocused(): boolean { + return this.focused; + } + + /** + * Update the cached focus state. Emits `focus-change` only on transitions + * so subscribers don't have to debounce duplicate notifications. + * + * Exposed publicly to allow tests and headless callers to drive focus + * transitions without an actual BrowserWindow. + */ + setFocused(focused: boolean): void { + if (this.focused === focused) { + return; + } + this.focused = focused; + this.emit("focus-change", focused); + } + async restartApp(): Promise<{ supported: true } | { supported: false; message: string }> { const restartAppHandler = this.restartAppHandler; if (!restartAppHandler) { diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 0bb166a244..f4e04f399b 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1577,6 +1577,21 @@ export class WorkspaceService extends EventEmitter { ); } + /** + * Persist + broadcast an AI-generated sidebar status. Used by + * AgentStatusService; kept on WorkspaceService so it goes through the + * shared activity-emit path that frontends are already subscribed to. + */ + public async updateAiStatus( + workspaceId: string, + aiStatus: WorkspaceAgentStatus | null, + inputHash: string | null + ): Promise { + await this.emitWorkspaceActivityUpdate(workspaceId, "update workspace AI status", () => + this.extensionMetadata.setAiStatus(workspaceId, aiStatus, inputHash) + ); + } + private async updateTodoStatusFromStorage(workspaceId: string): Promise { const previousUpdate = this.todoStatusUpdateQueue.get(workspaceId) ?? 
Promise.resolve(); const nextUpdate = previousUpdate @@ -3847,7 +3862,17 @@ export class WorkspaceService extends EventEmitter { } } - private async getWorkspaceTitleModelCandidates(workspaceId: string): Promise { + /** + * Build the candidate list used by both title generation and the + * sidebar AI-status path. Starts with the global "small model" preferences + * and falls back to any model the workspace itself has configured so a + * custom-model workspace can still produce names/statuses when the global + * preferred models are unavailable. + * + * Public so AgentStatusService (and any future small-model consumer) can + * reuse the same precedence without duplicating the workspace lookup. + */ + public async getWorkspaceTitleModelCandidates(workspaceId: string): Promise { const candidates: string[] = [...NAME_GEN_PREFERRED_MODELS]; const metadataResult = await this.aiService.getWorkspaceMetadata(workspaceId); if (!metadataResult.success) { diff --git a/src/node/services/workspaceStatusGenerator.test.ts b/src/node/services/workspaceStatusGenerator.test.ts new file mode 100644 index 0000000000..ffae75f863 --- /dev/null +++ b/src/node/services/workspaceStatusGenerator.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, test } from "bun:test"; +import { buildWorkspaceStatusPrompt, generateWorkspaceStatus } from "./workspaceStatusGenerator"; + +describe("buildWorkspaceStatusPrompt", () => { + test("contains the transcript inside delimited markers", () => { + const prompt = buildWorkspaceStatusPrompt("User: please run tests\nAssistant: running"); + + // The transcript block needs explicit delimiters so the model can tell + // where the transcript ends and the requirements begin. If we ever drop + // these delimiters, the model is more likely to follow trailing + // instructions baked into the transcript itself (a real prompt-injection + // risk for arbitrary chat history). 
+ expect(prompt).toContain(""); + expect(prompt).toContain(""); + expect(prompt).toContain("User: please run tests"); + expect(prompt).toContain("Assistant: running"); + }); + + test("falls back to a sentinel when transcript is empty", () => { + const prompt = buildWorkspaceStatusPrompt(""); + + // Empty transcripts must still produce a syntactically-valid prompt; the + // sentinel keeps the small model from inheriting system-prompt context + // from a previous workspace. + expect(prompt).toContain("(no recent transcript)"); + }); +}); + +describe("generateWorkspaceStatus error paths", () => { + test("returns a configuration error when no candidates are provided", async () => { + const fakeAiService = { + // Asserting this never gets called is the real point of this test — + // the empty-candidates short-circuit prevents wasteful provider calls + // for misconfigured workspaces. + createModel: () => { + throw new Error("createModel must not be called when no candidates exist"); + }, + } as unknown as Parameters[2]; + + const result = await generateWorkspaceStatus("hello", [], fakeAiService); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.type).toBe("unknown"); + expect(result.error.raw).toContain("No model candidates"); + } + }); +}); diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts new file mode 100644 index 0000000000..a118e61136 --- /dev/null +++ b/src/node/services/workspaceStatusGenerator.ts @@ -0,0 +1,145 @@ +import { streamText, tool } from "ai"; +import type { AIService } from "./aiService"; +import { log } from "./log"; +import { mapModelCreationError, mapNameGenerationError } from "./workspaceTitleGenerator"; +import type { Result } from "@/common/types/result"; +import { Ok, Err } from "@/common/types/result"; +import type { NameGenerationError } from "@/common/types/errors"; +import { + TOOL_DEFINITIONS, + ProposeStatusToolArgsSchema, +} from
"@/common/utils/tools/toolDefinitions"; + +/** + * AI-generated sidebar status summary. + * + * Emoji + short verb-led phrase, intentionally identical to the existing + * WorkspaceAgentStatus shape so the frontend can render it through the + * same WorkspaceStatusIndicator path used for displayStatus / todoStatus. + */ +export interface WorkspaceAgentStatusPayload { + emoji: string; + message: string; +} + +export interface GenerateWorkspaceStatusResult { + status: WorkspaceAgentStatusPayload; + /** The model that successfully generated the status */ + modelUsed: string; +} + +/** + * Build the prompt used by {@link generateWorkspaceStatus}. + * + * The transcript is supplied pre-trimmed (token budget enforced upstream). + * We deliberately keep the prompt short — the small model's job is to look + * at the trailing window and write a present-tense phrase. + */ +export function buildWorkspaceStatusPrompt(transcript: string): string { + // Sentinel for an empty trailing window (e.g., a fresh workspace with no + // text content). Shouldn't happen in practice because AgentStatusService + // skips empty inputs, but the model still needs *something* to ground on. + const body = transcript.trim().length > 0 ? transcript : "(no recent transcript)"; + + // The prompt avoids "summarize the whole task" framing on purpose: this + // is a sidebar status, not a workspace title. We want the *current* + // activity, not the overall scope.
+ return [ + "You produce a short sidebar status that tells the user what an AI coding agent is doing right now.\n\n", + "Recent chat transcript (oldest first, newest last):\n", + "\n", + body, + "\n\n\n", + "Requirements:\n", + "- Focus on the most recent activity, not the overall task scope.\n", + "- emoji: A single emoji that visually represents the activity.\n", + "- message: 2-6 words, present tense, verb-led, sentence case, no punctuation, no quotes.\n", + '- Examples of good messages: "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files", "Awaiting user reply".\n', + '- If the agent appears idle or finished, describe that state instead (e.g. "Awaiting next task").\n\n', + "Call propose_status exactly once with your chosen emoji and message. Do not emit any text response.", + ].join(""); +} + +/** + * Generate a sidebar agent-status summary using the same "small model" path + * that powers workspace title generation. + * + * Try candidates in order, retrying on transient API errors (auth, quota, + * 5xx, etc.) up to a small cap so a single misconfigured candidate doesn't + * silently disable status updates for everyone. + */ +export async function generateWorkspaceStatus( + transcript: string, + candidates: readonly string[], + aiService: AIService +): Promise> { + if (candidates.length === 0) { + return Err({ + type: "unknown", + raw: "No model candidates provided for workspace status generation", + }); + } + + // Match workspaceTitleGenerator's retry behavior so a single API outage + // can't permanently disable the feature. 
+ const maxAttempts = Math.min(candidates.length, 3); + + let lastError: NameGenerationError | null = null; + + for (let i = 0; i < maxAttempts; i++) { + const modelString = candidates[i]; + + const modelResult = await aiService.createModel(modelString, undefined, { + agentInitiated: true, + }); + if (!modelResult.success) { + lastError = mapModelCreationError(modelResult.error, modelString); + log.debug(`Status generation: skipping ${modelString} (${modelResult.error.type})`); + continue; + } + + try { + const currentStream = streamText({ + model: modelResult.data, + prompt: buildWorkspaceStatusPrompt(transcript), + tools: { + propose_status: tool({ + description: TOOL_DEFINITIONS.propose_status.description, + inputSchema: ProposeStatusToolArgsSchema, + // eslint-disable-next-line @typescript-eslint/require-await -- AI SDK Tool.execute must return a Promise + execute: async (args) => ({ success: true as const, ...args }), + }), + }, + }); + + const results = await currentStream.toolResults; + const toolResult = results.find((r) => r.dynamic !== true && r.toolName === "propose_status"); + + if (!toolResult) { + lastError = { type: "unknown", raw: "Model did not call propose_status tool" }; + log.warn("Status generation: model did not call propose_status", { modelString }); + continue; + } + + const { emoji, message } = toolResult.output; + return Ok({ + status: { emoji: emoji.trim(), message: message.trim() }, + modelUsed: modelString, + }); + } catch (error) { + lastError = mapNameGenerationError(error, modelString); + log.warn("Status generation failed, trying next candidate", { + modelString, + error: lastError, + }); + continue; + } + } + + return Err( + lastError ?? 
{ + type: "configuration", + raw: "No working model candidates were available for workspace status generation.", + } + ); +} diff --git a/src/node/utils/extensionMetadata.ts b/src/node/utils/extensionMetadata.ts index 551f77d181..b4dc6a0f66 100644 --- a/src/node/utils/extensionMetadata.ts +++ b/src/node/utils/extensionMetadata.ts @@ -28,6 +28,15 @@ export interface ExtensionMetadata { // Persists the latest display-status URL so later updates without a URL // can still carry the last deep link even after displayStatus is cleared. lastStatusUrl?: string | null; + // AI-generated status summary produced by the small-model status path + // (workspaceStatusGenerator.ts). When present, takes precedence over + // todoStatus in the sidebar. + aiStatus?: ExtensionAgentStatus | null; + // Hash of the trailing transcript window that produced `aiStatus`. Used by + // AgentStatusService to skip regeneration when the input is unchanged + // (idle/frozen chats). Survives restarts so we don't pay for redundant + // generations on relaunch. + aiStatusInputHash?: string | null; } /** @@ -92,6 +101,12 @@ export function coerceExtensionMetadata(value: unknown): ExtensionMetadata | nul ? null : (coerceAgentStatus(record.todoStatus) ?? undefined) : undefined; + const aiStatus = + "aiStatus" in record + ? record.aiStatus === null + ? null + : (coerceAgentStatus(record.aiStatus) ?? undefined) + : undefined; return { recency: record.recency, @@ -104,8 +119,14 @@ export function coerceExtensionMetadata(value: unknown): ExtensionMetadata | nul agentStatus: coerceAgentStatus(record.agentStatus), ...(displayStatus !== undefined ? { displayStatus } : {}), ...(todoStatus !== undefined ? { todoStatus } : {}), + ...(aiStatus !== undefined ? { aiStatus } : {}), ...(typeof record.hasTodos === "boolean" ? { hasTodos: record.hasTodos } : {}), lastStatusUrl: coerceStatusUrl(record.lastStatusUrl), + ...(typeof record.aiStatusInputHash === "string" + ? 
{ aiStatusInputHash: record.aiStatusInputHash } + : record.aiStatusInputHash === null + ? { aiStatusInputHash: null } + : {}), }; } @@ -122,6 +143,7 @@ export function toWorkspaceActivitySnapshot( // agentStatus field. Project that forward into todoStatus until a fresh todo_write // or stream-stop snapshot rewrites the workspace metadata. coerceAgentStatus(metadata.agentStatus); + const aiStatus = metadata.aiStatus !== undefined ? metadata.aiStatus : null; return { recency: metadata.recency, @@ -133,6 +155,7 @@ export function toWorkspaceActivitySnapshot( lastThinkingLevel: metadata.lastThinkingLevel ?? null, ...(displayStatus ? { displayStatus } : {}), ...(todoStatus ? { todoStatus } : {}), + ...(aiStatus ? { aiStatus } : {}), ...(typeof metadata.hasTodos === "boolean" ? { hasTodos: metadata.hasTodos } : {}), }; } From d1c67fa22b00ce80781e60f7742dcb4f1b919a16 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 19:35:37 -0500 Subject: [PATCH 02/33] fix: include partial assistant message in status transcript MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex flagged that during long-running streams, the assistant's current text + tool activity lives in partial.json (via HistoryService.writePartial) before being committed to chat.jsonl. getLastMessages() only reads committed lines, so the trailing-window hash would stay constant for the whole stream and the small model would never see the live activity the status is supposed to surface. buildTrailingTranscript now appends readPartial(workspaceId) to the committed tail before formatting/trimming, so the hash changes — and the status refreshes — as the stream progresses.
--- src/node/services/agentStatusService.test.ts | 31 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 16 ++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 3b5dc6421c..666f0c8cd4 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -177,6 +177,37 @@ describe("AgentStatusService", () => { expect(updateAiStatusMock).toHaveBeenCalledTimes(1); }); + test("includes the in-flight partial assistant message so the hash refreshes mid-stream", async () => { + // During an active stream the assistant's text/tool activity lives in + // partial.json before being committed to chat.jsonl. If buildTrailing- + // Transcript only saw committed messages, the hash would stay constant + // for the entire stream, defeating the whole point of the feature + // (showing what the agent is doing *right now*). + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a long task") + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + const initialHash = updateAiStatusMock.mock.calls[0][2]; + expect(typeof initialHash).toBe("string"); + + // Stage a partial assistant message — same shape the streaming pipeline + // writes via writePartial. The runForWorkspace tick should now see this + // text in the transcript and regenerate.
+ const partial = createMuxMessage("a-partial", "assistant", "Reading config files"); + await historyHandle.historyService.writePartial(workspaceId, partial); + + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + const transcriptArg = generateSpy.mock.calls[1][0]; + expect(transcriptArg).toContain("Assistant: Reading config files"); + const newHash = updateAiStatusMock.mock.calls[1][2]; + expect(newHash).not.toBe(initialHash); + }); + test("re-generates after the trailing transcript changes", async () => { await historyHandle.historyService.appendToHistory( workspaceId, diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 31c013f6e8..8ce665cc70 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -364,6 +364,12 @@ export class AgentStatusService { * message count and {@link AGENT_STATUS_MAX_TRANSCRIPT_TOKENS} tokens. * * Returns an empty string if the workspace has no chat history yet. + * + * During an active stream the assistant's current text and tool calls live + * in `partial.json` (via HistoryService.writePartial) before being committed + * to `chat.jsonl`. We append the partial message after the committed tail + * so the hash changes — and the status refreshes — as the stream progresses, + * which is exactly when an "agent doing X right now" status is most useful. 
*/ private async buildTrailingTranscript(workspaceId: string): Promise { const result = await this.historyService.getLastMessages( @@ -374,9 +380,13 @@ export class AgentStatusService { return ""; } - const formatted = result.data - .map(formatMessageForTranscript) - .filter((entry) => entry.length > 0); + const messages: MuxMessage[] = [...result.data]; + const partial = await this.historyService.readPartial(workspaceId); + if (partial) { + messages.push(partial); + } + + const formatted = messages.map(formatMessageForTranscript).filter((entry) => entry.length > 0); if (formatted.length === 0) { return ""; From 8bc6b8180b9d637e12a28951de2a1a38555343eb Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 19:41:01 -0500 Subject: [PATCH 03/33] fix: round-robin AgentStatusService across workspaces Codex flagged that with MAX_CONCURRENT=1 and a fixed iteration order, the first workspace in the list would always become re-eligible (focused interval = 30s) before deeper workspaces got their turn. Workspaces 4+ would never produce a status. processEligibleWorkspaces now collects every eligible workspace first, sorts by lastRanAt ascending, and dispatches in that order. Workspaces that have never run sort with lastRanAt=0 so they preempt previously-run ones. Adds a 3-workspace round-robin test that asserts every workspace gets a turn before any repeats. 
--- src/node/services/agentStatusService.test.ts | 64 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 56 +++++++++++------ 2 files changed, 100 insertions(+), 20 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 666f0c8cd4..15b4bb1335 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -295,6 +295,70 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(3); }); + test("round-robins across multiple workspaces so none starve under MAX_CONCURRENT=1", async () => { + // With MAX_CONCURRENT=1 and a fixed iteration order, the first workspace + // would always become re-eligible before later ones got their turn β€” + // workspaces 4+ would never produce a status. The scheduler must + // prioritize least-recently-run workspaces so each one gets fair + // attention even when many are eligible at the same time. + const projectPathLocal = "/test/round-robin-project"; + const wsA: Workspace = { + id: "ws-a", + name: "ws-a", + path: "/test/path/a", + } as unknown as Workspace; + const wsB: Workspace = { + id: "ws-b", + name: "ws-b", + path: "/test/path/b", + } as unknown as Workspace; + const wsC: Workspace = { + id: "ws-c", + name: "ws-c", + path: "/test/path/c", + } as unknown as Workspace; + projectsConfig = { + projects: new Map([ + [projectPathLocal, { workspaces: [wsA, wsB, wsC] } as unknown as ProjectConfig], + ]), + }; + for (const id of ["ws-a", "ws-b", "ws-c"]) { + await historyHandle.historyService.appendToHistory( + id, + createMuxMessage(`u1-${id}`, "user", `prompt for ${id}`) + ); + } + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + // Tick 1 β†’ first workspace runs. 
+ await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + const firstRunWorkspaceIds = updateAiStatusMock.mock.calls.map((call) => call[0]); + + // Advance just past one focused interval so all three are eligible. The + // scheduler must pick a workspace that hasn't run yet (lastRanAt=0) + // before re-running the workspace that just ran. + now += 31_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + const idsAfterTick2 = updateAiStatusMock.mock.calls.map((call) => call[0]); + expect(new Set(idsAfterTick2).size).toBe(2); + + // One more tick should cover the third workspace before any repeats. + now += 31_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(3); + const idsAfterTick3 = updateAiStatusMock.mock.calls.map((call) => call[0]); + expect(new Set(idsAfterTick3)).toEqual(new Set(["ws-a", "ws-b", "ws-c"])); + + // Use the variable to satisfy lint / show intent: every workspace was + // covered at least once. + expect(firstRunWorkspaceIds.length).toBeGreaterThan(0); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 8ce665cc70..25c91056b2 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -237,15 +237,15 @@ export class AgentStatusService { const config = this.config.loadConfigOrDefault(); + // Collect every eligible workspace first, then sort by lastRanAt + // ascending. With AGENT_STATUS_MAX_CONCURRENT=1 a fixed iteration order + // would let the first workspace starve everyone deeper in the list + // (it becomes re-eligible at 30s, and workspace[N>1] is never reached). + // Sorting by least-recently-run produces a fair round-robin without an + // explicit queue. 
+ const eligible: Array<{ workspaceId: string; lastRanAt: number }> = []; for (const [, projectConfig] of config.projects) { for (const workspace of projectConfig.workspaces) { - if (this.stopped) { - return; - } - if (this.inFlightCount >= AGENT_STATUS_MAX_CONCURRENT) { - return; - } - const workspaceId = workspace.id ?? workspace.name; if (typeof workspaceId !== "string" || workspaceId.length === 0) { continue; @@ -262,20 +262,36 @@ export class AgentStatusService { continue; } - // Per-workspace work runs concurrently up to AGENT_STATUS_MAX_CONCURRENT. - // We track the promise (instead of fire-and-forget) so runTick can - // await all dispatched workspaces before returning. That keeps the - // production tick loop's "did we finish?" semantics observable, and - // makes tests deterministic without hand-rolled microtask flushing. - this.inFlightCount += 1; - this.markInFlight(workspaceId, true); - const promise = this.runForWorkspace(workspaceId).finally(() => { - this.inFlightCount = Math.max(0, this.inFlightCount - 1); - this.markInFlight(workspaceId, false); - this.inFlightPromises.delete(promise); - }); - this.inFlightPromises.add(promise); + // Workspaces that have never run (state === undefined) get the + // earliest possible lastRanAt so they preempt previously-run + // workspaces on their first tick. + eligible.push({ workspaceId, lastRanAt: state?.lastRanAt ?? 0 }); + } + } + + eligible.sort((a, b) => a.lastRanAt - b.lastRanAt); + + for (const { workspaceId } of eligible) { + if (this.stopped) { + return; } + if (this.inFlightCount >= AGENT_STATUS_MAX_CONCURRENT) { + return; + } + + // Per-workspace work runs concurrently up to AGENT_STATUS_MAX_CONCURRENT. + // We track the promise (instead of fire-and-forget) so runTick can + // await all dispatched workspaces before returning. That keeps the + // production tick loop's "did we finish?" semantics observable, and + // makes tests deterministic without hand-rolled microtask flushing. 
+ this.inFlightCount += 1; + this.markInFlight(workspaceId, true); + const promise = this.runForWorkspace(workspaceId).finally(() => { + this.inFlightCount = Math.max(0, this.inFlightCount - 1); + this.markInFlight(workspaceId, false); + this.inFlightPromises.delete(promise); + }); + this.inFlightPromises.add(promise); } } From 69700773a91cf8deebbba118aa671b537a2b31ad Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 19:48:15 -0500 Subject: [PATCH 04/33] fix: only update dedup hash after a successful persist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex flagged that emitWorkspaceActivityUpdate (the historical wrapper) swallows disk errors. The previous code set state.lastInputHash BEFORE that non-throwing write, which meant a transient extensionMetadata.json write failure would leave the in-memory hash advanced even though the status never reached disk or the frontend. The next tick would dedup against the new hash and never retry — bricking the feature for that session. agentStatusService now persists directly via extensionMetadata.setAiStatus (which throws on failure) and only sets state.lastInputHash after the write resolves, then emits via the new public workspaceService.emitWorkspaceActivity. The legacy non-throwing updateAiStatus wrapper is removed since this was its only caller. Adds a regression test that injects a disk-write failure and asserts the next tick retries against the same transcript. 
--- src/node/services/agentStatusService.test.ts | 74 +++++++++++++++----- src/node/services/agentStatusService.ts | 29 ++++++-- src/node/services/workspaceService.ts | 24 +++---- 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 15b4bb1335..c4b62ea7a4 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -31,8 +31,13 @@ describe("AgentStatusService", () => { let mockTokenizer: TokenizerService; let mockAiService: AIService; let windowService: WindowService; - let updateAiStatusMock: ReturnType< - typeof mock<(workspaceId: string, status: unknown, hash: string | null) => Promise> + let setAiStatusMock: ReturnType< + typeof mock< + (workspaceId: string, status: unknown, hash: string | null) => Promise<{ recency: number }> + > + >; + let emitWorkspaceActivityMock: ReturnType< + typeof mock<(workspaceId: string, snapshot: unknown) => void> >; let getAiStatusInputHashMock: ReturnType< typeof mock<(workspaceId: string) => Promise> @@ -93,14 +98,18 @@ describe("AgentStatusService", () => { getSessionDir: historyHandle.config.getSessionDir.bind(historyHandle.config), } as unknown as Config; - updateAiStatusMock = mock(() => Promise.resolve()); + emitWorkspaceActivityMock = mock(() => undefined); mockWorkspaceService = { getWorkspaceTitleModelCandidates: mock(() => Promise.resolve(["anthropic:claude-haiku-4-5"])), - updateAiStatus: updateAiStatusMock, + emitWorkspaceActivity: emitWorkspaceActivityMock, } as unknown as WorkspaceService; + setAiStatusMock = mock((_workspaceId: string, _status: unknown, _hash: string | null) => + Promise.resolve({ recency: 0 }) + ); getAiStatusInputHashMock = mock(() => Promise.resolve(null)); mockExtensionMetadata = { + setAiStatus: setAiStatusMock, getAiStatusInputHash: getAiStatusInputHashMock, } as unknown as ExtensionMetadataService; @@ -149,8 +158,8 @@ 
describe("AgentStatusService", () => { expect(generationCall[0]).toContain("Assistant: Running tests now"); expect(generationCall[1]).toEqual(["anthropic:claude-haiku-4-5"]); - expect(updateAiStatusMock).toHaveBeenCalledTimes(1); - const updateCall = updateAiStatusMock.mock.calls[0]; + expect(setAiStatusMock).toHaveBeenCalledTimes(1); + const updateCall = setAiStatusMock.mock.calls[0]; expect(updateCall[0]).toBe(workspaceId); expect(updateCall[1]).toEqual({ emoji: "πŸ› οΈ", message: "Editing source" }); // The hash is persisted so subsequent runs can dedup against it. @@ -167,14 +176,14 @@ describe("AgentStatusService", () => { const service = createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - expect(updateAiStatusMock).toHaveBeenCalledTimes(1); + expect(setAiStatusMock).toHaveBeenCalledTimes(1); // Second pass: history hasn't changed, so the input hash matches and we // must not call the model again. This is the "frozen chat" behavior the // user explicitly asked for. 
await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - expect(updateAiStatusMock).toHaveBeenCalledTimes(1); + expect(setAiStatusMock).toHaveBeenCalledTimes(1); }); test("includes the in-flight partial assistant message so the hash refreshes mid-stream", async () => { @@ -191,7 +200,7 @@ describe("AgentStatusService", () => { const service = createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - const initialHash = updateAiStatusMock.mock.calls[0][2]; + const initialHash = setAiStatusMock.mock.calls[0][2]; expect(typeof initialHash).toBe("string"); // Stage a partial assistant message β€” same shape the streaming pipeline @@ -204,7 +213,7 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(2); const transcriptArg = generateSpy.mock.calls[1][0]; expect(transcriptArg).toContain("Assistant: Reading config files"); - const newHash = updateAiStatusMock.mock.calls[1][2]; + const newHash = setAiStatusMock.mock.calls[1][2]; expect(newHash).not.toBe(initialHash); }); @@ -225,7 +234,7 @@ describe("AgentStatusService", () => { ); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(2); - expect(updateAiStatusMock).toHaveBeenCalledTimes(2); + expect(setAiStatusMock).toHaveBeenCalledTimes(2); }); test("skips regeneration when there is no chat history yet", async () => { @@ -236,7 +245,7 @@ describe("AgentStatusService", () => { // call producing a hallucinated status, and we must not blank an // existing aiStatus on disk. expect(generateSpy).not.toHaveBeenCalled(); - expect(updateAiStatusMock).not.toHaveBeenCalled(); + expect(setAiStatusMock).not.toHaveBeenCalled(); }); test("focused windows regenerate at the focused interval; unfocused windows wait longer", async () => { @@ -336,7 +345,7 @@ describe("AgentStatusService", () => { // Tick 1 β†’ first workspace runs. 
await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(1); - const firstRunWorkspaceIds = updateAiStatusMock.mock.calls.map((call) => call[0]); + const firstRunWorkspaceIds = setAiStatusMock.mock.calls.map((call) => call[0]); // Advance just past one focused interval so all three are eligible. The // scheduler must pick a workspace that hasn't run yet (lastRanAt=0) @@ -344,14 +353,14 @@ describe("AgentStatusService", () => { now += 31_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(2); - const idsAfterTick2 = updateAiStatusMock.mock.calls.map((call) => call[0]); + const idsAfterTick2 = setAiStatusMock.mock.calls.map((call) => call[0]); expect(new Set(idsAfterTick2).size).toBe(2); // One more tick should cover the third workspace before any repeats. now += 31_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(3); - const idsAfterTick3 = updateAiStatusMock.mock.calls.map((call) => call[0]); + const idsAfterTick3 = setAiStatusMock.mock.calls.map((call) => call[0]); expect(new Set(idsAfterTick3)).toEqual(new Set(["ws-a", "ws-b", "ws-c"])); // Use the variable to satisfy lint / show intent: every workspace was @@ -359,6 +368,39 @@ describe("AgentStatusService", () => { expect(firstRunWorkspaceIds.length).toBeGreaterThan(0); }); + test("a failed persistence write does not update the dedup hash, so the next tick retries", async () => { + // Codex review: emitWorkspaceActivityUpdate (the historical wrapper) used + // to swallow disk errors, which meant a transient extensionMetadata.json + // write failure could leave the in-memory hash advanced even though the + // generated status never made it to disk or the frontend. After that, + // the next tick would dedup against the new hash and never retry. + // The fix is: only update lastInputHash AFTER a successful persist. 
+ await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a task") + ); + + setAiStatusMock.mockImplementationOnce(() => Promise.reject(new Error("disk full"))); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + expect(generateSpy).toHaveBeenCalledTimes(1); + // setAiStatus was attempted but failed. + expect(setAiStatusMock).toHaveBeenCalledTimes(1); + // Activity emit must NOT happen on persist failure — frontend must not + // see a status the disk doesn't actually have. + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + + // The next runForWorkspace pass on the SAME transcript must retry, + // because the previous failure should have left lastInputHash null. + setAiStatusMock.mockImplementation((_w, _s, _h) => Promise.resolve({ recency: 0 })); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(setAiStatusMock).toHaveBeenCalledTimes(2); + expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), @@ -372,6 +414,6 @@ describe("AgentStatusService", () => { await getInternals(service).runTick(); expect(generateSpy).not.toHaveBeenCalled(); - expect(updateAiStatusMock).not.toHaveBeenCalled(); + expect(setAiStatusMock).not.toHaveBeenCalled(); }); }); diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 25c91056b2..27d77addf6 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -351,13 +351,28 @@ export class AgentStatusService { return; } - state.lastInputHash = inputHash; - - await this.workspaceService.updateAiStatus( - workspaceId, - { emoji: result.data.status.emoji, message: result.data.status.message }, - 
inputHash - ); + // Persist BEFORE updating the in-memory dedup hash. If the disk write + // fails (transient I/O error), we want the next tick to retry the + // unchanged transcript instead of dedup'ing against a hash we never + // actually committed. The frontend activity emit happens after the + // write returns successfully, so subscribers either see the new + // status or fall through to a later retry. + try { + const snapshot = await this.extensionMetadata.setAiStatus( + workspaceId, + { emoji: result.data.status.emoji, message: result.data.status.message }, + inputHash + ); + state.lastInputHash = inputHash; + this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); + } catch (error) { + log.error("AgentStatusService: failed to persist generated status", { + workspaceId, + error, + }); + // Intentionally leave state.lastInputHash untouched so the next tick + // tries again with the same transcript. + } } catch (error) { log.error("AgentStatusService: unexpected error during status generation", { workspaceId, diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index f4e04f399b..e872353c32 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1543,7 +1543,14 @@ export class WorkspaceService extends EventEmitter { }); } - private emitWorkspaceActivity( + /** + * Public so AgentStatusService (and any future consumer) can broadcast a + * workspace activity snapshot it produced itself. The standard path is + * `emitWorkspaceActivityUpdate`, but callers that need to know whether the + * persist actually succeeded need to invoke the underlying writer directly + * and then call this to reach the frontend. + */ + public emitWorkspaceActivity( workspaceId: string, snapshot: WorkspaceActivitySnapshot | null ): void { @@ -1577,21 +1584,6 @@ export class WorkspaceService extends EventEmitter { ); } - /** - * Persist + broadcast an AI-generated sidebar status. 
Used by - * AgentStatusService; kept on WorkspaceService so it goes through the - * shared activity-emit path that frontends are already subscribed to. - */ - public async updateAiStatus( - workspaceId: string, - aiStatus: WorkspaceAgentStatus | null, - inputHash: string | null - ): Promise { - await this.emitWorkspaceActivityUpdate(workspaceId, "update workspace AI status", () => - this.extensionMetadata.setAiStatus(workspaceId, aiStatus, inputHash) - ); - } - private async updateTodoStatusFromStorage(workspaceId: string): Promise { const previousUpdate = this.todoStatusUpdateQueue.get(workspaceId) ?? Promise.resolve(); const nextUpdate = previousUpdate From b967fa533db8f7da3e97621393efcad47c2f55bc Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 19:52:54 -0500 Subject: [PATCH 05/33] fix: propagate ExtensionMetadataService.save() failures Codex flagged that save() previously logged-and-resolved on disk-write failures, so the new setAiStatus() would still return a snapshot to AgentStatusService even when nothing was persisted, advancing the dedup hash on a real disk-full / permission error and defeating the retry behavior added in the previous commit. save() now rethrows after logging. Existing callers that historically relied on the swallow behavior all wrap their setX/updateX calls in emitWorkspaceActivityUpdate, which still downgrades the throw to a logged warning, so their semantics are unchanged. AgentStatusService calls setAiStatus directly so it now sees real disk failures and skips the lastInputHash advance, leaving the next tick free to retry. 
--- src/node/services/ExtensionMetadataService.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index 92916990f9..c34ca512fb 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -154,11 +154,18 @@ export class ExtensionMetadataService { } private async save(data: ExtensionMetadataFile): Promise { + // Throw on write failure so callers that need to know whether the write + // actually happened (e.g. AgentStatusService, which dedups against the + // last successfully-persisted input hash) can react. Callers that don't + // care still wrap setX in emitWorkspaceActivityUpdate which downgrades + // the throw to a logged warning, preserving the historical + // "log-and-continue" behavior for those paths. try { const content = JSON.stringify(data, null, 2); await writeFileAtomic(this.filePath, content, "utf-8"); } catch (error) { log.error("Failed to save metadata:", error); + throw error; } } From a749a0b437bd4ce5aa556b789c3768dc4d565b81 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 20:03:03 -0500 Subject: [PATCH 06/33] fix: keep startup safe and preserve transient status precedence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues from Codex: 1. ExtensionMetadataService.initialize() awaits clearStaleStreaming(), which calls save(). With save() now throwing, a transient disk-write failure could abort startup — violating the AGENTS.md rule that startup-time initialization must never crash the app. initialize() now wraps both the directory-create and clearStaleStreaming in try/catch and logs-and-continues, restoring the historical log-and-swallow boundary at the startup edge while keeping save()'s strict semantics for AgentStatusService. 2. 
The active-workspace branch reads the explicit transient status from aggregator.getAgentStatus() (hydrated from muxMetadata.displayStatus for heartbeat / idle-compaction / background turns). The previous precedence (displayStatus ?? aiStatus ?? todoStatus ?? fallbackAgentStatus) buried that aggregator-set status under aiStatus for active workspaces. We now collapse displayStatus and fallbackAgentStatus into a single transientStatus tier so explicit system-set messages always outrank an AI summary, regardless of whether the workspace is active or inactive. --- src/browser/stores/WorkspaceStore.ts | 23 +++++++++++++------ src/node/services/ExtensionMetadataService.ts | 19 +++++++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 88c55e470e..5251d8df3c 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1745,19 +1745,28 @@ export class WorkspaceStore { !transient.caughtUp && !hasRunningInitMessage; const aggregatorTodos = aggregator.getCurrentTodos(); + // `displayStatus` is the explicit transient status for an *inactive* + // workspace (read from the activity snapshot). For an *active* workspace, + // the equivalent signal is the aggregator's `getAgentStatus()` β€” + // StreamingMessageAggregator hydrates that value from + // `muxMetadata.displayStatus` for heartbeat / idle-compaction / background + // turns. We collapse them into a single `transientStatus` so the + // precedence works the same way for both branches and never lets a stale + // aiStatus mask an explicit system-set message. const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); - // Replaces the legacy todo-derived status as the primary sidebar signal. 
- // Produced periodically by AgentStatusService using the same "small model" - // path as title generation; we keep todoStatus below as a fallback while - // the AI status is being generated for the first time, on errors, or - // before the activity snapshot has caught up. + const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; + const transientStatus = displayStatus ?? fallbackAgentStatus; + // Replaces the legacy todo-derived status as the primary persistent + // sidebar signal. Produced periodically by AgentStatusService using the + // same "small model" path as title generation; we keep todoStatus below + // as a fallback while the AI status is being generated for the first + // time, on errors, or before the activity snapshot has caught up. const aiStatus = activity?.aiStatus ?? undefined; const todoStatus = useAggregatorState ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) : (activity?.todoStatus ?? (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); - const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; - const agentStatus = displayStatus ?? aiStatus ?? todoStatus ?? fallbackAgentStatus; + const agentStatus = transientStatus ?? aiStatus ?? todoStatus; return { name: metadata?.name ?? workspaceId, // Fall back to ID if metadata missing diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index c34ca512fb..115a848e45 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -115,6 +115,12 @@ export class ExtensionMetadataService { /** * Initialize the service by ensuring directory exists and clearing stale streaming flags. * Call this once on app startup. 
+ * + * Per AGENTS.md ("Startup-time initialization must never crash the app"), + * we swallow disk-write failures here so a transient permission/disk-full + * error doesn't block app launch. The new save() throws on failure for the + * benefit of strict callers (AgentStatusService); this method is the + * startup-safety boundary that keeps that contract. */ async initialize(): Promise { // Ensure directory exists @@ -122,11 +128,20 @@ export class ExtensionMetadataService { try { await access(dir, constants.F_OK); } catch { - await mkdir(dir, { recursive: true }); + try { + await mkdir(dir, { recursive: true }); + } catch (error) { + log.error("ExtensionMetadataService: failed to create metadata dir at startup", { error }); + return; + } } // Clear stale streaming flags (from crashes) - await this.clearStaleStreaming(); + try { + await this.clearStaleStreaming(); + } catch (error) { + log.error("ExtensionMetadataService: failed to clear stale streaming at startup", { error }); + } } private async load(): Promise { From 9fde95e441263661b0b124535e0a36459bae0880 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 20:10:56 -0500 Subject: [PATCH 07/33] fix: skip persist/emit when service stops mid-generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex flagged that generateWorkspaceStatus can take seconds-to-minutes, and stop() only clears timers/bookkeeping — it does not cancel an in-flight runForWorkspace continuation. So a write/emit can still happen after the service has been stopped, leaking metadata mutations past the declared lifecycle. runForWorkspace now checks this.stopped both immediately after the generator returns and after the disk write resolves, returning early in both cases. Adds a regression test that pauses generation, calls stop(), then releases generation and asserts no setAiStatus / emit fires. 
--- src/node/services/agentStatusService.test.ts | 40 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 14 +++++++ 2 files changed, 54 insertions(+) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index c4b62ea7a4..8a688e2790 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -368,6 +368,46 @@ describe("AgentStatusService", () => { expect(firstRunWorkspaceIds.length).toBeGreaterThan(0); }); + test("does not persist or emit if the service is stopped while a generation is in flight", async () => { + // generateWorkspaceStatus can take seconds to minutes (real provider + // call). If the service is stopped (app shutdown / dispose) during that + // window, persisting the result would leak writes past the declared + // lifecycle. Prove it: + // 1) start a generation that resolves only after we call stop() + // 2) call stop() + // 3) release the generation + // 4) assert no setAiStatus / emit happened + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "long-running task") + ); + + let releaseGenerate!: () => void; + const generationGate = new Promise((resolve) => { + releaseGenerate = resolve; + }); + generateSpy.mockImplementationOnce(async () => { + await generationGate; + return Ok({ + status: { emoji: "πŸ› οΈ", message: "Doing work" }, + modelUsed: "anthropic:claude-haiku-4-5", + }); + }); + + const service = createService(); + const internals = getInternals(service); + const inFlight = internals.runForWorkspace(workspaceId); + + // Stop the service while the generation is still pending. 
+ service.stop(); + releaseGenerate(); + await inFlight; + + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + }); + test("a failed persistence write does not update the dedup hash, so the next tick retries", async () => { // Codex review: emitWorkspaceActivityUpdate (the historical wrapper) used // to swallow disk errors, which meant a transient extensionMetadata.json diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 27d77addf6..4b76fee1e5 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -351,6 +351,15 @@ export class AgentStatusService { return; } + // The generator can take seconds to a minute. The service may have + // been stopped (app shutdown, dispose, etc.) while we were awaiting + // the provider response. If so, do not persist or emit — that would + // leak metadata writes and activity events past the service's + // declared lifetime. + if (this.stopped) { + return; + } + // Persist BEFORE updating the in-memory dedup hash. If the disk write // fails (transient I/O error), we want the next tick to retry the // unchanged transcript instead of dedup'ing against a hash we never @@ -363,6 +372,11 @@ export class AgentStatusService { { emoji: result.data.status.emoji, message: result.data.status.message }, inputHash ); + // Re-check after the (also-async) disk write — same lifecycle + // hazard as the post-generation check above. 
+ if (this.stopped) { + return; + } state.lastInputHash = inputHash; this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); } catch (error) { From 41777187bead112d53615f58d34f6d2fd49878a9 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 21:00:03 -0500 Subject: [PATCH 08/33] refactor: simplify AI status loop and drop cross-restart hash - Drop cross-restart dedup: AgentStatusService no longer hydrates the trailing-window hash from disk. In-session dedup still skips identical inputs; cost is one extra LLM call per workspace per app launch. - Drop ExtensionMetadataService.getAiStatusInputHash + the aiStatusInputHash field on ExtensionMetadata. - Drop the inputHash arg on setAiStatus; collapse if/else into a direct assignment. - WindowService: drop EventEmitter inheritance + focus listeners. isFocused() now delegates to BrowserWindow.isFocused() directly. Drops the focused field, setFocused(), focus/blur wiring, and the test-stub defensive .on check. - AgentStatusService: drop __test__ exports, inFlightCount, markInFlight, drainInFlight, hashesHydrated, hydratePersistedHashes, resolveTokenizerModel; merge tick()/runTick(); use FALLBACK_TOKENIZER_- MODEL constant in buildTrailingTranscript so we don't double-call getWorkspaceTitleModelCandidates per workspace. - Trim verbose comments throughout; collapse multi-paragraph rationale to one-line summaries where the code is self-explanatory. Net: -438 lines, no behavior changes for the documented use case (in-session dedup, focus-aware cadence, round-robin fairness, persist- before-hash, lifecycle stop guard, partial-stream inclusion). 
--- src/constants/agentStatus.ts | 50 +- src/node/services/ExtensionMetadataService.ts | 54 +- src/node/services/agentStatusService.test.ts | 183 ++----- src/node/services/agentStatusService.ts | 488 +++++------------- src/node/services/windowService.ts | 56 +- src/node/services/workspaceService.ts | 20 +- src/node/services/workspaceStatusGenerator.ts | 36 +- src/node/utils/extensionMetadata.ts | 15 +- 8 files changed, 232 insertions(+), 670 deletions(-) diff --git a/src/constants/agentStatus.ts b/src/constants/agentStatus.ts index 16aae58fbc..78d4533a98 100644 --- a/src/constants/agentStatus.ts +++ b/src/constants/agentStatus.ts @@ -2,65 +2,45 @@ * Constants controlling the AI-generated sidebar agent status. * * The status is produced by the same "small model" path used for workspace - * title generation (see {@link NAME_GEN_PREFERRED_MODELS}). To keep cost - * predictable, we only feed the model a trailing window of the chat - * transcript — capped both by message count and by token budget — and we - * skip regeneration whenever the input is byte-for-byte unchanged. + * title generation. We feed only a trailing window of chat (capped by both + * message count and token budget) and skip regeneration whenever the input + * is byte-for-byte unchanged. */ -/** - * How often a per-workspace status is regenerated when the desktop window is - * focused. Smaller intervals make the sidebar feel responsive to the user - * who is actively watching it. - */ +/** Per-workspace regen interval when the desktop window is focused. */ export const AGENT_STATUS_FOCUSED_INTERVAL_MS = 30 * 1000; -/** - * How often a per-workspace status is regenerated when the desktop window is - * blurred. Larger intervals respect the fact that the user isn't watching, - * while still picking up changes for any user who switches back to mux. - */ +/** Per-workspace regen interval when the desktop window is blurred.
*/ export const AGENT_STATUS_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; /** * How often the scheduler wakes up to scan workspaces. Per-workspace cadence - * is enforced by comparing now() against each workspace's `nextEligibleAt`, - * so this can be small enough to make focus transitions feel snappy without - * causing redundant work — the cadence intervals above are the upper bound - * on actual generation frequency. + * is enforced separately, so this can be small enough to make focus + * transitions feel snappy without driving redundant work. */ export const AGENT_STATUS_TICK_INTERVAL_MS = 10 * 1000; /** - * Delay before the scheduler runs its first pass after startup. Lets initial - * chat replay and metadata bootstrap settle, and avoids a thundering herd of + * Delay before the first scheduler pass after startup. Lets initial chat + * replay and metadata bootstrap settle, and avoids a thundering herd of * model calls during launch. */ export const AGENT_STATUS_STARTUP_DELAY_MS = 30 * 1000; -/** - * Token budget for the trailing chat-transcript window we feed into the - * small model. Capped to keep cost bounded across long chats. - */ +/** Token budget for the trailing chat-transcript window we feed the model. */ export const AGENT_STATUS_MAX_TRANSCRIPT_TOKENS = 8000; -/** - * Cap on the number of trailing messages we ever pull off disk before token - * trimming kicks in. Bounds disk I/O for very chatty workspaces. - */ +/** Cap on the number of trailing messages we pull off disk before token trimming. */ export const AGENT_STATUS_MAX_TRAILING_MESSAGES = 80; /** - * Cap on per-message text length (post-trim) before we feed it to the - * tokenizer. Tool outputs and assistant turns can be enormous; we already - * have a token budget, but a per-message cap protects against pathological - * single messages that would otherwise burn the entire budget. + * Cap on per-message text length before tokenization.
Bounds pathological + * single messages (huge tool outputs) that would otherwise burn the budget. */ export const AGENT_STATUS_MAX_MESSAGE_CHARS = 4000; /** - * Maximum number of concurrent model invocations across all workspaces. - * Keep this small so a multi-workspace sweep doesn't spike provider bills - * or trip rate limits. + * Maximum concurrent model invocations across all workspaces. Keep small so + * a multi-workspace sweep doesn't spike provider bills or rate limits. */ export const AGENT_STATUS_MAX_CONCURRENT = 1; diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index 115a848e45..f3889d1821 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -113,14 +113,12 @@ export class ExtensionMetadataService { } /** - * Initialize the service by ensuring directory exists and clearing stale streaming flags. - * Call this once on app startup. + * Initialize the service by ensuring directory exists and clearing stale + * streaming flags. Call once on app startup. * - * Per AGENTS.md ("Startup-time initialization must never crash the app"), - * we swallow disk-write failures here so a transient permission/disk-full - * error doesn't block app launch. The new save() throws on failure for the - * benefit of strict callers (AgentStatusService); this method is the - * startup-safety boundary that keeps that contract. + * Per AGENTS.md ("Startup-time initialization must never crash the app") + * disk failures here are logged and swallowed; save() itself throws so + * strict callers (e.g. AgentStatusService) can react. */ async initialize(): Promise { // Ensure directory exists @@ -169,12 +167,10 @@ export class ExtensionMetadataService { } private async save(data: ExtensionMetadataFile): Promise { - // Throw on write failure so callers that need to know whether the write - // actually happened (e.g. 
AgentStatusService, which dedups against the - // last successfully-persisted input hash) can react. Callers that don't - // care still wrap setX in emitWorkspaceActivityUpdate which downgrades - // the throw to a logged warning, preserving the historical - // "log-and-continue" behavior for those paths. + // Throws on failure so callers that need to know whether the write + // actually happened (e.g. AgentStatusService dedup) can react. + // emitWorkspaceActivityUpdate (the historical wrapper used elsewhere) + // downgrades throws to logged warnings for log-and-continue paths. try { const content = JSON.stringify(data, null, 2); await writeFileAtomic(this.filePath, content, "utf-8"); @@ -250,24 +246,14 @@ export class ExtensionMetadataService { /** * Update the AI-generated sidebar status payload for a workspace. - * - * `inputHash` is opaque from this service's perspective: AgentStatusService - * persists a fingerprint of the trailing transcript window so that on - * restart we can skip regeneration when the transcript is unchanged. - * Callers pass `null` to clear both the payload and the cached hash. + * Pass `null` to clear it. */ async setAiStatus( workspaceId: string, - aiStatus: ExtensionAgentStatus | null, - inputHash: string | null + aiStatus: ExtensionAgentStatus | null ): Promise { return this.mutateWorkspaceSnapshot(workspaceId, Date.now(), (workspace) => { - if (aiStatus) { - workspace.aiStatus = aiStatus; - } else { - workspace.aiStatus = null; - } - workspace.aiStatusInputHash = inputHash; + workspace.aiStatus = aiStatus; }); } @@ -311,22 +297,6 @@ export class ExtensionMetadataService { return this.toSnapshot(data.workspaces[workspaceId]); } - /** - * Read the persisted aiStatus input hash for a workspace, if any. - * - * Internal helper for AgentStatusService dedup across restarts. The hash is - * intentionally not part of WorkspaceActivitySnapshot because it has no - * sidebar/UI semantics — it's purely a backend bookkeeping field.
- */ - async getAiStatusInputHash(workspaceId: string): Promise { - const data = await this.load(); - const normalized = coerceExtensionMetadata(data.workspaces[workspaceId]); - if (!normalized) { - return null; - } - return typeof normalized.aiStatusInputHash === "string" ? normalized.aiStatusInputHash : null; - } - /** * Delete metadata for a workspace. * Call this when a workspace is deleted. diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 8a688e2790..b78c803b95 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -1,5 +1,4 @@ import { describe, test, expect, beforeEach, afterEach, mock, spyOn } from "bun:test"; -import { EventEmitter } from "events"; import type { ProjectsConfig, ProjectConfig, Workspace } from "@/common/types/project"; import { Ok } from "@/common/types/result"; import { createMuxMessage } from "@/common/types/message"; @@ -14,7 +13,6 @@ import * as workspaceStatusGenerator from "./workspaceStatusGenerator"; import { createTestHistoryService } from "./testHistoryService"; interface AgentStatusServiceInternals { - tick(): void; runTick(): Promise; runForWorkspace(workspaceId: string): Promise; } @@ -31,17 +29,13 @@ describe("AgentStatusService", () => { let mockTokenizer: TokenizerService; let mockAiService: AIService; let windowService: WindowService; + let isFocused = true; let setAiStatusMock: ReturnType< - typeof mock< - (workspaceId: string, status: unknown, hash: string | null) => Promise<{ recency: number }> - > + typeof mock<(workspaceId: string, status: unknown) => Promise<{ recency: number }>> >; let emitWorkspaceActivityMock: ReturnType< typeof mock<(workspaceId: string, snapshot: unknown) => void> >; - let getAiStatusInputHashMock: ReturnType< - typeof mock<(workspaceId: string) => Promise> - >; let generateSpy: ReturnType< typeof spyOn >; @@ -63,9 +57,7 @@ describe("AgentStatusService", () => { }; } - // Driver: 
instantiate the service with a controllable clock and synchronously - // run a tick. We intentionally bypass the scheduler timers so each test step - // is deterministic. + // Bypass the scheduler timers so each test step is deterministic. function createService(options?: { clock?: () => number }): AgentStatusService { return new AgentStatusService( mockConfig, @@ -78,8 +70,6 @@ describe("AgentStatusService", () => { { clock: options?.clock, startupDelayMs: 0, - // Use a very large tick interval so setInterval doesn't fire while - // the test is running; we drive ticks manually via getInternals(). tickIntervalMs: 60 * 60 * 1000, } ); @@ -104,18 +94,15 @@ describe("AgentStatusService", () => { emitWorkspaceActivity: emitWorkspaceActivityMock, } as unknown as WorkspaceService; - setAiStatusMock = mock((_workspaceId: string, _status: unknown, _hash: string | null) => + setAiStatusMock = mock((_workspaceId: string, _status: unknown) => Promise.resolve({ recency: 0 }) ); - getAiStatusInputHashMock = mock(() => Promise.resolve(null)); mockExtensionMetadata = { setAiStatus: setAiStatusMock, - getAiStatusInputHash: getAiStatusInputHashMock, } as unknown as ExtensionMetadataService; mockTokenizer = { - // Cheap deterministic tokenizer: 1 token per 4 chars. Avoids spinning up - // the real worker pool for each test. + // Cheap deterministic tokenizer (~1 token per 4 chars). 
countTokensBatch: mock((_model: string, texts: string[]) => Promise.resolve(texts.map((t) => Math.ceil(t.length / 4))) ), @@ -123,8 +110,8 @@ describe("AgentStatusService", () => { mockAiService = {} as unknown as AIService; - windowService = new EventEmitter() as unknown as WindowService; - (windowService as unknown as { isFocused: () => boolean }).isFocused = () => true; + isFocused = true; + windowService = { isFocused: () => isFocused } as unknown as WindowService; generateSpy = spyOn(workspaceStatusGenerator, "generateWorkspaceStatus").mockResolvedValue( Ok({ @@ -139,7 +126,7 @@ describe("AgentStatusService", () => { await historyHandle.cleanup(); }); - test("generates a fresh AI status when chat history exists and persists the input hash", async () => { + test("generates and persists a fresh AI status when chat history exists", async () => { await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "Please run the test suite") @@ -159,15 +146,13 @@ describe("AgentStatusService", () => { expect(generationCall[1]).toEqual(["anthropic:claude-haiku-4-5"]); expect(setAiStatusMock).toHaveBeenCalledTimes(1); - const updateCall = setAiStatusMock.mock.calls[0]; - expect(updateCall[0]).toBe(workspaceId); - expect(updateCall[1]).toEqual({ emoji: "πŸ› οΈ", message: "Editing source" }); - // The hash is persisted so subsequent runs can dedup against it. - expect(typeof updateCall[2]).toBe("string"); - expect(updateCall[2]!.length).toBeGreaterThan(0); + const [persistedWorkspaceId, persistedStatus] = setAiStatusMock.mock.calls[0]; + expect(persistedWorkspaceId).toBe(workspaceId); + expect(persistedStatus).toEqual({ emoji: "πŸ› οΈ", message: "Editing source" }); }); test("skips regeneration when the trailing transcript is unchanged (dedup)", async () => { + // "Frozen chat" behavior: identical hash β†’ no further LLM calls. 
await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "Idle workspace") @@ -178,20 +163,16 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(1); expect(setAiStatusMock).toHaveBeenCalledTimes(1); - // Second pass: history hasn't changed, so the input hash matches and we - // must not call the model again. This is the "frozen chat" behavior the - // user explicitly asked for. await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); expect(setAiStatusMock).toHaveBeenCalledTimes(1); }); test("includes the in-flight partial assistant message so the hash refreshes mid-stream", async () => { - // During an active stream the assistant's text/tool activity lives in - // partial.json before being committed to chat.jsonl. If buildTrailing- - // Transcript only saw committed messages, the hash would stay constant - // for the entire stream, defeating the whole point of the feature - // (showing what the agent is doing *right now*). + // The assistant's mid-stream output lives in partial.json before being + // committed to chat.jsonl. If buildTrailingTranscript ignored partials, + // the hash would stay constant during long streams and dedup would + // suppress the very updates the feature exists to surface. await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "kick off a long task") @@ -200,21 +181,15 @@ describe("AgentStatusService", () => { const service = createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - const initialHash = setAiStatusMock.mock.calls[0][2]; - expect(typeof initialHash).toBe("string"); - // Stage a partial assistant message — same shape the streaming pipeline - // writes via writePartial. The runForWorkspace tick should now see this - // text in the transcript and regenerate.
const partial = createMuxMessage("a-partial", "assistant", "Reading config files"); await historyHandle.historyService.writePartial(workspaceId, partial); + // Dedup would have suppressed this second call if the partial was missing + // from the trailing window. await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(2); - const transcriptArg = generateSpy.mock.calls[1][0]; - expect(transcriptArg).toContain("Assistant: Reading config files"); - const newHash = setAiStatusMock.mock.calls[1][2]; - expect(newHash).not.toBe(initialHash); + expect(generateSpy.mock.calls[1][0]).toContain("Assistant: Reading config files"); }); test("re-generates after the trailing transcript changes", async () => { @@ -226,8 +201,6 @@ describe("AgentStatusService", () => { await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - // New user turn changes the trailing window — hash must differ and we - // must regenerate. await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u2", "user", "Second request") @@ -238,12 +211,10 @@ describe("AgentStatusService", () => { }); test("skips regeneration when there is no chat history yet", async () => { + // Empty workspaces have nothing to summarize. Don't pay for a + // hallucinated status, and don't blank an existing aiStatus on disk. const service = createService(); await getInternals(service).runForWorkspace(workspaceId); - - // Empty workspaces have nothing to summarize. We must not pay for an LLM - // call producing a hallucinated status, and we must not blank an - // existing aiStatus on disk. expect(generateSpy).not.toHaveBeenCalled(); expect(setAiStatusMock).not.toHaveBeenCalled(); }); @@ -262,34 +233,30 @@ describe("AgentStatusService", () => { const service = createService({ clock: () => now }); const internals = getInternals(service); - // First tick (focused) generates immediately.
Mutate history afterwards - // so the dedup hash differs on subsequent ticks — otherwise this test - // would fail for the wrong reason. - (windowService as unknown as { isFocused: () => boolean }).isFocused = () => true; + // First focused tick generates. We mutate history between ticks so the + // dedup hash differs — otherwise this test would pass for the wrong + // reason. + isFocused = true; await internals.runTick(); await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u2", "user", "follow-up A") ); - expect(generateSpy).toHaveBeenCalledTimes(1); - // Advance time by less than the focused interval. The scheduler must - // skip this workspace. + // Inside the focused interval: skipped. now += 5_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(1); - // Advance past the focused interval; another generation should fire. + // Past the focused interval: regenerates. now += 30_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(2); - // Now go unfocused. Even after the focused interval elapses, the - // unfocused interval is longer (2 minutes) and we should not regenerate - // until that boundary. Advance another 60s (well past focused, well - // short of unfocused). - (windowService as unknown as { isFocused: () => boolean }).isFocused = () => false; + // Unfocused: 60s elapsed is past focused but short of the unfocused + // interval (2 minutes), so the scheduler must wait. + isFocused = false; await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u3", "user", "follow-up B") @@ -298,7 +265,7 @@ describe("AgentStatusService", () => { await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(2); - // Past the unfocused interval — should regenerate. + // Past the unfocused interval: regenerates.
now += 120_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(3); @@ -306,32 +273,19 @@ describe("AgentStatusService", () => { test("round-robins across multiple workspaces so none starve under MAX_CONCURRENT=1", async () => { // With MAX_CONCURRENT=1 and a fixed iteration order, the first workspace - // would always become re-eligible before later ones got their turn — - // workspaces 4+ would never produce a status. The scheduler must - // prioritize least-recently-run workspaces so each one gets fair - // attention even when many are eligible at the same time. + // would always become re-eligible before later ones got a turn. The + // scheduler must prioritize least-recently-run workspaces. const projectPathLocal = "/test/round-robin-project"; - const wsA: Workspace = { - id: "ws-a", - name: "ws-a", - path: "/test/path/a", - } as unknown as Workspace; - const wsB: Workspace = { - id: "ws-b", - name: "ws-b", - path: "/test/path/b", - } as unknown as Workspace; - const wsC: Workspace = { - id: "ws-c", - name: "ws-c", - path: "/test/path/c", - } as unknown as Workspace; + const ids = ["ws-a", "ws-b", "ws-c"]; + const workspaces = ids.map( + (id) => ({ id, name: id, path: `/test/path/${id}` }) as unknown as Workspace + ); projectsConfig = { projects: new Map([ - [projectPathLocal, { workspaces: [wsA, wsB, wsC] } as unknown as ProjectConfig], + [projectPathLocal, { workspaces } as unknown as ProjectConfig], ]), }; - for (const id of ["ws-a", "ws-b", "ws-c"]) { + for (const id of ids) { await historyHandle.historyService.appendToHistory( id, createMuxMessage(`u1-${id}`, "user", `prompt for ${id}`) @@ -342,52 +296,35 @@ describe("AgentStatusService", () => { const service = createService({ clock: () => now }); const internals = getInternals(service); - // Tick 1 → first workspace runs. + // Tick 1 covers one workspace; ticks 2 and 3 each cover a distinct + // never-run workspace before any repeat (least-recently-run wins).
await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(1); - const firstRunWorkspaceIds = setAiStatusMock.mock.calls.map((call) => call[0]); - - // Advance just past one focused interval so all three are eligible. The - // scheduler must pick a workspace that hasn't run yet (lastRanAt=0) - // before re-running the workspace that just ran. now += 31_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(2); - const idsAfterTick2 = setAiStatusMock.mock.calls.map((call) => call[0]); - expect(new Set(idsAfterTick2).size).toBe(2); - - // One more tick should cover the third workspace before any repeats. now += 31_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(3); - const idsAfterTick3 = setAiStatusMock.mock.calls.map((call) => call[0]); - expect(new Set(idsAfterTick3)).toEqual(new Set(["ws-a", "ws-b", "ws-c"])); - - // Use the variable to satisfy lint / show intent: every workspace was - // covered at least once. - expect(firstRunWorkspaceIds.length).toBeGreaterThan(0); + const persistedIds = setAiStatusMock.mock.calls.map((call) => call[0]); + expect(new Set(persistedIds)).toEqual(new Set(ids)); }); test("does not persist or emit if the service is stopped while a generation is in flight", async () => { - // generateWorkspaceStatus can take seconds to minutes (real provider - // call). If the service is stopped (app shutdown / dispose) during that - // window, persisting the result would leak writes past the declared - // lifecycle. Prove it: - // 1) start a generation that resolves only after we call stop() - // 2) call stop() - // 3) release the generation - // 4) assert no setAiStatus / emit happened + // Real provider calls can take seconds to minutes. If stop() fires + // mid-generation (app shutdown), persisting afterwards would leak writes + // past the declared lifecycle. 
await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "long-running task") ); let releaseGenerate!: () => void; - const generationGate = new Promise((resolve) => { + const gate = new Promise((resolve) => { releaseGenerate = resolve; }); generateSpy.mockImplementationOnce(async () => { - await generationGate; + await gate; return Ok({ status: { emoji: "πŸ› οΈ", message: "Doing work" }, modelUsed: "anthropic:claude-haiku-4-5", @@ -395,10 +332,7 @@ describe("AgentStatusService", () => { }); const service = createService(); - const internals = getInternals(service); - const inFlight = internals.runForWorkspace(workspaceId); - - // Stop the service while the generation is still pending. + const inFlight = getInternals(service).runForWorkspace(workspaceId); service.stop(); releaseGenerate(); await inFlight; @@ -409,12 +343,9 @@ describe("AgentStatusService", () => { }); test("a failed persistence write does not update the dedup hash, so the next tick retries", async () => { - // Codex review: emitWorkspaceActivityUpdate (the historical wrapper) used - // to swallow disk errors, which meant a transient extensionMetadata.json - // write failure could leave the in-memory hash advanced even though the - // generated status never made it to disk or the frontend. After that, - // the next tick would dedup against the new hash and never retry. - // The fix is: only update lastInputHash AFTER a successful persist. + // Only update lastInputHash AFTER a successful persist. Otherwise a + // transient I/O failure would leave us dedup'ing against a hash that + // never made it to disk, silently dropping subsequent retries. await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "kick off a task") @@ -426,15 +357,13 @@ describe("AgentStatusService", () => { await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - // setAiStatus was attempted but failed. 
expect(setAiStatusMock).toHaveBeenCalledTimes(1); - // Activity emit must NOT happen on persist failure — frontend must not - // see a status the disk doesn't actually have. + // Activity must not emit on persist failure. expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); - // The next runForWorkspace pass on the SAME transcript must retry, - // because the previous failure should have left lastInputHash null. - setAiStatusMock.mockImplementation((_w, _s, _h) => Promise.resolve({ recency: 0 })); + // Same transcript, second pass: retries because the previous failure + // left lastInputHash unchanged. + setAiStatusMock.mockImplementation((_w, _s) => Promise.resolve({ recency: 0 })); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(2); expect(setAiStatusMock).toHaveBeenCalledTimes(2); diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 4b76fee1e5..d2414a4770 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -22,125 +22,88 @@ import type { WorkspaceService } from "./workspaceService"; import { generateWorkspaceStatus } from "./workspaceStatusGenerator"; import { log } from "./log"; -/** - * Public-test surface for AgentStatusService. Real callers use the no-arg - * constructor; tests pass a `clock` to drive deterministic time and can - * skip the startup delay by passing `startupDelayMs: 0`. - */ +const FALLBACK_TOKENIZER_MODEL = "anthropic:claude-haiku-4-5"; + export interface AgentStatusServiceOptions { /** Override for test injection. Defaults to `Date.now`. */ clock?: () => number; - /** Override startup delay (ms). Defaults to {@link AGENT_STATUS_STARTUP_DELAY_MS}. */ + /** Override startup delay. Defaults to AGENT_STATUS_STARTUP_DELAY_MS. */ startupDelayMs?: number; - /** Override scheduler tick interval (ms). Defaults to {@link AGENT_STATUS_TICK_INTERVAL_MS}. */ + /** Override scheduler tick interval.
Defaults to AGENT_STATUS_TICK_INTERVAL_MS. */ tickIntervalMs?: number; } -interface WorkspaceTrackingState { - /** Last time we successfully ran (or skipped due to dedup). 0 on first ever tick. */ +interface State { + /** Last time we ran (or skipped via dedup). 0 if we never ran. */ lastRanAt: number; - /** Hash of the most recent input we generated against. null if we never ran. */ + /** Hash of the input we last successfully generated for. null if never. */ lastInputHash: string | null; - /** Whether a generation is currently in flight for this workspace. */ + /** Whether a generation is currently in flight. */ inFlight: boolean; } /** - * Periodic backend job that produces the sidebar's AI-generated agent - * status using the same "small model" path as workspace titles. + * Periodic backend job that produces the sidebar's AI-generated agent status + * using the same "small model" path as workspace title generation. * - * Cadence: - * - The scheduler ticks every {@link AGENT_STATUS_TICK_INTERVAL_MS}. - * - Each workspace has its own per-tick eligibility window: focused windows - * regenerate at most every {@link AGENT_STATUS_FOCUSED_INTERVAL_MS}, blurred - * windows back off to {@link AGENT_STATUS_UNFOCUSED_INTERVAL_MS}. + * Cadence: per-workspace eligibility gates each tick. Focused windows + * regenerate at most every AGENT_STATUS_FOCUSED_INTERVAL_MS, blurred windows + * back off to AGENT_STATUS_UNFOCUSED_INTERVAL_MS. * - * Dedup: - * - Each generation hashes its trailing-transcript window. We persist the - * hash on disk via ExtensionMetadataService so a workspace whose chat is - * idle/frozen produces no further generations (input is unchanged). + * Dedup: each generation hashes its trailing-transcript window. Identical + * hash to the last successful run skips regeneration (idle/frozen chats). * - * Concurrency: - * - Bounded by {@link AGENT_STATUS_MAX_CONCURRENT} so a sweep across many - * workspaces never spikes provider load. 
+ * Concurrency: bounded by AGENT_STATUS_MAX_CONCURRENT so a multi-workspace + * sweep never spikes provider load. */ export class AgentStatusService { - private readonly config: Config; - private readonly historyService: HistoryService; - private readonly tokenizerService: TokenizerService; - private readonly extensionMetadata: ExtensionMetadataService; - private readonly workspaceService: WorkspaceService; - private readonly windowService: WindowService; - private readonly aiService: AIService; - + private readonly tracked = new Map(); + private readonly inFlightPromises = new Set>(); private readonly clock: () => number; private readonly startupDelayMs: number; private readonly tickIntervalMs: number; - private readonly tracked = new Map(); - private inFlightCount = 0; - // Track in-flight per-workspace promises so a tick can be awaited cleanly - // in tests (and so shutdown can drain them if we ever need to). - private readonly inFlightPromises = new Set>(); - private startupTimeout: ReturnType | null = null; private checkInterval: ReturnType | null = null; - // Default to "running so the service is usable as soon as it's - // constructed (tests drive runTick() directly). stop() flips this true to - // gate any in-flight or scheduled work. 
private stopped = false; private tickInFlight = false; - private hashesHydrated = false; constructor( - config: Config, - historyService: HistoryService, - tokenizerService: TokenizerService, - extensionMetadata: ExtensionMetadataService, - workspaceService: WorkspaceService, - windowService: WindowService, - aiService: AIService, + private readonly config: Config, + private readonly historyService: HistoryService, + private readonly tokenizerService: TokenizerService, + private readonly extensionMetadata: ExtensionMetadataService, + private readonly workspaceService: WorkspaceService, + private readonly windowService: WindowService, + private readonly aiService: AIService, options: AgentStatusServiceOptions = {} ) { - this.config = config; - this.historyService = historyService; - this.tokenizerService = tokenizerService; - this.extensionMetadata = extensionMetadata; - this.workspaceService = workspaceService; - this.windowService = windowService; - this.aiService = aiService; - this.clock = options.clock ?? (() => Date.now()); this.startupDelayMs = options.startupDelayMs ?? AGENT_STATUS_STARTUP_DELAY_MS; this.tickIntervalMs = options.tickIntervalMs ?? AGENT_STATUS_TICK_INTERVAL_MS; } start(): void { - // Idempotent re-entry guard: callers in production wire start() once at - // initialize() time, but a defensive assert keeps double-start mistakes - // visible during development. assert( this.checkInterval === null && this.startupTimeout === null, "AgentStatusService.start() called while already running" ); this.stopped = false; - const scheduleTicks = () => { - if (this.stopped) { - return; - } - // Fire one tick immediately after the startup delay so the user sees an - // initial status without waiting a full interval. 
- this.tick(); - this.checkInterval = setInterval(() => this.tick(), this.tickIntervalMs); + const begin = () => { + if (this.stopped) return; + // Fire one tick immediately so the user sees an initial status without + // waiting a full interval after the startup delay. + void this.runTick(); + this.checkInterval = setInterval(() => void this.runTick(), this.tickIntervalMs); }; if (this.startupDelayMs <= 0) { - scheduleTicks(); + begin(); } else { this.startupTimeout = setTimeout(() => { this.startupTimeout = null; - scheduleTicks(); + begin(); }, this.startupDelayMs); } @@ -161,222 +124,97 @@ export class AgentStatusService { this.checkInterval = null; } this.tracked.clear(); - this.inFlightCount = 0; this.inFlightPromises.clear(); this.tickInFlight = false; - this.hashesHydrated = false; log.info("AgentStatusService stopped"); } - /** - * Synchronous best-effort tick entrypoint. Safe to call repeatedly; we - * guard with `tickInFlight` so overlapping ticks coalesce. - */ - private tick(): void { - if (this.stopped || this.tickInFlight) { - return; - } - this.tickInFlight = true; - void this.runTick().finally(() => { - this.tickInFlight = false; - }); - } - private async runTick(): Promise { + if (this.stopped || this.tickInFlight) return; + this.tickInFlight = true; try { - // First tick after start() needs to seed lastInputHash from disk so - // we honor the previous run's dedup state across restarts. - if (!this.hashesHydrated) { - await this.hydratePersistedHashes(); - this.hashesHydrated = true; - } - this.processEligibleWorkspaces(); - // Wait for the workspaces we just dispatched so callers (production - // schedulers + tests) observe their effects deterministically. - await this.drainInFlight(); + this.dispatch(); + // Awaited so production callers and tests observe completion. 
+ await Promise.allSettled([...this.inFlightPromises]); } catch (error) { log.error("AgentStatusService tick failed", { error }); + } finally { + this.tickInFlight = false; } } - private async drainInFlight(): Promise { - while (this.inFlightPromises.size > 0) { - await Promise.allSettled(Array.from(this.inFlightPromises)); - } - } - - private async hydratePersistedHashes(): Promise { - const config = this.config.loadConfigOrDefault(); - for (const [, projectConfig] of config.projects) { - for (const workspace of projectConfig.workspaces) { - const workspaceId = workspace.id ?? workspace.name; - if (typeof workspaceId !== "string" || workspaceId.length === 0) { - continue; - } - const persistedHash = await this.extensionMetadata.getAiStatusInputHash(workspaceId); - if (persistedHash !== null) { - this.tracked.set(workspaceId, { - lastRanAt: 0, - lastInputHash: persistedHash, - inFlight: false, - }); - } - } - } - } - - // Synchronous: per-workspace dispatches go on inFlightPromises and are - // awaited by runTick via drainInFlight. Keeping this sync avoids a no-op - // Promise allocation on every tick. - private processEligibleWorkspaces(): void { + private dispatch(): void { const now = this.clock(); - const focused = this.windowService.isFocused(); - const interval = focused + const interval = this.windowService.isFocused() ? AGENT_STATUS_FOCUSED_INTERVAL_MS : AGENT_STATUS_UNFOCUSED_INTERVAL_MS; - const config = this.config.loadConfigOrDefault(); - - // Collect every eligible workspace first, then sort by lastRanAt - // ascending. With AGENT_STATUS_MAX_CONCURRENT=1 a fixed iteration order - // would let the first workspace starve everyone deeper in the list - // (it becomes re-eligible at 30s, and workspace[N>1] is never reached). - // Sorting by least-recently-run produces a fair round-robin without an - // explicit queue. 
- const eligible: Array<{ workspaceId: string; lastRanAt: number }> = []; - for (const [, projectConfig] of config.projects) { - for (const workspace of projectConfig.workspaces) { - const workspaceId = workspace.id ?? workspace.name; - if (typeof workspaceId !== "string" || workspaceId.length === 0) { - continue; - } - if (isWorkspaceArchived(workspace.archivedAt, workspace.unarchivedAt)) { - continue; - } - - const state = this.tracked.get(workspaceId); - if (state?.inFlight) { - continue; - } - if (state && now - state.lastRanAt < interval) { - continue; - } - - // Workspaces that have never run (state === undefined) get the - // earliest possible lastRanAt so they preempt previously-run - // workspaces on their first tick. - eligible.push({ workspaceId, lastRanAt: state?.lastRanAt ?? 0 }); + // Sort eligible workspaces by lastRanAt ascending. With MAX_CONCURRENT=1, + // a fixed iteration order would let the first workspace starve the rest; + // least-recently-run gives fair round-robin without an explicit queue. + const eligible: Array<{ id: string; lastRanAt: number }> = []; + for (const [, projectConfig] of this.config.loadConfigOrDefault().projects) { + for (const ws of projectConfig.workspaces) { + const id = ws.id ?? ws.name; + if (typeof id !== "string" || id.length === 0) continue; + if (isWorkspaceArchived(ws.archivedAt, ws.unarchivedAt)) continue; + const state = this.tracked.get(id); + if (state?.inFlight) continue; + if (state && now - state.lastRanAt < interval) continue; + eligible.push({ id, lastRanAt: state?.lastRanAt ?? 0 }); } } - eligible.sort((a, b) => a.lastRanAt - b.lastRanAt); - for (const { workspaceId } of eligible) { - if (this.stopped) { - return; - } - if (this.inFlightCount >= AGENT_STATUS_MAX_CONCURRENT) { - return; - } - - // Per-workspace work runs concurrently up to AGENT_STATUS_MAX_CONCURRENT. - // We track the promise (instead of fire-and-forget) so runTick can - // await all dispatched workspaces before returning. 
That keeps the - // production tick loop's "did we finish?" semantics observable, and - // makes tests deterministic without hand-rolled microtask flushing. - this.inFlightCount += 1; - this.markInFlight(workspaceId, true); - const promise = this.runForWorkspace(workspaceId).finally(() => { - this.inFlightCount = Math.max(0, this.inFlightCount - 1); - this.markInFlight(workspaceId, false); + for (const { id } of eligible) { + if (this.stopped || this.inFlightPromises.size >= AGENT_STATUS_MAX_CONCURRENT) return; + const state = this.ensureState(id); + state.inFlight = true; + const promise = this.runForWorkspace(id).finally(() => { + state.inFlight = false; this.inFlightPromises.delete(promise); }); this.inFlightPromises.add(promise); } } - private markInFlight(workspaceId: string, value: boolean): void { - const state = this.tracked.get(workspaceId); - if (state) { - state.inFlight = value; - return; - } - if (value) { - this.tracked.set(workspaceId, { lastRanAt: 0, lastInputHash: null, inFlight: true }); - } - } - private async runForWorkspace(workspaceId: string): Promise { try { const transcript = await this.buildTrailingTranscript(workspaceId); const inputHash = computeInputHash(transcript); - // Always update lastRanAt: even when we skip the LLM call, we don't - // want to reconsider this workspace until the next interval boundary. + // Bump lastRanAt regardless of skip/run so the scheduler doesn't + // reconsider this workspace until the next interval boundary. const state = this.ensureState(workspaceId); - const now = this.clock(); - state.lastRanAt = now; - - if (transcript.trim().length === 0) { - // A brand-new workspace with no chat content yet — skip silently. - // We deliberately do not clear an existing aiStatus here so that a - // post-compaction "empty boundary" doesn't blank a recently produced - // status.
- return; - } + state.lastRanAt = this.clock(); - if (state.lastInputHash === inputHash) { - // Idle/frozen: identical trailing window, no point in regenerating. - // Still bump lastRanAt above so we won't revisit until the next - // interval boundary, which keeps the scheduler cheap. - return; - } + // Empty workspace: nothing to summarize. Don't blank an existing + // aiStatus — that would clobber a status produced before compaction. + if (transcript.trim().length === 0) return; + // Idle/frozen: identical trailing window since last successful run. + if (state.lastInputHash === inputHash) return; const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); - if (candidates.length === 0) { - log.debug("AgentStatusService: no model candidates for workspace, skipping", { - workspaceId, - }); - return; - } + if (candidates.length === 0) return; const result = await generateWorkspaceStatus(transcript, candidates, this.aiService); + // The generator can take seconds to a minute; bail if stop() fired + // mid-flight to avoid leaking writes past our lifecycle. + if (this.stopped) return; if (!result.success) { log.debug("AgentStatusService: status generation failed; will retry next tick", { workspaceId, error: result.error, }); - // Leave lastInputHash unchanged so the next tick retries even - // though the input is unchanged. - return; - } - - // The generator can take seconds to a minute. The service may have - // been stopped (app shutdown, dispose, etc.) while we were awaiting - // the provider response. If so, do not persist or emit — that would - // leak metadata writes and activity events past the service's - // declared lifetime. - if (this.stopped) { return; } // Persist BEFORE updating the in-memory dedup hash. If the disk write - // fails (transient I/O error), we want the next tick to retry the - // unchanged transcript instead of dedup'ing against a hash we never - // actually committed.
The frontend activity emit happens after the - // write returns successfully, so subscribers either see the new - // status or fall through to a later retry. + // fails we want the next tick to retry against the same transcript + // instead of dedup'ing against a hash we never committed. try { - const snapshot = await this.extensionMetadata.setAiStatus( - workspaceId, - { emoji: result.data.status.emoji, message: result.data.status.message }, - inputHash - ); - // Re-check after the (also-async) disk write — same lifecycle - // hazard as the post-generation check above. - if (this.stopped) { - return; - } + const snapshot = await this.extensionMetadata.setAiStatus(workspaceId, result.data.status); + if (this.stopped) return; state.lastInputHash = inputHash; this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); } catch (error) { @@ -384,8 +222,6 @@ export class AgentStatusService { workspaceId, error, }); - // Intentionally leave state.lastInputHash untouched so the next tick - // tries again with the same transcript. } } catch (error) { log.error("AgentStatusService: unexpected error during status generation", { @@ -395,110 +231,68 @@ export class AgentStatusService { } } - private ensureState(workspaceId: string): WorkspaceTrackingState { - let state = this.tracked.get(workspaceId); + private ensureState(id: string): State { + let state = this.tracked.get(id); if (!state) { state = { lastRanAt: 0, lastInputHash: null, inFlight: false }; - this.tracked.set(workspaceId, state); + this.tracked.set(id, state); } return state; } /** - * Build the trailing chat transcript for a workspace, capped by both - * message count and {@link AGENT_STATUS_MAX_TRANSCRIPT_TOKENS} tokens. - * - * Returns an empty string if the workspace has no chat history yet. - * - * During an active stream the assistant's current text and tool calls live - * in `partial.json` (via HistoryService.writePartial) before being committed - * to `chat.jsonl`.
We append the partial message after the committed tail - * so the hash changes — and the status refreshes — as the stream progresses, - * which is exactly when an "agent doing X right now" status is most useful. + * Build the trailing chat transcript, capped by message count and + * AGENT_STATUS_MAX_TRANSCRIPT_TOKENS. Includes the in-flight partial + * assistant message (HistoryService.readPartial) so the hash refreshes + * mid-stream — exactly when "what is the agent doing now" matters most. */ private async buildTrailingTranscript(workspaceId: string): Promise { const result = await this.historyService.getLastMessages( workspaceId, AGENT_STATUS_MAX_TRAILING_MESSAGES ); - if (!result.success) { - return ""; - } + if (!result.success) return ""; const messages: MuxMessage[] = [...result.data]; const partial = await this.historyService.readPartial(workspaceId); - if (partial) { - messages.push(partial); - } - - const formatted = messages.map(formatMessageForTranscript).filter((entry) => entry.length > 0); - - if (formatted.length === 0) { - return ""; - } - - // Trim from the front (oldest messages) until we fit within the token - // budget. The trailing-most messages carry the most signal for "what is - // the agent currently doing", so we never drop them. - // - // Use the first candidate model for tokenization. The tokenizer service - // gracefully falls back to a known family for unknown model strings, so - // this is safe even when the user's model is not in our table. - const tokenizerModel = await this.resolveTokenizerModel(workspaceId); - const tokenCounts = await this.tokenizerService.countTokensBatch(tokenizerModel, formatted); + if (partial) messages.push(partial); + + const formatted = messages.map(formatMessageForTranscript).filter((s) => s.length > 0); + if (formatted.length === 0) return ""; + + // Trim from the front (oldest) until we fit the token budget.
Trailing + // messages carry the most signal for "what is the agent doing right now", + // so we never drop them. The tokenizer service falls back to a known + // family for unknown models, so the fallback constant is safe regardless + // of which model actually generates this workspace's status. + const tokenCounts = await this.tokenizerService.countTokensBatch( + FALLBACK_TOKENIZER_MODEL, + formatted + ); let totalTokens = tokenCounts.reduce((sum, n) => sum + n, 0); - let dropFromIndex = 0; - while ( - totalTokens > AGENT_STATUS_MAX_TRANSCRIPT_TOKENS && - dropFromIndex < formatted.length - 1 - ) { - totalTokens -= tokenCounts[dropFromIndex]; - dropFromIndex += 1; - } - - return formatted.slice(dropFromIndex).join("\n\n"); - } - - private async resolveTokenizerModel(workspaceId: string): Promise { - try { - const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); - // The first candidate is our preferred small model; tokenizing against - // it is good enough for budgeting purposes even if a fallback ends up - // being used. - return candidates[0] ?? "anthropic:claude-haiku-4-5"; - } catch { - return "anthropic:claude-haiku-4-5"; + let drop = 0; + while (totalTokens > AGENT_STATUS_MAX_TRANSCRIPT_TOKENS && drop < formatted.length - 1) { + totalTokens -= tokenCounts[drop]; + drop += 1; } + return formatted.slice(drop).join("\n\n"); } } function extractMessageText(message: MuxMessage): string { - if (!Array.isArray(message.parts)) { - return ""; - } - const textParts: string[] = []; - for (const part of message.parts) { - if (part?.type !== "text") { - continue; - } - const text = (part as { text?: unknown }).text; - if (typeof text === "string" && text.trim().length > 0) { - textParts.push(text.trim()); - } - } - return textParts.join("\n"); + return (message.parts ?? 
[]) + .filter((part): part is { type: "text"; text: string } => part.type === "text") + .map((part) => part.text.trim()) + .filter((text) => text.length > 0) + .join("\n"); } function summarizeToolPart(part: unknown): string | null { - if (typeof part !== "object" || part === null) { - return null; - } - const record = part as Record; - const type = record.type; - if (typeof type !== "string") { - return null; - } + if (typeof part !== "object" || part === null) return null; + const record = part as { type?: unknown; toolName?: unknown }; + const type = typeof record.type === "string" ? record.type : null; + if (!type) return null; // Tool calls have type "tool-" or "dynamic-tool" with a toolName. const toolName = typeof record.toolName === "string" @@ -506,60 +300,26 @@ function summarizeToolPart(part: unknown): string | null { : type.startsWith("tool-") ? type.slice(5) : null; - if (!toolName) { - return null; - } - return `[tool ${toolName}]`; + return toolName ? `[tool ${toolName}]` : null; } function formatMessageForTranscript(message: MuxMessage): string { const role = message.role === "user" ? "User" : message.role === "assistant" ? "Assistant" : null; - if (!role) { - return ""; - } - const text = extractMessageText(message); - // Include a brief tool-call summary so the model can see *what* the agent - // is doing even when the assistant has not yet emitted natural-language - // text for the current step. We avoid inlining tool args/output to keep - // the cost predictable. 
- const toolSummaries: string[] = []; - if (Array.isArray(message.parts)) { - for (const part of message.parts) { - const summary = summarizeToolPart(part); - if (summary) { - toolSummaries.push(summary); - } - } - } + if (!role) return ""; const segments: string[] = []; - if (text.length > 0) { - segments.push(text.slice(0, AGENT_STATUS_MAX_MESSAGE_CHARS)); - } - if (toolSummaries.length > 0) { - segments.push(toolSummaries.join(" ")); - } + const text = extractMessageText(message).slice(0, AGENT_STATUS_MAX_MESSAGE_CHARS); + if (text) segments.push(text); - if (segments.length === 0) { - return ""; - } + // Tool-call summaries let the model see what the agent is doing even when + // the assistant has not emitted natural-language text yet. Args/output are + // intentionally omitted to keep cost predictable. + const tools = (message.parts ?? []).map(summarizeToolPart).filter((s): s is string => s !== null); + if (tools.length > 0) segments.push(tools.join(" ")); - return `${role}: ${segments.join("\n")}`; + return segments.length === 0 ? "" : `${role}: ${segments.join("\n")}`; } -/** - * Compute a stable hash of the trailing transcript window. Used by the - * scheduler to skip regeneration when the input hasn't changed since the - * last successful generation. SHA-256 is overkill but trivially cheap; - * the hash is opaque to everything outside this service. - */ function computeInputHash(transcript: string): string { return createHash("sha256").update(transcript).digest("hex"); } - -// Exported for tests. 
-export const __test__ = { - computeInputHash, - extractMessageText, - formatMessageForTranscript, -}; diff --git a/src/node/services/windowService.ts b/src/node/services/windowService.ts index 38a166be7c..35b0dce599 100644 --- a/src/node/services/windowService.ts +++ b/src/node/services/windowService.ts @@ -1,72 +1,26 @@ -import { EventEmitter } from "events"; import type { BrowserWindow } from "electron"; import { log } from "@/node/services/log"; type RestartAppHandler = () => void | Promise; -/** - * WindowService extends EventEmitter so backend services that need to react - * to window focus state (e.g. AgentStatusService cadence gating) can subscribe - * via `windowService.on("focus-change", listener)` without depending on - * Electron internals or polling. - */ -export class WindowService extends EventEmitter { +export class WindowService { private mainWindow: BrowserWindow | null = null; private restartAppHandler: RestartAppHandler | null = null; - // Default to true so headless/test environments behave as if the user is - // actively watching. Desktop wires this to BrowserWindow focus/blur events - // in `setMainWindow` below. - private focused = true; setMainWindow(window: BrowserWindow) { this.mainWindow = window; - - // Seed from the window's current state if we can. - try { - this.setFocused(typeof window.isFocused === "function" ? window.isFocused() : true); - } catch { - this.setFocused(true); - } - - // Wire focus/blur listeners directly to the window. The window is - // recreated only on app restart, so we don't need to teardown listeners. - // Tests pass a minimal stub without an EventEmitter surface; gracefully - // skip listener wiring in that case so unrelated suites don't crash. 
- const eventTarget = window as unknown as { - on?: (event: string, listener: () => void) => unknown; - }; - if (typeof eventTarget.on === "function") { - eventTarget.on("focus", () => this.setFocused(true)); - eventTarget.on("blur", () => this.setFocused(false)); - } } setRestartAppHandler(handler: RestartAppHandler | null): void { this.restartAppHandler = handler; } /** - * Returns whether the desktop main window is currently focused. Falls back - * to `true` in non-desktop contexts (CLI server, tests) so backend - * services don't accidentally throttle themselves to "unfocused" cadence - * when there is no window at all. + * Whether the desktop main window is currently focused. Falls back to + * `true` in non-desktop contexts (CLI server, tests) so backend services + * don't throttle themselves to "unfocused" cadence when there is no window. */ isFocused(): boolean { - return this.focused; - } - - /** - * Update the cached focus state. Emits `focus-change` only on transitions - * so subscribers don't have to debounce duplicate notifications. - * - * Exposed publicly to allow tests and headless callers to drive focus - * transitions without an actual BrowserWindow. - */ - setFocused(focused: boolean): void { - if (this.focused === focused) { - return; - } - this.focused = focused; - this.emit("focus-change", focused); + return this.mainWindow?.isFocused?.() ?? true; } async restartApp(): Promise<{ supported: true } | { supported: false; message: string }> { diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index e872353c32..615eda2a53 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1544,11 +1544,9 @@ export class WorkspaceService extends EventEmitter { } /** - * Public so AgentStatusService (and any future consumer) can broadcast a - * workspace activity snapshot it produced itself. 
The standard path is - * `emitWorkspaceActivityUpdate`, but callers that need to know whether the - * persist actually succeeded need to invoke the underlying writer directly - * and then call this to reach the frontend. + * Public so AgentStatusService can broadcast a snapshot it produced after + * a direct setX call. (Most callers use emitWorkspaceActivityUpdate, which + * couples persist + emit but swallows persist errors.) */ public emitWorkspaceActivity( workspaceId: string, @@ -3855,14 +3853,10 @@ export class WorkspaceService extends EventEmitter { } /** - * Build the candidate list used by both title generation and the - * sidebar AI-status path. Starts with the global "small model" preferences - * and falls back to any model the workspace itself has configured so a - * custom-model workspace can still produce names/statuses when the global - * preferred models are unavailable. - * - * Public so AgentStatusService (and any future small-model consumer) can - * reuse the same precedence without duplicating the workspace lookup. + * Candidate list for "small model" callers (title + AI sidebar status). + * Global preferences first, then any workspace-configured model so a + * custom-model workspace still works when global preferences are + * unavailable. Public so AgentStatusService can share the precedence. */ public async getWorkspaceTitleModelCandidates(workspaceId: string): Promise { const candidates: string[] = [...NAME_GEN_PREFERRED_MODELS]; diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index a118e61136..0568b08642 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -11,11 +11,9 @@ import { } from "@/common/utils/tools/toolDefinitions"; /** - * AI-generated sidebar status summary. 
- * - * Emoji + short verb-led phrase, intentionally identical to the existing - * WorkspaceAgentStatus shape so the frontend can render it through the - * same WorkspaceStatusIndicator path used for displayStatus / todoStatus. + * AI-generated sidebar status: emoji + short verb-led phrase, matching + * WorkspaceAgentStatus so the frontend renders it through the same + * WorkspaceStatusIndicator path as displayStatus / todoStatus. */ export interface WorkspaceAgentStatusPayload { emoji: string; @@ -29,21 +27,15 @@ export interface GenerateWorkspaceStatusResult { } /** - * Build the prompt used by {@link generateWorkspaceStatus}. - * - * The transcript is supplied pre-trimmed (token budget enforced upstream). - * We deliberately keep the prompt short — the small model's job is to look - * at the trailing window and write a present-tense phrase. + * Build the prompt used by {@link generateWorkspaceStatus}. The transcript + * is supplied pre-trimmed (token budget enforced upstream). The prompt + * intentionally targets "current activity" not "overall task scope" — this + * is a sidebar status, not a workspace title. */ export function buildWorkspaceStatusPrompt(transcript: string): string { - // Sentinel for an empty trailing window (e.g., a fresh workspace with no - // text content). Shouldn't happen in practice because AgentStatusService - // skips empty inputs, but the model still needs *something* to ground on. + // Sentinel for an empty window. AgentStatusService skips empty inputs in + // practice, but the model still needs something to ground on. const body = transcript.trim().length > 0 ? transcript : "(no recent transcript)"; - - // The prompt avoids "summarize the whole task" framing on purpose: this - // is a sidebar status, not a workspace title. We want the *current* - // activity, not the overall scope.
return [ "You produce a short sidebar status that tells the user what an AI coding agent is doing right now.\n\n", "Recent chat transcript (oldest first, newest last):\n", @@ -62,11 +54,8 @@ export function buildWorkspaceStatusPrompt(transcript: string): string { /** * Generate a sidebar agent-status summary using the same "small model" path - * that powers workspace title generation. - * - * Try candidates in order, retrying on transient API errors (auth, quota, - * 5xx, etc.) up to a small cap so a single misconfigured candidate doesn't - * silently disable status updates for everyone. + * that powers workspace title generation. Tries up to 3 candidates so a + * single misconfigured candidate can't permanently disable status updates. */ export async function generateWorkspaceStatus( transcript: string, @@ -80,10 +69,7 @@ export async function generateWorkspaceStatus( }); } - // Match workspaceTitleGenerator's retry behavior so a single API outage - // can't permanently disable the feature. const maxAttempts = Math.min(candidates.length, 3); - let lastError: NameGenerationError | null = null; for (let i = 0; i < maxAttempts; i++) { diff --git a/src/node/utils/extensionMetadata.ts b/src/node/utils/extensionMetadata.ts index b4dc6a0f66..47a147eb17 100644 --- a/src/node/utils/extensionMetadata.ts +++ b/src/node/utils/extensionMetadata.ts @@ -28,15 +28,9 @@ export interface ExtensionMetadata { // Persists the latest display-status URL so later updates without a URL // can still carry the last deep link even after displayStatus is cleared. lastStatusUrl?: string | null; - // AI-generated status summary produced by the small-model status path - // (workspaceStatusGenerator.ts). When present, takes precedence over - // todoStatus in the sidebar. + // AI-generated status summary (workspaceStatusGenerator). When present, + // takes precedence over todoStatus in the sidebar. 
aiStatus?: ExtensionAgentStatus | null; - // Hash of the trailing transcript window that produced `aiStatus`. Used by - // AgentStatusService to skip regeneration when the input is unchanged - // (idle/frozen chats). Survives restarts so we don't pay for redundant - // generations on relaunch. - aiStatusInputHash?: string | null; } /** @@ -122,11 +116,6 @@ export function coerceExtensionMetadata(value: unknown): ExtensionMetadata | nul ...(aiStatus !== undefined ? { aiStatus } : {}), ...(typeof record.hasTodos === "boolean" ? { hasTodos: record.hasTodos } : {}), lastStatusUrl: coerceStatusUrl(record.lastStatusUrl), - ...(typeof record.aiStatusInputHash === "string" - ? { aiStatusInputHash: record.aiStatusInputHash } - : record.aiStatusInputHash === null - ? { aiStatusInputHash: null } - : {}), }; } From 366cf0dfe2638180c4efae074805badec3acb808 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 21:11:13 -0500 Subject: [PATCH 09/33] fix: setAiStatus preserves workspace recency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AgentStatusService is a background scheduler with no causal connection to user activity, so its writes must not bump 'recency' — that would re-sort idle workspaces every tick and mark them unread. For existing entries this was already a no-op in practice (mutateWorkspaceSnapshot's recency arg only seeds new entries), but the Date.now() seed would fire for the rare case where a workspace has chat history but no metadata entry yet. Inline the create logic so the seed is recency=0 for genuinely new entries and the existing recency is preserved otherwise. Adds a test that pins the contract directly against ExtensionMetadataService.
--- src/node/services/ExtensionMetadataService.ts | 24 ++++++++++++++++++- src/node/services/agentStatusService.test.ts | 23 +++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index f3889d1821..4b0c09635b 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -247,13 +247,35 @@ export class ExtensionMetadataService { /** * Update the AI-generated sidebar status payload for a workspace. * Pass `null` to clear it. + * + * AgentStatusService is a background scheduler with no causal connection + * to user activity, so this writer never advances `recency`. Existing + * entries keep their user-interaction recency (otherwise idle workspaces + * would be re-sorted and marked unread every tick); brand-new entries + * (rare: workspace has chat but no metadata yet) are seeded with + * `recency=0` so the AI status doesn't artificially promote them. + * `updateRecency` will set the real value on the next user interaction. */ async setAiStatus( workspaceId: string, aiStatus: ExtensionAgentStatus | null ): Promise { - return this.mutateWorkspaceSnapshot(workspaceId, Date.now(), (workspace) => { + return this.withSerializedMutation(async () => { + const data = await this.load(); + const existing = coerceExtensionMetadata(data.workspaces[workspaceId]); + const workspace: ExtensionMetadata = existing ?? 
{ + recency: 0, + streaming: false, + lastModel: null, + lastThinkingLevel: null, + agentStatus: null, + displayStatus: null, + lastStatusUrl: null, + }; workspace.aiStatus = aiStatus; + data.workspaces[workspaceId] = workspace; + await this.save(data); + return toWorkspaceActivitySnapshot(workspace); }); } diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index b78c803b95..ae3dd266c8 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -1,10 +1,13 @@ import { describe, test, expect, beforeEach, afterEach, mock, spyOn } from "bun:test"; +import { mkdtempSync, rmSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; import type { ProjectsConfig, ProjectConfig, Workspace } from "@/common/types/project"; import { Ok } from "@/common/types/result"; import { createMuxMessage } from "@/common/types/message"; import type { Config } from "@/node/config"; import type { AIService } from "./aiService"; -import type { ExtensionMetadataService } from "./ExtensionMetadataService"; +import { ExtensionMetadataService } from "./ExtensionMetadataService"; import type { WindowService } from "./windowService"; import type { WorkspaceService } from "./workspaceService"; import type { TokenizerService } from "./tokenizerService"; @@ -370,6 +373,24 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); + test("setAiStatus must not bump workspace recency (would re-sort idle workspaces)", async () => { + // AgentStatusService is a background scheduler with no causal + // connection to user activity, so its writes must not bump recency β€” + // that would promote idle workspaces in the sidebar and mark them + // unread every tick. Test ExtensionMetadataService directly to pin the + // contract for any future caller of setAiStatus. 
+ const dir = mkdtempSync(join(tmpdir(), "mux-recency-")); + try { + const svc = new ExtensionMetadataService(join(dir, "metadata.json")); + await svc.updateRecency("ws", 100); + await svc.setAiStatus("ws", { emoji: "πŸ› οΈ", message: "Doing work" }); + const after = await svc.getSnapshot("ws"); + expect(after?.recency).toBe(100); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), From dbcb68dfabe8fb842c4bf55d0f36943f6f5d1fee Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 21:23:34 -0500 Subject: [PATCH 10/33] fix: skip generator if stopped during transcript build / candidates fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review: stop() during the earlier awaits (buildTrailingTranscript and getWorkspaceTitleModelCandidates) wouldn't prevent the multi-second provider call from firing β€” only the persist after it. Add a stopped check immediately before generateWorkspaceStatus. Also pin the new behavior with a test that gates the candidates fetch on a release signal: stop fires while candidates are pending, the generator never runs. 
--- src/node/services/agentStatusService.test.ts | 44 +++++++++++++++++++- src/node/services/agentStatusService.ts | 9 +++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index ae3dd266c8..4ba636dfaa 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -39,6 +39,7 @@ describe("AgentStatusService", () => { let emitWorkspaceActivityMock: ReturnType< typeof mock<(workspaceId: string, snapshot: unknown) => void> >; + let getCandidatesMock: ReturnType Promise>>; let generateSpy: ReturnType< typeof spyOn >; @@ -92,8 +93,9 @@ describe("AgentStatusService", () => { } as unknown as Config; emitWorkspaceActivityMock = mock(() => undefined); + getCandidatesMock = mock((_id: string) => Promise.resolve(["anthropic:claude-haiku-4-5"])); mockWorkspaceService = { - getWorkspaceTitleModelCandidates: mock(() => Promise.resolve(["anthropic:claude-haiku-4-5"])), + getWorkspaceTitleModelCandidates: getCandidatesMock, emitWorkspaceActivity: emitWorkspaceActivityMock, } as unknown as WorkspaceService; @@ -313,6 +315,36 @@ describe("AgentStatusService", () => { expect(new Set(persistedIds)).toEqual(new Set(ids)); }); + test("does not invoke the generator if stopped during transcript build or candidates fetch", async () => { + // Earlier awaits (history read, candidates fetch) are also yield points. + // If stop() fires during one of them, kicking off the multi-second + // provider call afterwards would leak LLM work past the service's + // declared lifecycle. 
+ await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "long-running task") + ); + + let releaseCandidates!: () => void; + const gate = new Promise((resolve) => { + releaseCandidates = resolve; + }); + getCandidatesMock.mockImplementationOnce(async () => { + await gate; + return ["anthropic:claude-haiku-4-5"]; + }); + + const service = createService(); + const inFlight = getInternals(service).runForWorkspace(workspaceId); + service.stop(); + releaseCandidates(); + await inFlight; + + expect(generateSpy).not.toHaveBeenCalled(); + expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + }); + test("does not persist or emit if the service is stopped while a generation is in flight", async () => { // Real provider calls can take seconds to minutes. If stop() fires // mid-generation (app shutdown), persisting afterwards would leak writes @@ -322,11 +354,20 @@ describe("AgentStatusService", () => { createMuxMessage("u1", "user", "long-running task") ); + // Two-stage gate: signal when the generator actually starts (so the + // test can fire stop() after the pre-generator guard has passed) and + // a release the test holds until it's ready for the generator to + // resolve. 
+ let signalStarted!: () => void; + const startedSignal = new Promise((resolve) => { + signalStarted = resolve; + }); let releaseGenerate!: () => void; const gate = new Promise((resolve) => { releaseGenerate = resolve; }); generateSpy.mockImplementationOnce(async () => { + signalStarted(); await gate; return Ok({ status: { emoji: "πŸ› οΈ", message: "Doing work" }, @@ -336,6 +377,7 @@ describe("AgentStatusService", () => { const service = createService(); const inFlight = getInternals(service).runForWorkspace(workspaceId); + await startedSignal; service.stop(); releaseGenerate(); await inFlight; diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index d2414a4770..a1f759ea2e 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -197,9 +197,14 @@ export class AgentStatusService { const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); if (candidates.length === 0) return; + // Skip the expensive provider call if stop() fired during any of the + // earlier awaits (transcript build, candidates fetch). The generator + // can take seconds to a minute, so kicking it off after shutdown + // would leak background LLM work past our lifecycle. + if (this.stopped) return; const result = await generateWorkspaceStatus(transcript, candidates, this.aiService); - // The generator can take seconds to a minute; bail if stop() fired - // mid-flight to avoid leaking writes past our lifecycle. + // Re-check after the generator returns: the same hazard at a later + // await boundary. 
if (this.stopped) return; if (!result.success) { log.debug("AgentStatusService: status generation failed; will retry next tick", { From 432459d3801ced1f1255ec160dafa56e661851e6 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 10:40:42 -0500 Subject: [PATCH 11/33] refactor: address PR feedback - hide bespoke tools, collapse aiStatus, drop startup delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review threads, all in service of minimal API surface: 1. Hide bespoke tools from docs (docs/hooks/tools.mdx). - Add 'internal: true' to propose_name and propose_status in TOOL_DEFINITIONS. Filter internal tools out of the hook env-var block in scripts/gen_docs.ts. Users can't write hooks for these tools (they run via bespoke streamText paths in their own services), so listing their env vars is misleading. - Auto-regenerated tools.mdx and builtInSkillContent.generated.ts no longer mention propose_name/propose_status. 2. Collapse aiStatus into existing todoStatus field. - The frontend never distinguished between aiStatus and todoStatus (just '?? chained' them). Drop the aiStatus schema field and write the AI-generated payload into the same todoStatus slot. - Rename setAiStatus → setSidebarStatus to reflect the unified role: "persistent sidebar status, set by either the AI path or the todo-derivation path; last write wins". - Drop the field from ExtensionMetadata, coerceExtensionMetadata, and toWorkspaceActivitySnapshot. - Frontend precedence simplifies from 'transientStatus ?? aiStatus ?? todoStatus' to just 'transientStatus ?? todoStatus'. 3. Drop AGENT_STATUS_STARTUP_DELAY_MS. - With AGENT_STATUS_MAX_CONCURRENT=1, dispatch is already serialized across workspaces. No need for a separate startup delay — the per-tick interval naturally smooths load. Drops the startupTimeout field, begin() helper, and the startupDelayMs option. Net delta: +83 / -163 lines. 
All 125 targeted tests pass; full node suite (3663 tests) clean. --- docs/hooks/tools.mdx | 20 -------- scripts/gen_docs.ts | 3 ++ src/browser/stores/WorkspaceStore.test.ts | 29 ++++++----- src/browser/stores/WorkspaceStore.ts | 17 +++---- src/common/orpc/schemas/workspace.ts | 6 +-- src/common/utils/tools/toolDefinitions.ts | 6 +++ src/constants/agentStatus.ts | 11 ++--- src/node/services/ExtensionMetadataService.ts | 29 ++++++----- .../builtInSkillContent.generated.ts | 20 -------- src/node/services/agentStatusService.test.ts | 43 ++++++++-------- src/node/services/agentStatusService.ts | 49 +++++-------------- src/node/utils/extensionMetadata.ts | 13 ----- 12 files changed, 83 insertions(+), 163 deletions(-) diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index efafac7c79..6566917158 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -552,26 +552,6 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
-
-propose_name (2) - -| Env var | JSON path | Type | Description | -| ---------------------- | --------- | ------ | -------------------------------------------------------------------------------------------------- | -| `MUX_TOOL_INPUT_NAME` | `name` | string | Codebase area (1-2 words, max 15 chars): lowercase, hyphens only, e.g. 'sidebar', 'auth', 'config' | -| `MUX_TOOL_INPUT_TITLE` | `title` | string | Human-readable title (2-5 words): verb-noun format like 'Fix plan mode' | - -
- -
-propose_status (2) - -| Env var | JSON path | Type | Description | -| ------------------------ | --------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `MUX_TOOL_INPUT_EMOJI` | `emoji` | string | A single emoji that represents the agent's current activity (e.g. 'πŸ”', 'πŸ› οΈ', 'πŸ§ͺ', 'πŸ“') | -| `MUX_TOOL_INPUT_MESSAGE` | `message` | string | A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status') | - -
-
skills_catalog_read (3) diff --git a/scripts/gen_docs.ts b/scripts/gen_docs.ts index 6aa6bdb54c..a8ca5e4826 100644 --- a/scripts/gen_docs.ts +++ b/scripts/gen_docs.ts @@ -675,6 +675,9 @@ function generateToolHookEnvVarsBlock(): string { const tools = Object.entries(TOOL_DEFINITIONS).sort(([a], [b]) => a.localeCompare(b)); for (const [toolName, def] of tools) { + // Skip internal/bespoke tools (e.g. propose_name, propose_status) β€” users + // can't write hooks for them, so listing their env vars is misleading. + if ((def as { internal?: boolean }).internal) continue; const vars = collectToolHookEnvVarsFromZodSchema(def.schema); if (vars.length === 0) continue; diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index 4564f75f29..b2a7ef713d 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -2634,19 +2634,18 @@ describe("WorkspaceStore", () => { expect(state.agentStatus).toEqual(activitySnapshot.displayStatus ?? undefined); }); - it("prefers AI-generated aiStatus over todo-derived status for inactive workspaces", async () => { - // The whole point of the small-model status path: when AgentStatusService - // has produced a fresh aiStatus, it should win over todoStatus in the - // sidebar. Without this precedence the sidebar would still surface the - // legacy todo derivation, defeating the feature. - const workspaceId = "activity-fallback-ai-status-workspace"; + it("uses todoStatus from the activity snapshot for inactive workspaces", async () => { + // todoStatus is the persistent sidebar slot β€” written by both the + // small-model AgentStatusService and the todo-derivation path. Inactive + // workspaces don't run the aggregator, so the snapshot's todoStatus is + // what the sidebar must show. 
+ const workspaceId = "activity-fallback-todo-status-workspace"; const activitySnapshot: WorkspaceActivitySnapshot = { recency: new Date("2024-01-04T16:00:00.000Z").getTime(), streaming: false, lastModel: "claude-sonnet-4", lastThinkingLevel: null, - aiStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, - todoStatus: { emoji: "πŸ”„", message: "Run typecheck" }, + todoStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, hasTodos: true, }; @@ -2657,22 +2656,22 @@ describe("WorkspaceStore", () => { createAndAddWorkspace(store, workspaceId, { createdAt: "2020-01-01T00:00:00.000Z" }, false); const state = store.getWorkspaceState(workspaceId); - expect(state.agentStatus).toEqual(activitySnapshot.aiStatus ?? undefined); + expect(state.agentStatus).toEqual(activitySnapshot.todoStatus ?? undefined); }); - it("keeps displayStatus precedence over aiStatus so explicit system status still wins", async () => { + it("keeps displayStatus precedence over todoStatus so explicit system status still wins", async () => { // displayStatus is a deliberate, system-driven signal (e.g. "Compacting - // idle workspace…"). It must outrank aiStatus, otherwise the periodic - // small-model run would mask the explicit progress message the backend - // is trying to communicate. - const workspaceId = "activity-fallback-display-over-ai"; + // idle workspace…"). It must outrank todoStatus β€” otherwise a periodic + // small-model rewrite of todoStatus would mask the explicit progress + // message the backend is trying to communicate. 
+ const workspaceId = "activity-fallback-display-over-todo"; const activitySnapshot: WorkspaceActivitySnapshot = { recency: new Date("2024-01-04T17:00:00.000Z").getTime(), streaming: false, lastModel: "claude-sonnet-4", lastThinkingLevel: null, displayStatus: { emoji: "πŸ’€", message: "Compacting idle workspace" }, - aiStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, + todoStatus: { emoji: "πŸ› οΈ", message: "Wiring sidebar precedence" }, hasTodos: false, }; diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 5251d8df3c..d3ff61a4b1 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1752,21 +1752,19 @@ export class WorkspaceStore { // `muxMetadata.displayStatus` for heartbeat / idle-compaction / background // turns. We collapse them into a single `transientStatus` so the // precedence works the same way for both branches and never lets a stale - // aiStatus mask an explicit system-set message. + // todoStatus mask an explicit system-set message. const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; const transientStatus = displayStatus ?? fallbackAgentStatus; - // Replaces the legacy todo-derived status as the primary persistent - // sidebar signal. Produced periodically by AgentStatusService using the - // same "small model" path as title generation; we keep todoStatus below - // as a fallback while the AI status is being generated for the first - // time, on errors, or before the activity snapshot has caught up. - const aiStatus = activity?.aiStatus ?? undefined; + // Persistent sidebar status. Sourced from AgentStatusService (preferred, + // small-model summary of the trailing transcript) or derived from the + // current todo list (fallback for fresh workspaces). 
Both writers target + // the same `todoStatus` slot β€” last write wins. const todoStatus = useAggregatorState ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) : (activity?.todoStatus ?? (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); - const agentStatus = transientStatus ?? aiStatus ?? todoStatus; + const agentStatus = transientStatus ?? todoStatus; return { name: metadata?.name ?? workspaceId, // Fall back to ID if metadata missing @@ -2464,8 +2462,7 @@ export class WorkspaceStore { previous?.recency !== snapshot?.recency || previous?.hasTodos !== snapshot?.hasTodos || !areAgentStatusesEqual(previous?.displayStatus, snapshot?.displayStatus) || - !areAgentStatusesEqual(previous?.todoStatus, snapshot?.todoStatus) || - !areAgentStatusesEqual(previous?.aiStatus, snapshot?.aiStatus); + !areAgentStatusesEqual(previous?.todoStatus, snapshot?.todoStatus); if (!changed) { return; diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts index 28472ae82d..3e19d8fcd4 100644 --- a/src/common/orpc/schemas/workspace.ts +++ b/src/common/orpc/schemas/workspace.ts @@ -209,11 +209,7 @@ export const WorkspaceActivitySnapshotSchema = z.object({ }), todoStatus: WorkspaceAgentStatusSchema.nullable().optional().meta({ description: - "Status derived from the current todo list (legacy, kept as a fallback when aiStatus is unavailable).", - }), - aiStatus: WorkspaceAgentStatusSchema.nullable().optional().meta({ - description: - "AI-generated status summary produced by the small-model status path. When set, takes precedence over todoStatus in the sidebar.", + "Persistent sidebar status. 
Set by the small-model AgentStatusService when available, with a todo-derived fallback.", }), hasTodos: z.boolean().optional().meta({ description: "Whether the workspace still had todos when streaming last stopped", diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index 9e7951e392..4c869c056b 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -1341,17 +1341,23 @@ export const TOOL_DEFINITIONS = { "Each question must include 2–4 options; an 'Other' choice is provided automatically.", schema: AskUserQuestionToolArgsSchema, }, + // `internal` tools are excluded from user-facing tool docs (hooks/tools.mdx + // env-var tables) because users can't write hooks for them β€” they run via + // bespoke streamText paths in their own services, not the standard tool + // execution pipeline. See gen_docs.ts. propose_name: { description: "Propose a workspace name and title. You MUST call this tool exactly once with your chosen name and title. " + "Do not emit a text response; call this tool immediately.", schema: ProposeNameToolArgsSchema, + internal: true, }, propose_status: { description: "Propose a short sidebar status (emoji + 2-6 word verb-led phrase) summarizing what the agent is currently doing. " + "You MUST call this tool exactly once. Do not emit a text response; call this tool immediately.", schema: ProposeStatusToolArgsSchema, + internal: true, }, propose_plan: { description: diff --git a/src/constants/agentStatus.ts b/src/constants/agentStatus.ts index 78d4533a98..8e448ee57e 100644 --- a/src/constants/agentStatus.ts +++ b/src/constants/agentStatus.ts @@ -16,17 +16,12 @@ export const AGENT_STATUS_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; /** * How often the scheduler wakes up to scan workspaces. Per-workspace cadence * is enforced separately, so this can be small enough to make focus - * transitions feel snappy without driving redundant work. 
+ * transitions feel snappy without driving redundant work. With + * AGENT_STATUS_MAX_CONCURRENT=1 the per-tick dispatch naturally smooths load + * across many workspaces β€” no separate startup delay needed. */ export const AGENT_STATUS_TICK_INTERVAL_MS = 10 * 1000; -/** - * Delay before the first scheduler pass after startup. Lets initial chat - * replay and metadata bootstrap settle, and avoids a thundering herd of - * model calls during launch. - */ -export const AGENT_STATUS_STARTUP_DELAY_MS = 30 * 1000; - /** Token budget for the trailing chat-transcript window we feed the model. */ export const AGENT_STATUS_MAX_TRANSCRIPT_TOKENS = 8000; diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index 4b0c09635b..6714ba9b95 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -245,20 +245,21 @@ export class ExtensionMetadataService { } /** - * Update the AI-generated sidebar status payload for a workspace. - * Pass `null` to clear it. + * AgentStatusService writes its AI-generated payload into the same + * `todoStatus` field used by the todo-derived path. Passing `null` clears + * the slot. * - * AgentStatusService is a background scheduler with no causal connection - * to user activity, so this writer never advances `recency`. Existing - * entries keep their user-interaction recency (otherwise idle workspaces - * would be re-sorted and marked unread every tick); brand-new entries - * (rare: workspace has chat but no metadata yet) are seeded with - * `recency=0` so the AI status doesn't artificially promote them. - * `updateRecency` will set the real value on the next user interaction. + * Unlike `setTodoStatus`, this writer: + * - Never advances `recency`. Background regeneration must not promote + * idle workspaces in the sidebar or mark them unread. 
Existing entries + * keep their user-interaction recency; brand-new entries (rare: chat + * exists but no metadata yet) are seeded with `recency=0` until the + * next real user interaction. + * - Doesn't touch `hasTodos`. The todo-derivation path owns that flag. */ - async setAiStatus( + async setSidebarStatus( workspaceId: string, - aiStatus: ExtensionAgentStatus | null + status: ExtensionAgentStatus | null ): Promise { return this.withSerializedMutation(async () => { const data = await this.load(); @@ -272,7 +273,11 @@ export class ExtensionMetadataService { displayStatus: null, lastStatusUrl: null, }; - workspace.aiStatus = aiStatus; + if (status) { + workspace.todoStatus = status; + } else { + delete workspace.todoStatus; + } data.workspaces[workspaceId] = workspace; await this.save(data); return toWorkspaceActivitySnapshot(workspace); diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 329b601f32..5112cb95a6 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -4200,26 +4200,6 @@ export const BUILTIN_SKILL_FILES: Record> = { "
", "", "
", - "propose_name (2)", - "", - "| Env var | JSON path | Type | Description |", - "| ---------------------- | --------- | ------ | -------------------------------------------------------------------------------------------------- |", - "| `MUX_TOOL_INPUT_NAME` | `name` | string | Codebase area (1-2 words, max 15 chars): lowercase, hyphens only, e.g. 'sidebar', 'auth', 'config' |", - "| `MUX_TOOL_INPUT_TITLE` | `title` | string | Human-readable title (2-5 words): verb-noun format like 'Fix plan mode' |", - "", - "
", - "", - "
", - "propose_status (2)", - "", - "| Env var | JSON path | Type | Description |", - "| ------------------------ | --------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |", - "| `MUX_TOOL_INPUT_EMOJI` | `emoji` | string | A single emoji that represents the agent's current activity (e.g. 'πŸ”', 'πŸ› οΈ', 'πŸ§ͺ', 'πŸ“') |", - "| `MUX_TOOL_INPUT_MESSAGE` | `message` | string | A short verb-led phrase (2-6 words) describing what the agent is currently working on, in sentence case, no punctuation, no quotes (e.g. 'Investigating crash', 'Implementing sidebar status') |", - "", - "
", - "", - "
", "skills_catalog_read (3)", "", "| Env var | JSON path | Type | Description |", diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 4ba636dfaa..941de6adfb 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -33,7 +33,7 @@ describe("AgentStatusService", () => { let mockAiService: AIService; let windowService: WindowService; let isFocused = true; - let setAiStatusMock: ReturnType< + let setSidebarStatusMock: ReturnType< typeof mock<(workspaceId: string, status: unknown) => Promise<{ recency: number }>> >; let emitWorkspaceActivityMock: ReturnType< @@ -73,7 +73,6 @@ describe("AgentStatusService", () => { mockAiService, { clock: options?.clock, - startupDelayMs: 0, tickIntervalMs: 60 * 60 * 1000, } ); @@ -99,11 +98,11 @@ describe("AgentStatusService", () => { emitWorkspaceActivity: emitWorkspaceActivityMock, } as unknown as WorkspaceService; - setAiStatusMock = mock((_workspaceId: string, _status: unknown) => + setSidebarStatusMock = mock((_workspaceId: string, _status: unknown) => Promise.resolve({ recency: 0 }) ); mockExtensionMetadata = { - setAiStatus: setAiStatusMock, + setSidebarStatus: setSidebarStatusMock, } as unknown as ExtensionMetadataService; mockTokenizer = { @@ -150,8 +149,8 @@ describe("AgentStatusService", () => { expect(generationCall[0]).toContain("Assistant: Running tests now"); expect(generationCall[1]).toEqual(["anthropic:claude-haiku-4-5"]); - expect(setAiStatusMock).toHaveBeenCalledTimes(1); - const [persistedWorkspaceId, persistedStatus] = setAiStatusMock.mock.calls[0]; + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); + const [persistedWorkspaceId, persistedStatus] = setSidebarStatusMock.mock.calls[0]; expect(persistedWorkspaceId).toBe(workspaceId); expect(persistedStatus).toEqual({ emoji: "πŸ› οΈ", message: "Editing source" }); }); @@ -166,11 +165,11 @@ describe("AgentStatusService", () => { const service = 
createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - expect(setAiStatusMock).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - expect(setAiStatusMock).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); }); test("includes the in-flight partial assistant message so the hash refreshes mid-stream", async () => { @@ -212,16 +211,16 @@ describe("AgentStatusService", () => { ); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(2); - expect(setAiStatusMock).toHaveBeenCalledTimes(2); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(2); }); test("skips regeneration when there is no chat history yet", async () => { // Empty workspaces have nothing to summarize. Don't pay for a - // hallucinated status, and don't blank an existing aiStatus on disk. + // hallucinated status, and don't blank an existing todoStatus on disk. 
const service = createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).not.toHaveBeenCalled(); - expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); }); test("focused windows regenerate at the focused interval; unfocused windows wait longer", async () => { @@ -311,7 +310,7 @@ describe("AgentStatusService", () => { now += 31_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(3); - const persistedIds = setAiStatusMock.mock.calls.map((call) => call[0]); + const persistedIds = setSidebarStatusMock.mock.calls.map((call) => call[0]); expect(new Set(persistedIds)).toEqual(new Set(ids)); }); @@ -341,7 +340,7 @@ describe("AgentStatusService", () => { await inFlight; expect(generateSpy).not.toHaveBeenCalled(); - expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); }); @@ -383,7 +382,7 @@ describe("AgentStatusService", () => { await inFlight; expect(generateSpy).toHaveBeenCalledTimes(1); - expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); }); @@ -396,36 +395,36 @@ describe("AgentStatusService", () => { createMuxMessage("u1", "user", "kick off a task") ); - setAiStatusMock.mockImplementationOnce(() => Promise.reject(new Error("disk full"))); + setSidebarStatusMock.mockImplementationOnce(() => Promise.reject(new Error("disk full"))); const service = createService(); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(1); - expect(setAiStatusMock).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); // Activity must not emit on persist failure. 
expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); // Same transcript, second pass: retries because the previous failure // left lastInputHash unchanged. - setAiStatusMock.mockImplementation((_w, _s) => Promise.resolve({ recency: 0 })); + setSidebarStatusMock.mockImplementation((_w, _s) => Promise.resolve({ recency: 0 })); await getInternals(service).runForWorkspace(workspaceId); expect(generateSpy).toHaveBeenCalledTimes(2); - expect(setAiStatusMock).toHaveBeenCalledTimes(2); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(2); expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); - test("setAiStatus must not bump workspace recency (would re-sort idle workspaces)", async () => { + test("setSidebarStatus must not bump workspace recency (would re-sort idle workspaces)", async () => { // AgentStatusService is a background scheduler with no causal // connection to user activity, so its writes must not bump recency β€” // that would promote idle workspaces in the sidebar and mark them // unread every tick. Test ExtensionMetadataService directly to pin the - // contract for any future caller of setAiStatus. + // contract for any future caller of setSidebarStatus. 
const dir = mkdtempSync(join(tmpdir(), "mux-recency-")); try { const svc = new ExtensionMetadataService(join(dir, "metadata.json")); await svc.updateRecency("ws", 100); - await svc.setAiStatus("ws", { emoji: "πŸ› οΈ", message: "Doing work" }); + await svc.setSidebarStatus("ws", { emoji: "πŸ› οΈ", message: "Doing work" }); const after = await svc.getSnapshot("ws"); expect(after?.recency).toBe(100); } finally { @@ -446,6 +445,6 @@ describe("AgentStatusService", () => { await getInternals(service).runTick(); expect(generateSpy).not.toHaveBeenCalled(); - expect(setAiStatusMock).not.toHaveBeenCalled(); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); }); }); diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index a1f759ea2e..5e194f500b 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -6,7 +6,6 @@ import { AGENT_STATUS_MAX_MESSAGE_CHARS, AGENT_STATUS_MAX_TRAILING_MESSAGES, AGENT_STATUS_MAX_TRANSCRIPT_TOKENS, - AGENT_STATUS_STARTUP_DELAY_MS, AGENT_STATUS_TICK_INTERVAL_MS, AGENT_STATUS_UNFOCUSED_INTERVAL_MS, } from "@/constants/agentStatus"; @@ -27,8 +26,6 @@ const FALLBACK_TOKENIZER_MODEL = "anthropic:claude-haiku-4-5"; export interface AgentStatusServiceOptions { /** Override for test injection. Defaults to `Date.now`. */ clock?: () => number; - /** Override startup delay. Defaults to AGENT_STATUS_STARTUP_DELAY_MS. */ - startupDelayMs?: number; /** Override scheduler tick interval. Defaults to AGENT_STATUS_TICK_INTERVAL_MS. 
*/ tickIntervalMs?: number; } @@ -60,10 +57,8 @@ export class AgentStatusService { private readonly tracked = new Map(); private readonly inFlightPromises = new Set>(); private readonly clock: () => number; - private readonly startupDelayMs: number; private readonly tickIntervalMs: number; - private startupTimeout: ReturnType | null = null; private checkInterval: ReturnType | null = null; private stopped = false; private tickInFlight = false; @@ -79,46 +74,21 @@ export class AgentStatusService { options: AgentStatusServiceOptions = {} ) { this.clock = options.clock ?? (() => Date.now()); - this.startupDelayMs = options.startupDelayMs ?? AGENT_STATUS_STARTUP_DELAY_MS; this.tickIntervalMs = options.tickIntervalMs ?? AGENT_STATUS_TICK_INTERVAL_MS; } start(): void { - assert( - this.checkInterval === null && this.startupTimeout === null, - "AgentStatusService.start() called while already running" - ); + assert(this.checkInterval === null, "AgentStatusService.start() called while already running"); this.stopped = false; - - const begin = () => { - if (this.stopped) return; - // Fire one tick immediately so the user sees an initial status without - // waiting a full interval after the startup delay. - void this.runTick(); - this.checkInterval = setInterval(() => void this.runTick(), this.tickIntervalMs); - }; - - if (this.startupDelayMs <= 0) { - begin(); - } else { - this.startupTimeout = setTimeout(() => { - this.startupTimeout = null; - begin(); - }, this.startupDelayMs); - } - - log.info("AgentStatusService started", { - startupDelayMs: this.startupDelayMs, - tickIntervalMs: this.tickIntervalMs, - }); + // No startup delay: AGENT_STATUS_MAX_CONCURRENT=1 already serializes + // generation across workspaces, so the first tick can fire immediately + // without risking a thundering herd at launch. 
+ this.checkInterval = setInterval(() => void this.runTick(), this.tickIntervalMs); + log.info("AgentStatusService started", { tickIntervalMs: this.tickIntervalMs }); } stop(): void { this.stopped = true; - if (this.startupTimeout) { - clearTimeout(this.startupTimeout); - this.startupTimeout = null; - } if (this.checkInterval) { clearInterval(this.checkInterval); this.checkInterval = null; @@ -189,7 +159,7 @@ export class AgentStatusService { state.lastRanAt = this.clock(); // Empty workspace: nothing to summarize. Don't blank an existing - // aiStatus β€” that would clobber a status produced before compaction. + // todoStatus β€” that would clobber a status produced before compaction. if (transcript.trim().length === 0) return; // Idle/frozen: identical trailing window since last successful run. if (state.lastInputHash === inputHash) return; @@ -218,7 +188,10 @@ export class AgentStatusService { // fails we want the next tick to retry against the same transcript // instead of dedup'ing against a hash we never committed. try { - const snapshot = await this.extensionMetadata.setAiStatus(workspaceId, result.data.status); + const snapshot = await this.extensionMetadata.setSidebarStatus( + workspaceId, + result.data.status + ); if (this.stopped) return; state.lastInputHash = inputHash; this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); diff --git a/src/node/utils/extensionMetadata.ts b/src/node/utils/extensionMetadata.ts index 47a147eb17..aebc8c63bf 100644 --- a/src/node/utils/extensionMetadata.ts +++ b/src/node/utils/extensionMetadata.ts @@ -28,9 +28,6 @@ export interface ExtensionMetadata { // Persists the latest display-status URL so later updates without a URL // can still carry the last deep link even after displayStatus is cleared. lastStatusUrl?: string | null; - // AI-generated status summary (workspaceStatusGenerator). When present, - // takes precedence over todoStatus in the sidebar. 
- aiStatus?: ExtensionAgentStatus | null; } /** @@ -95,12 +92,6 @@ export function coerceExtensionMetadata(value: unknown): ExtensionMetadata | nul ? null : (coerceAgentStatus(record.todoStatus) ?? undefined) : undefined; - const aiStatus = - "aiStatus" in record - ? record.aiStatus === null - ? null - : (coerceAgentStatus(record.aiStatus) ?? undefined) - : undefined; return { recency: record.recency, @@ -113,7 +104,6 @@ export function coerceExtensionMetadata(value: unknown): ExtensionMetadata | nul agentStatus: coerceAgentStatus(record.agentStatus), ...(displayStatus !== undefined ? { displayStatus } : {}), ...(todoStatus !== undefined ? { todoStatus } : {}), - ...(aiStatus !== undefined ? { aiStatus } : {}), ...(typeof record.hasTodos === "boolean" ? { hasTodos: record.hasTodos } : {}), lastStatusUrl: coerceStatusUrl(record.lastStatusUrl), }; @@ -132,8 +122,6 @@ export function toWorkspaceActivitySnapshot( // agentStatus field. Project that forward into todoStatus until a fresh todo_write // or stream-stop snapshot rewrites the workspace metadata. coerceAgentStatus(metadata.agentStatus); - const aiStatus = metadata.aiStatus !== undefined ? metadata.aiStatus : null; - return { recency: metadata.recency, streaming: metadata.streaming, @@ -144,7 +132,6 @@ export function toWorkspaceActivitySnapshot( lastThinkingLevel: metadata.lastThinkingLevel ?? null, ...(displayStatus ? { displayStatus } : {}), ...(todoStatus ? { todoStatus } : {}), - ...(aiStatus ? { aiStatus } : {}), ...(typeof metadata.hasTodos === "boolean" ? 
{ hasTodos: metadata.hasTodos } : {}), }; } From 272e3ddfaaccf9db005851024be3c56451feebfa Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 10:47:04 -0500 Subject: [PATCH 12/33] fix: prefer activity todoStatus over live aggregator derivation for active workspaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex caught: when AgentStatusService writes its AI-generated payload to `activity.todoStatus`, the active-workspace branch was still preferring `deriveTodoStatus(aggregatorTodos)`. So any active workspace with todos would never surface the AI-generated status β€” defeating the feature for the most common case. Fix: invert the precedence in the active branch so the persisted todoStatus (whichever writer touched it last β€” AI or todo-derivation) wins, with the live aggregator derivation only as a fallback when the snapshot has no entry yet (brand-new workspaces). Add a regression test that gives the workspace BOTH live aggregator todos AND a persisted todoStatus, asserting the persisted one wins. --- src/browser/stores/WorkspaceStore.test.ts | 26 +++++++++++++++++++++++ src/browser/stores/WorkspaceStore.ts | 11 +++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index b2a7ef713d..24d26405f6 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -2591,6 +2591,32 @@ describe("WorkspaceStore", () => { expect(state.agentStatus).toEqual({ emoji: "πŸ”„", message: "Run typecheck" }); }); + it("prefers persisted activity todoStatus over live aggregator todos for active workspaces", async () => { + // AgentStatusService writes its AI-generated payload into the same + // `todoStatus` slot. 
If the active branch always preferred the live + // aggregator derivation, the AI-generated status would never surface + // for any workspace with todos β€” defeating the feature. + const workspaceId = "active-ai-overrides-todos"; + const activitySnapshot: WorkspaceActivitySnapshot = { + recency: new Date("2024-01-04T13:00:00.000Z").getTime(), + streaming: true, + lastModel: "claude-sonnet-4", + lastThinkingLevel: null, + todoStatus: { emoji: "πŸ› οΈ", message: "AI-generated summary" }, + hasTodos: true, + }; + + mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot }); + recreateStore(); + await tick(0); + + createAndAddWorkspace(store, workspaceId); + seedPinnedTodos(store, workspaceId, [{ content: "Run typecheck", status: "in_progress" }]); + + const state = store.getWorkspaceState(workspaceId); + expect(state.agentStatus).toEqual(activitySnapshot.todoStatus ?? undefined); + }); + it("prefers todo-derived activity status for inactive workspaces", async () => { const workspaceId = "activity-fallback-todo-status-workspace"; const activitySnapshot: WorkspaceActivitySnapshot = { diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index d3ff61a4b1..b581853d63 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1756,12 +1756,13 @@ export class WorkspaceStore { const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; const transientStatus = displayStatus ?? fallbackAgentStatus; - // Persistent sidebar status. Sourced from AgentStatusService (preferred, - // small-model summary of the trailing transcript) or derived from the - // current todo list (fallback for fresh workspaces). Both writers target - // the same `todoStatus` slot β€” last write wins. + // Persistent sidebar status. 
The activity snapshot's `todoStatus` is the + // canonical "last write wins" slot — both AgentStatusService and the + // todo-derivation path write to it. Prefer it in both branches, falling + // back to a live aggregator derivation only when the snapshot has no + // entry yet (brand-new workspaces before the first persist). const todoStatus = useAggregatorState - ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) + ? (activity?.todoStatus ?? deriveTodoStatus(aggregatorTodos) ?? undefined) : (activity?.todoStatus ?? (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); const agentStatus = transientStatus ?? todoStatus; From 227cfb3acefbaa21f09bf632201e722daa85e774 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:03:19 -0500 Subject: [PATCH 13/33] fix: stop sidebar status from defaulting to 'Awaiting next task' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous prompt explicitly handed the small model the literal phrase 'Awaiting next task' as the suggested output for any chat that 'looks idle or finished'. Right after an agent completes a turn, the chat *does* look finished — so the model just copies the example verbatim, producing 'Awaiting next task' for nearly every workspace. Reframe the prompt around 'most recent activity' (what the agent was last working on) instead of 'what the agent is doing right now', drop the special idle/finished branch, and explicitly forbid generic placeholder phrases so the model has to name the concrete activity it sees in the transcript. 
--- src/node/services/workspaceStatusGenerator.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index 0568b08642..5515734032 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -37,17 +37,17 @@ export function buildWorkspaceStatusPrompt(transcript: string): string { // practice, but the model still needs something to ground on. const body = transcript.trim().length > 0 ? transcript : "(no recent transcript)"; return [ - "You produce a short sidebar status that tells the user what an AI coding agent is doing right now.\n\n", + "You produce a short sidebar status summarizing the most recent activity in an AI coding agent's chat.\n\n", "Recent chat transcript (oldest first, newest last):\n", "\n", body, "\n\n\n", "Requirements:\n", - "- Focus on the most recent activity, not the overall task scope.\n", + "- Describe the specific activity the agent was last working on, drawn from the actual transcript content.\n", + "- Do NOT use generic placeholders such as 'Awaiting next task', 'Doing work', or 'Idle'. Always name the concrete activity (file, feature, bug, command, etc.).\n", "- emoji: A single emoji that visually represents the activity.\n", "- message: 2-6 words, present tense, verb-led, sentence case, no punctuation, no quotes.\n", - '- Examples of good messages: "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files", "Awaiting user reply".\n', - '- If the agent appears idle or finished, describe that state instead (e.g. "Awaiting next task").\n\n', + '- Examples: "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files".\n\n', "Call propose_status exactly once with your chosen emoji and message. 
Do not emit any text response.", ].join(""); } From 3628b9f358d6e4288ef6b8be290c554a9c284c4f Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:12:48 -0500 Subject: [PATCH 14/33] feat: streaming-aware cadence + past-tense status for completed activities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two refinements based on user feedback: 1. Streaming-aware cadence. AgentStatusService now reads the per-workspace `streaming` flag from the activity snapshots and picks intervals accordingly: active + focused: 10s (was 30s) active + unfocused: 30s (was 120s) idle + focused: 30s (unchanged) idle + unfocused: 120s (unchanged) So when an agent is actively working, the sidebar refreshes fast enough to follow along; when nothing's happening we conserve provider calls. Constants renamed: AGENT_STATUS_FOCUSED_INTERVAL_MS → AGENT_STATUS_IDLE_FOCUSED_INTERVAL_MS AGENT_STATUS_UNFOCUSED_INTERVAL_MS → AGENT_STATUS_IDLE_UNFOCUSED_INTERVAL_MS New: AGENT_STATUS_ACTIVE_FOCUSED_INTERVAL_MS, AGENT_STATUS_ACTIVE_UNFOCUSED_INTERVAL_MS dispatch() loads extensionMetadata.getAllSnapshots() once per tick and anchors lastRanAt to the tick start time (instead of clock-after-transcript-build) so eligibility math is exact: tick[k+1]-tick[k] === interval, no sub-ms drift bumping cadence to 2× the configured value. 2. Past tense for completed activities. The prompt now allows past tense ("Wrote tests", "Fixed sidebar bug") when the most recent assistant turn looks complete, in addition to the existing present-tense form for in-progress activities. Helps the sidebar accurately reflect "the agent finished doing X" instead of always using the present continuous, which was misleading for completed work. Tests: new test pins the active intervals; existing focused/unfocused test renamed to clarify it covers the idle path. 
--- src/constants/agentStatus.ts | 16 +++-- src/node/services/agentStatusService.test.ts | 66 ++++++++++++++++++- src/node/services/agentStatusService.ts | 52 ++++++++++----- src/node/services/workspaceStatusGenerator.ts | 6 +- 4 files changed, 116 insertions(+), 24 deletions(-) diff --git a/src/constants/agentStatus.ts b/src/constants/agentStatus.ts index 8e448ee57e..125658a004 100644 --- a/src/constants/agentStatus.ts +++ b/src/constants/agentStatus.ts @@ -7,11 +7,17 @@ * is byte-for-byte unchanged. */ -/** Per-workspace regen interval when the desktop window is focused. */ -export const AGENT_STATUS_FOCUSED_INTERVAL_MS = 30 * 1000; - -/** Per-workspace regen interval when the desktop window is blurred. */ -export const AGENT_STATUS_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; +/** + * Per-workspace regen intervals split four ways: streaming workspaces + * (active) refresh much faster so the user can follow the agent in real + * time; idle workspaces (no active stream) back off because the chat + * isn't moving anyway. Either case backs off further when the desktop + * window is blurred. + */ +export const AGENT_STATUS_ACTIVE_FOCUSED_INTERVAL_MS = 10 * 1000; +export const AGENT_STATUS_ACTIVE_UNFOCUSED_INTERVAL_MS = 30 * 1000; +export const AGENT_STATUS_IDLE_FOCUSED_INTERVAL_MS = 30 * 1000; +export const AGENT_STATUS_IDLE_UNFOCUSED_INTERVAL_MS = 2 * 60 * 1000; /** * How often the scheduler wakes up to scan workspaces. 
Per-workspace cadence diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 941de6adfb..14b6d40f56 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -36,6 +36,9 @@ describe("AgentStatusService", () => { let setSidebarStatusMock: ReturnType< typeof mock<(workspaceId: string, status: unknown) => Promise<{ recency: number }>> >; + let getAllSnapshotsMock: ReturnType< + typeof mock<() => Promise>> + >; let emitWorkspaceActivityMock: ReturnType< typeof mock<(workspaceId: string, snapshot: unknown) => void> >; @@ -101,8 +104,12 @@ describe("AgentStatusService", () => { setSidebarStatusMock = mock((_workspaceId: string, _status: unknown) => Promise.resolve({ recency: 0 }) ); + // Default: no snapshots β†’ no workspaces are streaming β†’ idle intervals. + // Tests that exercise the active intervals override this per-test. + getAllSnapshotsMock = mock(() => Promise.resolve(new Map())); mockExtensionMetadata = { setSidebarStatus: setSidebarStatusMock, + getAllSnapshots: getAllSnapshotsMock, } as unknown as ExtensionMetadataService; mockTokenizer = { @@ -223,7 +230,7 @@ describe("AgentStatusService", () => { expect(setSidebarStatusMock).not.toHaveBeenCalled(); }); - test("focused windows regenerate at the focused interval; unfocused windows wait longer", async () => { + test("idle workspaces regenerate at the idle focused/unfocused intervals", async () => { await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "Hello") @@ -275,6 +282,63 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(3); }); + test("streaming workspaces regenerate at the active intervals (10s focused, 30s unfocused)", async () => { + // The user-visible reason this test exists: when an agent is actively + // working, the sidebar status should refresh fast enough that the user + // can follow along (every 10s when 
watching, every 30s otherwise), + // versus the slower 30s/120s cadence for chats that aren't moving. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a long task") + ); + // Mark the workspace as currently streaming so dispatch picks the + // active intervals. + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve(new Map([[workspaceId, { streaming: true }]])) + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + isFocused = true; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + // 5s elapsed: inside the active-focused 10s interval β†’ skip. + now += 5_000; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a1", "assistant", "step one") + ); + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + // 10s elapsed: at the active-focused interval β†’ regenerates. + now += 5_000; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a2", "assistant", "step two") + ); + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + + // Unfocused: 10s past last run is inside the 30s active-unfocused + // interval β†’ skip. Only at 30s does it regenerate. + isFocused = false; + now += 10_000; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a3", "assistant", "step three") + ); + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + + now += 20_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(3); + }); + test("round-robins across multiple workspaces so none starve under MAX_CONCURRENT=1", async () => { // With MAX_CONCURRENT=1 and a fixed iteration order, the first workspace // would always become re-eligible before later ones got a turn. 
The diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 5e194f500b..538fcc1982 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -1,13 +1,15 @@ import { createHash } from "crypto"; import assert from "@/common/utils/assert"; import { - AGENT_STATUS_FOCUSED_INTERVAL_MS, + AGENT_STATUS_ACTIVE_FOCUSED_INTERVAL_MS, + AGENT_STATUS_ACTIVE_UNFOCUSED_INTERVAL_MS, + AGENT_STATUS_IDLE_FOCUSED_INTERVAL_MS, + AGENT_STATUS_IDLE_UNFOCUSED_INTERVAL_MS, AGENT_STATUS_MAX_CONCURRENT, AGENT_STATUS_MAX_MESSAGE_CHARS, AGENT_STATUS_MAX_TRAILING_MESSAGES, AGENT_STATUS_MAX_TRANSCRIPT_TOKENS, AGENT_STATUS_TICK_INTERVAL_MS, - AGENT_STATUS_UNFOCUSED_INTERVAL_MS, } from "@/constants/agentStatus"; import type { Config } from "@/node/config"; import type { MuxMessage } from "@/common/types/message"; @@ -43,9 +45,9 @@ interface State { * Periodic backend job that produces the sidebar's AI-generated agent status * using the same "small model" path as workspace title generation. * - * Cadence: per-workspace eligibility gates each tick. Focused windows - * regenerate at most every AGENT_STATUS_FOCUSED_INTERVAL_MS, blurred windows - * back off to AGENT_STATUS_UNFOCUSED_INTERVAL_MS. + * Cadence: streaming workspaces refresh fast so the user can follow along; + * idle workspaces back off. Both back off further when the desktop window + * is blurred. See ACTIVE_/IDLE_ intervals in @/constants/agentStatus. * * Dedup: each generation hashes its trailing-transcript window. Identical * hash to the last successful run skips regeneration (idle/frozen chats). @@ -103,7 +105,13 @@ export class AgentStatusService { if (this.stopped || this.tickInFlight) return; this.tickInFlight = true; try { - this.dispatch(); + // Anchor lastRanAt below to tick start time. 
With tick=10s and + // active-focused interval=10s, that makes the eligibility math exact: + // tick[k+1] - tick[k] === interval, so the workspace runs every tick. + // Otherwise sub-ms timer drift can degrade actual cadence to 2Γ— the + // configured interval. + const tickStartedAt = this.clock(); + await this.dispatch(tickStartedAt); // Awaited so production callers and tests observe completion. await Promise.allSettled([...this.inFlightPromises]); } catch (error) { @@ -113,11 +121,11 @@ export class AgentStatusService { } } - private dispatch(): void { - const now = this.clock(); - const interval = this.windowService.isFocused() - ? AGENT_STATUS_FOCUSED_INTERVAL_MS - : AGENT_STATUS_UNFOCUSED_INTERVAL_MS; + private async dispatch(tickStartedAt: number): Promise { + const focused = this.windowService.isFocused(); + // One disk read per tick for streaming state across all workspaces. + // Cheap, and avoids N reads inside the inner loop. + const snapshots = await this.extensionMetadata.getAllSnapshots(); // Sort eligible workspaces by lastRanAt ascending. With MAX_CONCURRENT=1, // a fixed iteration order would let the first workspace starve the rest; @@ -130,7 +138,8 @@ export class AgentStatusService { if (isWorkspaceArchived(ws.archivedAt, ws.unarchivedAt)) continue; const state = this.tracked.get(id); if (state?.inFlight) continue; - if (state && now - state.lastRanAt < interval) continue; + const interval = pickInterval(snapshots.get(id)?.streaming === true, focused); + if (state && tickStartedAt - state.lastRanAt < interval) continue; eligible.push({ id, lastRanAt: state?.lastRanAt ?? 0 }); } } @@ -140,6 +149,9 @@ export class AgentStatusService { if (this.stopped || this.inFlightPromises.size >= AGENT_STATUS_MAX_CONCURRENT) return; const state = this.ensureState(id); state.inFlight = true; + // Set lastRanAt at dispatch time (not after the async transcript + // build) so cadence is anchored to tick boundaries β€” see runTick. 
+ state.lastRanAt = tickStartedAt; const promise = this.runForWorkspace(id).finally(() => { state.inFlight = false; this.inFlightPromises.delete(promise); @@ -152,11 +164,10 @@ export class AgentStatusService { try { const transcript = await this.buildTrailingTranscript(workspaceId); const inputHash = computeInputHash(transcript); - - // Bump lastRanAt regardless of skip/run so the scheduler doesn't - // reconsider this workspace until the next interval boundary. + // dispatch() set lastRanAt to the tick start time before kicking us + // off, so the scheduler already won't reconsider this workspace until + // the next interval boundary regardless of which branch we take below. const state = this.ensureState(workspaceId); - state.lastRanAt = this.clock(); // Empty workspace: nothing to summarize. Don't blank an existing // todoStatus β€” that would clobber a status produced before compaction. @@ -301,3 +312,12 @@ function formatMessageForTranscript(message: MuxMessage): string { function computeInputHash(transcript: string): string { return createHash("sha256").update(transcript).digest("hex"); } + +function pickInterval(streaming: boolean, focused: boolean): number { + if (streaming) { + return focused + ? AGENT_STATUS_ACTIVE_FOCUSED_INTERVAL_MS + : AGENT_STATUS_ACTIVE_UNFOCUSED_INTERVAL_MS; + } + return focused ? AGENT_STATUS_IDLE_FOCUSED_INTERVAL_MS : AGENT_STATUS_IDLE_UNFOCUSED_INTERVAL_MS; +} diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index 5515734032..ea4d69dcfb 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -45,9 +45,11 @@ export function buildWorkspaceStatusPrompt(transcript: string): string { "Requirements:\n", "- Describe the specific activity the agent was last working on, drawn from the actual transcript content.\n", "- Do NOT use generic placeholders such as 'Awaiting next task', 'Doing work', or 'Idle'. 
Always name the concrete activity (file, feature, bug, command, etc.).\n", + "- Tense: use present tense if the agent appears to still be in the middle of the activity; use past tense if the most recent assistant turn looks complete (e.g. wrapped up with a summary, no pending tool calls).\n", "- emoji: A single emoji that visually represents the activity.\n", - "- message: 2-6 words, present tense, verb-led, sentence case, no punctuation, no quotes.\n", - '- Examples: "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files".\n\n', + "- message: 2-6 words, verb-led, sentence case, no punctuation, no quotes.\n", + '- Examples (in progress): "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files".\n', + '- Examples (completed): "Wrote tests", "Fixed sidebar bug", "Investigated crash", "Refactored config loader".\n\n', "Call propose_status exactly once with your chosen emoji and message. Do not emit any text response.", ].join(""); } From ebfc08062640f83ee76b92f7e9d66b0d91039cd6 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:18:05 -0500 Subject: [PATCH 15/33] fix: reject placeholder status messages post-generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex caught a salience-bias risk in the previous fix: spelling 'Awaiting next task' in the prompt — even as a forbidden example — primes small models to copy it verbatim, which is exactly the deployed behavior I was trying to fix. Belt and suspenders: 1. Drop all literal placeholder phrases from the prompt; replace with general guidance that generic non-informative phrasing is rejected. 2. Add a post-generation denylist (PLACEHOLDER_STATUS_MESSAGES) that case-insensitive-exact-matches against a small set of known-bad messages. 
Rejections skip persist + emit, but advance lastInputHash so we don't re-call the model on the unchanged transcript on the next tick (which would just produce the same placeholder). Match is exact, not substring β€” 'Awaiting user reply' is legitimately informative and contains 'Awaiting'. New test pins the contract: model emits 'Awaiting next task' β†’ nothing reaches the sidebar; same transcript next tick β†’ no provider call; transcript changes β†’ fresh attempt. --- src/node/services/agentStatusService.test.ts | 44 +++++++++++++++++++ src/node/services/agentStatusService.ts | 39 ++++++++++++++++ src/node/services/workspaceStatusGenerator.ts | 2 +- 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 14b6d40f56..d95e34436d 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -496,6 +496,50 @@ describe("AgentStatusService", () => { } }); + test("rejects generic placeholder messages and advances dedup so we don't loop", async () => { + // Codex review: even with the prompt steering away from "Awaiting next + // task" et al., small models can still emit them. We must reject them + // post-generation so they never reach the sidebar β€” and we must NOT + // re-call the model on the same transcript, because we'd just get the + // same placeholder back and burn provider budget. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a task") + ); + + generateSpy.mockResolvedValueOnce( + Ok({ + status: { emoji: "πŸ’€", message: "Awaiting next task" }, + modelUsed: "anthropic:claude-haiku-4-5", + }) + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + // Generator was called, but persist was skipped: the placeholder must + // not reach the sidebar. 
+ expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + + // Same transcript again: dedup must skip β€” we already learned this + // input produces a placeholder, no point retrying until it changes. + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + + // After a genuine transcript change, we try again with a fresh result. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "follow-up message") + ); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); + expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 538fcc1982..015fe83f7f 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -195,6 +195,22 @@ export class AgentStatusService { return; } + // Defense in depth: even with a tuned prompt, small models can + // occasionally produce a generic placeholder ("Awaiting next task", + // "Doing work", etc.) that conveys no information. Reject those + // outputs before they reach the sidebar. Advance lastInputHash so we + // don't burn provider budget retrying the same transcript on every + // tick β€” the next genuine transcript change will trigger a fresh + // attempt. 
+ if (isPlaceholderStatus(result.data.status.message)) { + log.debug("AgentStatusService: model produced placeholder status; skipping persist", { + workspaceId, + message: result.data.status.message, + }); + state.lastInputHash = inputHash; + return; + } + // Persist BEFORE updating the in-memory dedup hash. If the disk write // fails we want the next tick to retry against the same transcript // instead of dedup'ing against a hash we never committed. @@ -313,6 +329,29 @@ function computeInputHash(transcript: string): string { return createHash("sha256").update(transcript).digest("hex"); } +/** + * Generic non-informative status messages. Even with the prompt steering + * the model away from these, providers occasionally emit them (especially + * when the transcript is short or paused). We reject them post-generation + * rather than letting them reach the sidebar. + * + * Match is exact + case-insensitive on the trimmed message; we don't + * substring-match because legitimate phrases like "Awaiting user reply" + * contain "Awaiting" and shouldn't be filtered. 
+ */ +const PLACEHOLDER_STATUS_MESSAGES: ReadonlySet = new Set([ + "awaiting next task", + "awaiting input", + "doing work", + "idle", + "working", + "no recent activity", +]); + +function isPlaceholderStatus(message: string): boolean { + return PLACEHOLDER_STATUS_MESSAGES.has(message.trim().toLowerCase()); +} + function pickInterval(streaming: boolean, focused: boolean): number { if (streaming) { return focused diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index ea4d69dcfb..66aec604f8 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -44,7 +44,7 @@ export function buildWorkspaceStatusPrompt(transcript: string): string { "\n\n\n", "Requirements:\n", "- Describe the specific activity the agent was last working on, drawn from the actual transcript content.\n", - "- Do NOT use generic placeholders such as 'Awaiting next task', 'Doing work', or 'Idle'. Always name the concrete activity (file, feature, bug, command, etc.).\n", + "- Always name a concrete activity (file, feature, bug, command, etc.) from the transcript. Generic non-informative phrasing is rejected and not shown.\n", "- Tense: use present tense if the agent appears to still be in the middle of the activity; use past tense if the most recent assistant turn looks complete (e.g. 
wrapped up with a summary, no pending tool calls).\n", "- emoji: A single emoji that visually represents the activity.\n", "- message: 2-6 words, verb-led, sentence case, no punctuation, no quotes.\n", From 6ce9581c44fd3642b11e36c6e79b777ea2b67edb Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:27:01 -0500 Subject: [PATCH 16/33] fix: revert active-workspace todoStatus precedence to live-first Codex flagged the round 4 fix as a regression: with persisted-first, fresh aggregator todos from a new turn are masked by the previous turn's persisted status until the async setTodoStatus + activity-emit round trip catches up. The two Codex rounds were pulling in opposite directions: - Round 4 P2: prefer persisted (so AI status surfaces for workspaces with todos). - Round 5 P2: prefer live (so fresh todos show immediately, no stale status leaks across turns). Reverting to live-first because: 1. The agent's explicit `todo_write` is a more reliable signal for "what's happening right now" than the AI's transcript-inferred summary. Round 4 was treating these as equivalent; they're not. 2. AI status still surfaces in the common "free-form chat without a todo list" case via the existing fallback chain. 3. The round 5 concern is a real correctness bug (stale status leaks across turns); the round 4 concern is a UX preference about which signal wins when they conflict. 4. With active-workspace cadence at 10s, AI status updates frequently enough that brief shadowing during todo handoffs is mild. Test updated: rather than pinning persisted-wins-with-todos, pin the weaker contract that AI status surfaces when there are no live todos. The existing "derives active workspace status from the current todo list" test already pins the live-wins-with-todos branch. 
--- src/browser/stores/WorkspaceStore.test.ts | 21 +++++++++++++-------- src/browser/stores/WorkspaceStore.ts | 23 +++++++++++++++++------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index 24d26405f6..bb1556564f 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -2591,19 +2591,22 @@ describe("WorkspaceStore", () => { expect(state.agentStatus).toEqual({ emoji: "πŸ”„", message: "Run typecheck" }); }); - it("prefers persisted activity todoStatus over live aggregator todos for active workspaces", async () => { - // AgentStatusService writes its AI-generated payload into the same - // `todoStatus` slot. If the active branch always preferred the live - // aggregator derivation, the AI-generated status would never surface - // for any workspace with todos β€” defeating the feature. - const workspaceId = "active-ai-overrides-todos"; + it("falls back to persisted AI status for active workspaces with no live todos", async () => { + // Live aggregator todos are the freshest signal for "what is the + // agent doing right now" because `todo_write` is processed + // synchronously, before the async setTodoStatus + activity-emit round + // trip. So when the workspace has live todos we prefer those (see + // the existing "derives active workspace status from the current todo + // list" test). When there are NO live todos, the AI-generated + // todoStatus from AgentStatusService still has to surface β€” that's + // the common "free-form chat without a todo list" case. 
+ const workspaceId = "active-ai-no-live-todos"; const activitySnapshot: WorkspaceActivitySnapshot = { recency: new Date("2024-01-04T13:00:00.000Z").getTime(), streaming: true, lastModel: "claude-sonnet-4", lastThinkingLevel: null, todoStatus: { emoji: "πŸ› οΈ", message: "AI-generated summary" }, - hasTodos: true, }; mockActivityList.mockResolvedValue({ [workspaceId]: activitySnapshot }); @@ -2611,7 +2614,9 @@ describe("WorkspaceStore", () => { await tick(0); createAndAddWorkspace(store, workspaceId); - seedPinnedTodos(store, workspaceId, [{ content: "Run typecheck", status: "in_progress" }]); + // Intentionally no seedPinnedTodos β€” the aggregator has no todos, so + // the live derivation returns undefined and the persisted AI status + // must surface through the fallback chain. const state = store.getWorkspaceState(workspaceId); expect(state.agentStatus).toEqual(activitySnapshot.todoStatus ?? undefined); diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index b581853d63..5b130783f5 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1756,13 +1756,24 @@ export class WorkspaceStore { const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; const transientStatus = displayStatus ?? fallbackAgentStatus; - // Persistent sidebar status. The activity snapshot's `todoStatus` is the - // canonical "last write wins" slot β€” both AgentStatusService and the - // todo-derivation path write to it. Prefer it in both branches, falling - // back to a live aggregator derivation only when the snapshot has no - // entry yet (brand-new workspaces before the first persist). + // Persistent sidebar status. 
Both AgentStatusService (small-model + // summary) and the todo-derivation path write to the same `todoStatus` + // slot β€” but they have different freshness semantics, so the precedence + // differs by branch: + // + // - Active workspaces: the live aggregator owns the freshest todo + // state (it sees `todo_write` events synchronously, before the async + // persist + activity-emit round-trip). So we prefer the live + // derivation. AI status falls through when the workspace has no + // live todos β€” that's the common "free-form chat without a todo + // list" case where the AI summary is most valuable. + // + // - Inactive workspaces: no aggregator, so the persisted snapshot is + // the only signal. `hasTodos === false` blocks fallback derivation + // so a freshly cleared todo list doesn't briefly resurrect the + // stale aggregator-derived status. const todoStatus = useAggregatorState - ? (activity?.todoStatus ?? deriveTodoStatus(aggregatorTodos) ?? undefined) + ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) : (activity?.todoStatus ?? (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); const agentStatus = transientStatus ?? todoStatus; From 343e5862a91b30ff681efa658681fa526fe1ef25 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:44:49 -0500 Subject: [PATCH 17/33] fix: split sidebar status precedence into 4 tiers (live todo > status_set) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round 6: my refactor in commit 2264113ab ("keep startup safe and preserve transient status precedence") inadvertently elevated `aggregator.getAgentStatus()` above `todoStatus` in the active branch. Main's original precedence had it at the bottom: agentStatus = displayStatus ?? todoStatus ?? 
fallbackAgentStatus I introduced an over-broad `transientStatus` tier to satisfy an earlier Codex round (5) that wanted heartbeat/idle-compaction to win over the new aiStatus path. But `aggregator.getAgentStatus()` is a *blend* of two distinct signals — genuinely transient muxMeta.displayStatus (heartbeat etc.) and persisted `status_set` tool results — so elevating the whole thing also elevated `status_set`, masking fresh `todo_write` updates. Split the active-branch precedence into four tiers so each signal wins exactly when it should: 1. displayStatus — inactive workspace's transient from disk 2. liveTodoStatus — active workspace's freshest todo_write 3. fallbackAgentStatus — active workspace's heartbeat/status_set 4. persistedTodoStatus — disk snapshot (AI summary or stale todo) This satisfies all the Codex rounds simultaneously: - Round 5: heartbeat/idle-compaction (in fallbackAgentStatus) still beats persisted aiStatus (in persistedTodoStatus). ✓ - Round 6: live todo_write beats stale aggregator getAgentStatus(). ✓ - Round 4 "AI status surfaces with no live todos": persistedTodoStatus still surfaces as the lowest fallback. ✓ New test pins live-todo-beats-aggregator-getAgentStatus by spying on the aggregator method and asserting the live derivation wins.
--- src/browser/stores/WorkspaceStore.test.ts | 25 +++++++++- src/browser/stores/WorkspaceStore.ts | 58 ++++++++++++----------- 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.test.ts b/src/browser/stores/WorkspaceStore.test.ts index bb1556564f..ef98bee413 100644 --- a/src/browser/stores/WorkspaceStore.test.ts +++ b/src/browser/stores/WorkspaceStore.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, beforeEach, afterEach, mock, type Mock } from "bun:test"; +import { describe, expect, it, beforeEach, afterEach, mock, spyOn, type Mock } from "bun:test"; import type { DisplayedMessage } from "@/common/types/message"; import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; import type { StreamStartEvent, ToolCallStartEvent } from "@/common/types/stream"; @@ -2591,6 +2591,29 @@ describe("WorkspaceStore", () => { expect(state.agentStatus).toEqual({ emoji: "πŸ”„", message: "Run typecheck" }); }); + it("live todo derivation wins over aggregator getAgentStatus (status_set/heartbeat) for active workspaces", () => { + // Codex round 6: aggregator.getAgentStatus() conflates status_set and + // muxMeta.displayStatus into one field. A status_set value persisted + // from a previous turn could mask a fresh todo_write in the current + // turn. Live todo must win. + const workspaceId = "active-live-todo-beats-aggregator-status"; + createAndAddWorkspace(store, workspaceId); + seedPinnedTodos(store, workspaceId, [{ content: "Run typecheck", status: "in_progress" }]); + + // Simulate an aggregator that has a non-empty getAgentStatus() + // (e.g. an old status_set from a previous turn). The new precedence + // must ignore it because the live todo derivation is fresher. 
+ const aggregator = store.getAggregator(workspaceId); + if (!aggregator) throw new Error("expected aggregator"); + spyOn(aggregator, "getAgentStatus").mockReturnValue({ + emoji: "πŸ”", + message: "Investigating crash", + }); + + const state = store.getWorkspaceState(workspaceId); + expect(state.agentStatus).toEqual({ emoji: "πŸ”„", message: "Run typecheck" }); + }); + it("falls back to persisted AI status for active workspaces with no live todos", async () => { // Live aggregator todos are the freshest signal for "what is the // agent doing right now" because `todo_write` is processed diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 5b130783f5..7d4186f1ae 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -1745,38 +1745,40 @@ export class WorkspaceStore { !transient.caughtUp && !hasRunningInitMessage; const aggregatorTodos = aggregator.getCurrentTodos(); - // `displayStatus` is the explicit transient status for an *inactive* - // workspace (read from the activity snapshot). For an *active* workspace, - // the equivalent signal is the aggregator's `getAgentStatus()` β€” - // StreamingMessageAggregator hydrates that value from - // `muxMetadata.displayStatus` for heartbeat / idle-compaction / background - // turns. We collapse them into a single `transientStatus` so the - // precedence works the same way for both branches and never lets a stale - // todoStatus mask an explicit system-set message. + // Sidebar status precedence, split into four tiers so each signal + // wins exactly when it should. Active and inactive workspaces draw + // from different sources but resolve through the same priority. + // + // 1. displayStatus (inactive only): system-driven transient status + // from disk, e.g. "Compacting idle workspace…". Always wins. + // 2. liveTodoStatus (active only): the agent's most recent + // `todo_write`, processed synchronously by the aggregator. 
+ // Beats the aggregator's persisted status_set value because + // todo_write is the freshest explicit signal; beats persisted + // todoStatus because the live aggregator state is ahead of + // the async setTodoStatus + activity-emit round-trip. + // 3. fallbackAgentStatus (active only): aggregator.getAgentStatus() + // β€” a blend of heartbeat / idle-compaction / background-turn + // `displayStatus` events (genuinely transient) and the agent's + // own `status_set` tool result (a pinned high-level intent). + // Wins over persisted todoStatus so an AI-generated summary + // doesn't mask an explicit system or agent-set message. + // 4. persistedTodoStatus: activity.todoStatus from disk. Either + // a stale todo derivation or an AgentStatusService AI summary β€” + // both writers target the same slot, last write wins. The + // lowest tier so a newer in-memory signal always preempts. + // For inactive workspaces, `hasTodos === false` blocks the + // legacy aggregator-derive fallback so a freshly cleared todo + // list doesn't briefly resurrect the stale derivation. const displayStatus = useAggregatorState ? undefined : (activity?.displayStatus ?? undefined); + const liveTodoStatus = useAggregatorState ? deriveTodoStatus(aggregatorTodos) : undefined; const fallbackAgentStatus = useAggregatorState ? aggregator.getAgentStatus() : undefined; - const transientStatus = displayStatus ?? fallbackAgentStatus; - // Persistent sidebar status. Both AgentStatusService (small-model - // summary) and the todo-derivation path write to the same `todoStatus` - // slot β€” but they have different freshness semantics, so the precedence - // differs by branch: - // - // - Active workspaces: the live aggregator owns the freshest todo - // state (it sees `todo_write` events synchronously, before the async - // persist + activity-emit round-trip). So we prefer the live - // derivation. 
AI status falls through when the workspace has no - // live todos β€” that's the common "free-form chat without a todo - // list" case where the AI summary is most valuable. - // - // - Inactive workspaces: no aggregator, so the persisted snapshot is - // the only signal. `hasTodos === false` blocks fallback derivation - // so a freshly cleared todo list doesn't briefly resurrect the - // stale aggregator-derived status. - const todoStatus = useAggregatorState - ? (deriveTodoStatus(aggregatorTodos) ?? activity?.todoStatus ?? undefined) + const persistedTodoStatus = useAggregatorState + ? (activity?.todoStatus ?? undefined) : (activity?.todoStatus ?? (activity?.hasTodos === false ? undefined : deriveTodoStatus(aggregatorTodos))); - const agentStatus = transientStatus ?? todoStatus; + const agentStatus = + displayStatus ?? liveTodoStatus ?? fallbackAgentStatus ?? persistedTodoStatus; return { name: metadata?.name ?? workspaceId, // Fall back to ID if metadata missing From 8f4b0b2d8bad12ec3f6cbbb233e9e137a13bf13d Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 11:52:46 -0500 Subject: [PATCH 18/33] fix: constrain status emojis to the rendered icon set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex P3: the sidebar renders status emojis through EmojiIcon, which maps a fixed set of glyphs to Lucide icons. Emojis outside the map fall back to a generic Sparkles icon β€” visually identical regardless of activity, defeating the point of having an emoji at all. The example I'd been seeding (πŸ› οΈ) was itself unmapped. Two complementary fixes: 1. Add πŸ›  (hammer-and-wrench) to the EmojiIcon map. Small models reach for it constantly when describing 'fixing/building' work, and it pairs naturally with the existing πŸ”§ β†’ Wrench mapping. 2. Constrain the prompt to a curated list of emojis that we know render correctly: πŸ” πŸ“ βœ… ❌ πŸš€ ⏳ πŸ”— πŸ”„ πŸ§ͺ πŸ€” πŸ”§ πŸ›  πŸ”” 🌐 πŸ“– πŸ“¦ πŸ’€ πŸ’‘ ⚠. 
Each gets a one-word semantic hint so the model picks the closest fit. The model can technically still emit something else, but the explicit menu makes the deviation rare and the placeholder denylist + EmojiIcon Sparkles fallback handle the stragglers gracefully. --- src/browser/components/icons/EmojiIcon/EmojiIcon.tsx | 4 ++++ src/node/services/workspaceStatusGenerator.ts | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx b/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx index b9da8229ea..7acbee1ffa 100644 --- a/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx +++ b/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx @@ -62,6 +62,10 @@ const EMOJI_TO_ICON: Record = { // Tool-ish / app-ish "πŸ”§": Wrench, + // πŸ›  (hammer-and-wrench) is what small models pick most often for + // generic "fixing / building" sidebar status, so we map it to Wrench + // alongside πŸ”§ to avoid the Sparkles fallback. + "πŸ› ": Wrench, "πŸ””": Bell, "🌐": Globe, "πŸ“–": BookOpen, diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index 66aec604f8..ae3226ce4b 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -46,7 +46,11 @@ export function buildWorkspaceStatusPrompt(transcript: string): string { "- Describe the specific activity the agent was last working on, drawn from the actual transcript content.\n", "- Always name a concrete activity (file, feature, bug, command, etc.) from the transcript. Generic non-informative phrasing is rejected and not shown.\n", "- Tense: use present tense if the agent appears to still be in the middle of the activity; use past tense if the most recent assistant turn looks complete (e.g. 
wrapped up with a summary, no pending tool calls).\n", - "- emoji: A single emoji that visually represents the activity.\n", + // The sidebar renders the emoji through EmojiIcon, which maps a fixed + // set of glyphs to Lucide icons. Emojis outside this set fall back to + // a generic Sparkles icon, which looks identical regardless of the + // activity. Restrict the model to glyphs we know render correctly. + "- emoji: must be exactly one of: πŸ” πŸ“ βœ… ❌ πŸš€ ⏳ πŸ”— πŸ”„ πŸ§ͺ πŸ€” πŸ”§ πŸ›  πŸ”” 🌐 πŸ“– πŸ“¦ πŸ’€ πŸ’‘ ⚠. Pick the one that best matches the activity (πŸ” investigating, πŸ“ writing, βœ… done/completed, ❌ failed, πŸš€ deploying/launching, ⏳ waiting, πŸ”„ refreshing/iterating, πŸ§ͺ testing, πŸ€” deciding, πŸ”§ πŸ›  fixing/building, 🌐 network/web, πŸ“– reading docs, πŸ“¦ packaging, πŸ’€ idle, πŸ’‘ planning, ⚠ warning).\n", "- message: 2-6 words, verb-led, sentence case, no punctuation, no quotes.\n", '- Examples (in progress): "Investigating crash", "Implementing sidebar status", "Running tests", "Reading config files".\n', '- Examples (completed): "Wrote tests", "Fixed sidebar bug", "Investigated crash", "Refactored config loader".\n\n', From b08e639631c735e618f2be7755d1de54dc21c1a8 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 7 May 2026 12:01:02 -0500 Subject: [PATCH 19/33] fix: don't clobber AI status on stream-stop with no todos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex caught a real bug: AgentStatusService writes its AI-generated summary into the same `todoStatus` slot that `setTodoStatus` uses. The stream-stop path used to read an empty todo list and pass `todoStatus: null` to setStreaming, which deletes the slot. So every free-form (no-todo) turn would wipe the AI summary that had just been generated during the stream β€” exactly the case the AI summary is most valuable for. Fix: in updateStreamingStatus, leave `todoStatus` undefined when deriveTodoStatus(todos) returns nothing. 
setStreaming's guard (`update.todoStatus !== undefined`) then skips touching the slot. Explicit clears still work via setTodoStatus(null) when the agent calls `todo_write([])`. Updated two existing tests that asserted the old behavior. New regression test pins the contract: the setStreaming call payload must NOT contain a `todoStatus` property when there are no todos. --- src/node/services/workspaceService.test.ts | 61 +++++++++++++++++++++- src/node/services/workspaceService.ts | 8 ++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/node/services/workspaceService.test.ts b/src/node/services/workspaceService.test.ts index 4d4084ad4e..48f6e35471 100644 --- a/src/node/services/workspaceService.test.ts +++ b/src/node/services/workspaceService.test.ts @@ -1638,8 +1638,66 @@ describe("WorkspaceService idle compaction dispatch", () => { await internals.updateStreamingStatus(workspaceId, false); expect(internals.idleCompactingWorkspaces.has(workspaceId)).toBe(false); + // todoStatus is intentionally NOT passed when there are no todos β€” + // passing null would delete an AgentStatusService-written AI summary + // from the same slot. Explicit clears happen via setTodoStatus. expect(setStreaming).toHaveBeenCalledWith(workspaceId, false, { hasTodos: false, + }); + }); + + test("stream-stop with no todos does NOT clear todoStatus (preserves AI summary)", async () => { + // Codex: AgentStatusService writes its AI-generated summary into the + // same `todoStatus` slot that `setTodoStatus` uses. The stream-stop + // path used to read an empty todo list and pass `todoStatus: null`, + // which deleted the slot β€” wiping a summary that was just generated + // during the stream. Free-form chats (no todos) hit this every turn. 
+ const workspaceId = "stream-stop-preserves-ai-status"; + const snapshot = { + recency: Date.now(), + streaming: false, + lastModel: "claude-sonnet-4", + lastThinkingLevel: null, + }; + const setStreaming = mock(() => Promise.resolve(snapshot)); + const emitWorkspaceActivity = mock( + (_workspaceId: string, _snapshot: typeof snapshot) => undefined + ); + + ( + workspaceService as unknown as { + extensionMetadata: ExtensionMetadataService; + emitWorkspaceActivity: typeof emitWorkspaceActivity; + } + ).extensionMetadata = { setStreaming } as unknown as ExtensionMetadataService; + ( + workspaceService as unknown as { + extensionMetadata: ExtensionMetadataService; + emitWorkspaceActivity: typeof emitWorkspaceActivity; + } + ).emitWorkspaceActivity = emitWorkspaceActivity; + + const internals = workspaceService as unknown as { + updateStreamingStatus: ( + workspaceId: string, + streaming: boolean, + options?: ExtensionMetadataStreamingUpdate + ) => Promise; + }; + + await internals.updateStreamingStatus(workspaceId, false); + + // The setStreaming call must omit `todoStatus` entirely. If it included + // `todoStatus: null`, ExtensionMetadataService.setStreaming would delete + // the slot (see the `update.todoStatus !== undefined` branch there). + expect(setStreaming).toHaveBeenCalledTimes(1); + expect(setStreaming).toHaveBeenCalledWith(workspaceId, false, { hasTodos: false }); + // Defensive double-check that the assertion is strict β€” toHaveBeenCalledWith + // with an object literal in some matchers tolerates extra fields. Use + // `not` against an explicit `todoStatus: null` payload to lock the + // contract. 
+ expect(setStreaming).not.toHaveBeenCalledWith(workspaceId, false, { + hasTodos: false, todoStatus: null, }); }); @@ -3546,9 +3604,10 @@ describe("WorkspaceService metadata listeners", () => { await new Promise((resolve) => setTimeout(resolve, 0)); expect(setStreaming).toHaveBeenCalledTimes(1); + // todoStatus is intentionally NOT passed when there are no todos β€” + // see updateStreamingStatus comment for rationale. expect(setStreaming).toHaveBeenCalledWith(workspaceId, false, { hasTodos: false, - todoStatus: null, generation: 0, }); }); diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 615eda2a53..46bc0b5da2 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1619,7 +1619,13 @@ export class WorkspaceService extends EventEmitter { const sessionDir = this.config.getSessionDir(workspaceId); const todos = await readTodosForSessionDir(sessionDir); hasTodos ??= todos.length > 0; - todoStatus ??= deriveTodoStatus(todos) ?? null; + // When there are no todos to derive from, leave `todoStatus` undefined + // so setStreaming doesn't touch the slot. AgentStatusService writes + // its AI-generated summary into the same `todoStatus` field β€” passing + // `null` here would clobber a freshly generated summary every time a + // free-form (no-todo) turn ends. Explicit clears still happen via + // setTodoStatus(null) when the agent calls `todo_write([])`. + todoStatus ??= deriveTodoStatus(todos); } if ( !streaming && From 6539796a4657d723375289d5648d71809bc33cfe Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 15:51:46 -0500 Subject: [PATCH 20/33] fix: clean up language model after each status generation candidate The OpenAI Responses WebSocket transport (added in #3241) attaches a `webSocketTransport.close` cleanup hook to every model returned by `providerModelFactory`. 
`workspaceTitleGenerator` already drains it via `runLanguageModelCleanup` in its finally block, but the new periodic `AgentStatusService` path was leaking transports for every successful or failed candidate, every tick, every workspace. Mirror the title-generator pattern with a finally block so cleanup runs whether the candidate returns a result, throws, or falls through to the next retry. --- src/node/services/workspaceStatusGenerator.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index ae3226ce4b..4ca733cb6b 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -1,6 +1,7 @@ import { streamText, tool } from "ai"; import type { AIService } from "./aiService"; import { log } from "./log"; +import { runLanguageModelCleanup } from "./languageModelCleanup"; import { mapModelCreationError, mapNameGenerationError } from "./workspaceTitleGenerator"; import type { Result } from "@/common/types/result"; import { Ok, Err } from "@/common/types/result"; @@ -125,6 +126,14 @@ export async function generateWorkspaceStatus( error: lastError, }); continue; + } finally { + // Mirror workspaceTitleGenerator: some providers attach cleanup hooks + // to the created model (notably the OpenAI Responses WebSocket + // transport, which attaches webSocketTransport.close). Without this + // call the periodic AgentStatusService loop would leak transports + // for every successful or failed candidate, every tick, every + // workspace. 
+ runLanguageModelCleanup(modelResult.data); } } From d41716ac61df8afa9d672c513b568cd4dba835bb Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 15:59:22 -0500 Subject: [PATCH 21/33] fix: stop AgentStatusService in dispose() too, not just shutdown() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ServiceContainer has two parallel teardown paths: - shutdown() — used by graceful flows that cleanly close everything. - dispose() — used by the desktop before-quit and ACP in-process close handlers. AgentStatusService.start() schedules a ref'd setInterval, so leaving it running across a dispose() keeps the Node process alive and continues calling generateWorkspaceStatus against services that are about to be torn down below this line. Mirror what shutdown() already does so dispose() also stops the loop before closing the rest of the container. --- src/node/services/serviceContainer.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index 7556595b14..1893c04142 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -549,6 +549,13 @@ export class ServiceContainer { await this.desktopBridgeServer.stop(); this.desktopTokenManager.dispose(); await this.desktopSessionManager.closeAll(); + // Stop the periodic AgentStatusService loop here too (not just in + // shutdown()): dispose() is the path used by the desktop before-quit + // and ACP in-process close handlers, and the ref'd setInterval would + // otherwise keep the process alive and continue calling + // generateWorkspaceStatus against services that are about to be torn + // down below.
+ this.agentStatusService.stop(); await this.browserBridgeServer.stop(); this.browserSessionStateHub.dispose(); this.browserBridgeTokenManager.dispose(); From 28037e322545c096c602c6385c38e1b38a1123b8 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:08:07 -0500 Subject: [PATCH 22/33] fix: dedup failed status generations to stop indefinite retry loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When generateWorkspaceStatus returns Err (e.g., the chosen model refuses to call propose_status, or hits a persistent provider error after all candidates), the previous code left lastInputHash unchanged. The next focused/idle tick would compute the same hash, miss the dedup check, and fire the generator again — burning tokens against a workspace whose chat hasn't moved. Treat 'last attempted' as the dedup key: advance lastInputHash on failure too, so the next genuine transcript change is the natural retry trigger. Mirrors how placeholder rejection already advances the hash. Persist-write failures still leave lastInputHash unchanged (covered by the existing 'failed persistence write does not update the dedup hash' test) so transient I/O blips still get retried.
--- src/node/services/agentStatusService.test.ts | 45 +++++++++++++++++++- src/node/services/agentStatusService.ts | 33 +++++++++++--- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index d95e34436d..c85a11aab1 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -3,7 +3,7 @@ import { mkdtempSync, rmSync } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import type { ProjectsConfig, ProjectConfig, Workspace } from "@/common/types/project"; -import { Ok } from "@/common/types/result"; +import { Ok, Err } from "@/common/types/result"; import { createMuxMessage } from "@/common/types/message"; import type { Config } from "@/node/config"; import type { AIService } from "./aiService"; @@ -540,6 +540,49 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); + test("failed generation advances dedup so we don't resend the same transcript every tick", async () => { + // Codex review: when status generation fails after the provider call + // (e.g., the chosen model refuses to call propose_status, or hits a + // persistent provider error), leaving lastInputHash unchanged would let + // the scheduler resend the exact same trailing transcript on every + // focused/idle interval, burning tokens against a workspace that is + // stuck. Once we've attempted generation, the only retry signal that + // matters is a real transcript change. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a task") + ); + + generateSpy.mockResolvedValueOnce( + Err({ type: "unknown", raw: "model did not call propose_status" }) + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + // Generator was called and failed; nothing reached the sidebar. 
+ expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + + // Same transcript again: dedup must skip β€” we already learned that this + // input fails, no point retrying until something changes. + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + + // After a genuine transcript change, we try again. This time the + // generator returns a fresh result and it gets persisted normally. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "follow-up message") + ); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); + expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 015fe83f7f..d7941e40fe 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -35,7 +35,21 @@ export interface AgentStatusServiceOptions { interface State { /** Last time we ran (or skipped via dedup). 0 if we never ran. */ lastRanAt: number; - /** Hash of the input we last successfully generated for. null if never. */ + /** + * Hash of the input we last *attempted* to generate for β€” covers + * successful persists, post-generation placeholder rejection, and + * (intentionally) candidate failures that reached the provider. 
+ * + * Why "attempted" rather than "successful": if all candidates fail + * (e.g., a configured model repeatedly refuses to call propose_status, + * or a persistent provider error), leaving this unset would let the + * scheduler resend the same trailing transcript every focused/idle + * interval, burning tokens on a workspace that is stuck. Advancing the + * hash on failure means the next genuine transcript change is the + * natural retry trigger, while idle/frozen workspaces stay quiet. + * + * null if we have never attempted on this workspace. + */ lastInputHash: string | null; /** Whether a generation is currently in flight. */ inFlight: boolean; @@ -188,10 +202,19 @@ export class AgentStatusService { // await boundary. if (this.stopped) return; if (!result.success) { - log.debug("AgentStatusService: status generation failed; will retry next tick", { - workspaceId, - error: result.error, - }); + // Advance the dedup hash so we don't resend the same frozen + // transcript every tick when a workspace is stuck on a model that + // consistently fails (refuses propose_status, persistent provider + // error, etc.). The next genuine transcript change will trigger a + // fresh attempt. + log.debug( + "AgentStatusService: status generation failed; deferring until transcript changes", + { + workspaceId, + error: result.error, + } + ); + state.lastInputHash = inputHash; return; } From cfc11210e5e63bbcde54372c7a6c6a3f454c90ba Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:17:19 -0500 Subject: [PATCH 23/33] fix: don't cache pre-provider failures in status dedup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If every status-generation candidate fails inside aiService.createModel (no API key, OAuth not connected, provider disabled, model not available, policy denied, etc.) we never actually crossed the wire to a provider. 
Treating that like a post-provider failure and advancing lastInputHash by transcript hash would freeze the workspace out of AI status until the next chat message — even after the user fixes credentials, switches providers, or re-enables a model. Distinguish the two cases by widening the generator's error result to { error, reachedProvider: boolean }: - reachedProvider=true (model refused tool, rate limit, persistent provider error, etc.) → cache by transcript so we don't loop. - reachedProvider=false (auth/config preflight failures) → leave the hash untouched so the next tick retries against the same transcript. Cover the two cases with explicit unit tests so future regressions show up loud. --- src/node/services/agentStatusService.test.ts | 78 ++++++++++++++++--- src/node/services/agentStatusService.ts | 62 +++++++++------ .../services/workspaceStatusGenerator.test.ts | 8 +- src/node/services/workspaceStatusGenerator.ts | 45 +++++++++-- 4 files changed, 151 insertions(+), 42 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index c85a11aab1..d7f6d2b9ea 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -540,21 +540,24 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); - test("failed generation advances dedup so we don't resend the same transcript every tick", async () => { - // Codex review: when status generation fails after the provider call - // (e.g., the chosen model refuses to call propose_status, or hits a - // persistent provider error), leaving lastInputHash unchanged would let - // the scheduler resend the exact same trailing transcript on every - // focused/idle interval, burning tokens against a workspace that is - // stuck. Once we've attempted generation, the only retry signal that - // matters is a real transcript change.
+ test("post-provider failure advances dedup so we don't resend the same transcript every tick", async () => { + // Codex review: when status generation fails AFTER reaching the + // provider (e.g., the chosen model refuses to call propose_status, or + // hits a persistent provider error), leaving lastInputHash unchanged + // would let the scheduler resend the exact same trailing transcript on + // every focused/idle interval, burning tokens against a workspace that + // is stuck. Once we've attempted generation against the provider, the + // only retry signal that matters is a real transcript change. await historyHandle.historyService.appendToHistory( workspaceId, createMuxMessage("u1", "user", "kick off a task") ); generateSpy.mockResolvedValueOnce( - Err({ type: "unknown", raw: "model did not call propose_status" }) + Err({ + error: { type: "unknown", raw: "model did not call propose_status" }, + reachedProvider: true, + }) ); const service = createService(); @@ -583,6 +586,63 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); + test("pre-provider failure (auth/config) keeps retrying so a later credential fix recovers", async () => { + // Codex review: if the first attempt happens before the user has + // connected OAuth / configured an API key (or while a provider is + // disabled), generateWorkspaceStatus returns an Err whose + // reachedProvider flag is false β€” every candidate failed at + // createModel, never crossed the wire to a provider. Caching that + // failure with the transcript hash would silently freeze the workspace + // out of AI status until the chat advances on its own. Pre-provider + // failures must therefore stay retriable: the next tick must call + // generateWorkspaceStatus again so a later credential/provider fix + // recovers without requiring a new user message. 
+ await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "kick off a task") + ); + + generateSpy.mockResolvedValueOnce( + Err({ + error: { + type: "authentication", + authKind: "api_key_missing", + provider: "anthropic", + }, + reachedProvider: false, + }) + ); + + const service = createService(); + await getInternals(service).runForWorkspace(workspaceId); + + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + + // Same transcript, no fix yet: must retry. The scheduler still picks + // this workspace up because the dedup hash didn't advance. + generateSpy.mockResolvedValueOnce( + Err({ + error: { + type: "authentication", + authKind: "api_key_missing", + provider: "anthropic", + }, + reachedProvider: false, + }) + ); + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + + // User fixes credentials β†’ next attempt succeeds against the same + // transcript (no chat change required). + await getInternals(service).runForWorkspace(workspaceId); + expect(generateSpy).toHaveBeenCalledTimes(3); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); + expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index d7941e40fe..375e044cef 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -36,19 +36,24 @@ interface State { /** Last time we ran (or skipped via dedup). 0 if we never ran. 
*/ lastRanAt: number; /** - * Hash of the input we last *attempted* to generate for β€” covers - * successful persists, post-generation placeholder rejection, and - * (intentionally) candidate failures that reached the provider. + * Hash of the input we last "settled" on β€” i.e. an outcome that depends + * on the *transcript* and shouldn't be retried until the transcript + * changes. That covers: + * - successful persists (Ok result, status written), + * - post-generation placeholder rejection, + * - generation failures that reached the provider (model refused tool, + * rate limit, persistent provider error, etc.). * - * Why "attempted" rather than "successful": if all candidates fail - * (e.g., a configured model repeatedly refuses to call propose_status, - * or a persistent provider error), leaving this unset would let the - * scheduler resend the same trailing transcript every focused/idle - * interval, burning tokens on a workspace that is stuck. Advancing the - * hash on failure means the next genuine transcript change is the - * natural retry trigger, while idle/frozen workspaces stay quiet. + * Pre-provider failures (no API key, OAuth not connected, provider + * disabled, model not available, policy denied β€” anything that fails + * inside createModel before we cross the wire) intentionally do NOT + * advance this hash. Those are properties of the user's *config*, and + * caching them by transcript would freeze a workspace out of AI status + * until a new chat message arrived, even after the user fixed + * credentials. See the `result.error.reachedProvider` branch in + * `runForWorkspace`. * - * null if we have never attempted on this workspace. + * null if we have never settled on a transcript for this workspace. */ lastInputHash: string | null; /** Whether a generation is currently in flight. */ @@ -202,19 +207,28 @@ export class AgentStatusService { // await boundary. 
if (this.stopped) return; if (!result.success) { - // Advance the dedup hash so we don't resend the same frozen - // transcript every tick when a workspace is stuck on a model that - // consistently fails (refuses propose_status, persistent provider - // error, etc.). The next genuine transcript change will trigger a - // fresh attempt. - log.debug( - "AgentStatusService: status generation failed; deferring until transcript changes", - { - workspaceId, - error: result.error, - } - ); - state.lastInputHash = inputHash; + // Only advance the dedup hash when at least one candidate actually + // reached the provider. If every candidate failed during model + // construction (no API key, OAuth not connected, provider disabled, + // model not available, policy denied, etc.), the failure is about + // the user's *config* rather than the transcript β€” caching it would + // permanently skip this workspace until they happen to send another + // message, even after they fix credentials. Post-provider failures + // (model refused tool, rate limit, persistent provider error) are + // properties of the transcript and should defer until the chat + // changes. 
+ if (result.error.reachedProvider) { + log.debug( + "AgentStatusService: status generation failed at provider; deferring until transcript changes", + { workspaceId, error: result.error.error } + ); + state.lastInputHash = inputHash; + } else { + log.debug( + "AgentStatusService: status generation failed before reaching provider; will retry next tick", + { workspaceId, error: result.error.error } + ); + } return; } diff --git a/src/node/services/workspaceStatusGenerator.test.ts b/src/node/services/workspaceStatusGenerator.test.ts index ffae75f863..f751ddc7e9 100644 --- a/src/node/services/workspaceStatusGenerator.test.ts +++ b/src/node/services/workspaceStatusGenerator.test.ts @@ -40,8 +40,12 @@ describe("generateWorkspaceStatus error paths", () => { const result = await generateWorkspaceStatus("hello", [], fakeAiService); expect(result.success).toBe(false); if (!result.success) { - expect(result.error.type).toBe("unknown"); - expect(result.error.raw).toContain("No model candidates"); + expect(result.error.error.type).toBe("unknown"); + expect(result.error.error.raw).toContain("No model candidates"); + // No candidates means we never even attempted createModel, so the + // failure has nothing to do with the transcript β€” caller must keep + // retrying so a future config change recovers without a new message. + expect(result.error.reachedProvider).toBe(false); } }); }); diff --git a/src/node/services/workspaceStatusGenerator.ts b/src/node/services/workspaceStatusGenerator.ts index 4ca733cb6b..bee1a054be 100644 --- a/src/node/services/workspaceStatusGenerator.ts +++ b/src/node/services/workspaceStatusGenerator.ts @@ -27,6 +27,26 @@ export interface GenerateWorkspaceStatusResult { modelUsed: string; } +export interface GenerateWorkspaceStatusFailure { + error: NameGenerationError; + /** + * True if at least one candidate's `createModel` call succeeded, meaning + * we actually reached the provider with a request. 
False if every + * candidate failed during model construction (auth not connected, API + * key missing, provider disabled, model not available, policy denied, + * etc.). + * + * The caller uses this to decide whether to advance its dedup hash: + * post-provider failures (model refused tool, rate limit, network blip, + * persistent provider error) are properties of the *transcript* and + * should defer until the chat changes. Pre-provider failures are + * properties of the user's *config* and must remain retriable so a + * later credential/provider fix recovers without requiring a transcript + * change first. + */ + reachedProvider: boolean; +} + /** * Build the prompt used by {@link generateWorkspaceStatus}. The transcript * is supplied pre-trimmed (token budget enforced upstream). The prompt @@ -68,16 +88,25 @@ export async function generateWorkspaceStatus( transcript: string, candidates: readonly string[], aiService: AIService -): Promise> { +): Promise> { if (candidates.length === 0) { return Err({ - type: "unknown", - raw: "No model candidates provided for workspace status generation", + error: { + type: "unknown", + raw: "No model candidates provided for workspace status generation", + }, + reachedProvider: false, }); } const maxAttempts = Math.min(candidates.length, 3); let lastError: NameGenerationError | null = null; + // Track whether any candidate's createModel call succeeded β€” i.e., whether + // we actually crossed the wire to a provider. If every attempt fails at + // construction (no API key, OAuth not connected, provider disabled, etc.), + // the failure is about the user's config rather than the transcript and + // the caller must keep retrying so a later fix recovers. 
+ let reachedProvider = false; for (let i = 0; i < maxAttempts; i++) { const modelString = candidates[i]; @@ -90,6 +119,7 @@ export async function generateWorkspaceStatus( log.debug(`Status generation: skipping ${modelString} (${modelResult.error.type})`); continue; } + reachedProvider = true; try { const currentStream = streamText({ @@ -137,10 +167,11 @@ export async function generateWorkspaceStatus( } } - return Err( - lastError ?? { + return Err({ + error: lastError ?? { type: "configuration", raw: "No working model candidates were available for workspace status generation.", - } - ); + }, + reachedProvider, + }); } From 2c3743c430c0cdc17958a53f1a0a1e1b1abdaf89 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:36:17 -0500 Subject: [PATCH 24/33] fix: refresh sidebar status after user message pivots --- src/node/services/agentStatusService.test.ts | 57 +++++++++++++++++++- src/node/services/agentStatusService.ts | 53 ++++++++++++++---- 2 files changed, 98 insertions(+), 12 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index d7f6d2b9ea..0ddbe420bf 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -20,6 +20,11 @@ interface AgentStatusServiceInternals { runForWorkspace(workspaceId: string): Promise; } +interface ActivitySnapshotForTest { + streaming: boolean; + recency?: number; +} + describe("AgentStatusService", () => { const workspaceId = "ws-test"; const projectPath = "/test/project"; @@ -37,7 +42,7 @@ describe("AgentStatusService", () => { typeof mock<(workspaceId: string, status: unknown) => Promise<{ recency: number }>> >; let getAllSnapshotsMock: ReturnType< - typeof mock<() => Promise>> + typeof mock<() => Promise>> >; let emitWorkspaceActivityMock: ReturnType< typeof mock<(workspaceId: string, snapshot: unknown) => void> @@ -106,7 +111,7 @@ describe("AgentStatusService", () => { ); // Default: no snapshots β†’ no 
workspaces are streaming β†’ idle intervals. // Tests that exercise the active intervals override this per-test. - getAllSnapshotsMock = mock(() => Promise.resolve(new Map())); + getAllSnapshotsMock = mock(() => Promise.resolve(new Map())); mockExtensionMetadata = { setSidebarStatus: setSidebarStatusMock, getAllSnapshots: getAllSnapshotsMock, @@ -282,6 +287,54 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(3); }); + test("a user message recency bump bypasses the idle cadence so stale pre-pivot status refreshes", async () => { + // User rationale: a chat message is often a real pivot to the task at + // hand. If we wait for the normal idle cadence, the sidebar can keep + // showing the old pre-pivot status after the user has clearly changed + // direction. + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Initial request") + ); + + let recency = 100; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([[workspaceId, { streaming: false, recency }]]) + ) + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + now += 5_000; + recency = 200; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "Pivot to new task") + ); + await internals.runTick(); + + // Still inside the 30s idle-focused interval, but the user-recency bump + // resets the clock so we regenerate against the pivot immediately. 
+ expect(generateSpy).toHaveBeenCalledTimes(2); + expect(generateSpy.mock.calls[1][0]).toContain("User: Pivot to new task"); + + now += 5_000; + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("a1", "assistant", "Acknowledged") + ); + await internals.runTick(); + + // Non-user transcript changes still obey cadence when recency is stable. + expect(generateSpy).toHaveBeenCalledTimes(2); + }); + test("streaming workspaces regenerate at the active intervals (10s focused, 30s unfocused)", async () => { // The user-visible reason this test exists: when an agent is actively // working, the sidebar status should refresh fast enough that the user diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 375e044cef..645f835340 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -56,6 +56,13 @@ interface State { * null if we have never settled on a transcript for this workspace. */ lastInputHash: string | null; + /** + * Recency timestamp observed the last time the scheduler considered this + * workspace. User messages update recency, so an increased value is a + * strong signal that the old sidebar status may now be stale even if the + * normal idle/active cadence has not elapsed yet. + */ + lastObservedRecency: number | null; /** Whether a generation is currently in flight. */ inFlight: boolean; } @@ -69,7 +76,7 @@ interface State { * is blurred. See ACTIVE_/IDLE_ intervals in @/constants/agentStatus. * * Dedup: each generation hashes its trailing-transcript window. Identical - * hash to the last successful run skips regeneration (idle/frozen chats). + * hash to the last settled run skips regeneration (idle/frozen chats). * * Concurrency: bounded by AGENT_STATUS_MAX_CONCURRENT so a multi-workspace * sweep never spikes provider load. @@ -149,7 +156,12 @@ export class AgentStatusService { // Sort eligible workspaces by lastRanAt ascending. 
With MAX_CONCURRENT=1, // a fixed iteration order would let the first workspace starve the rest; // least-recently-run gives fair round-robin without an explicit queue. - const eligible: Array<{ id: string; lastRanAt: number }> = []; + const eligible: Array<{ + id: string; + lastRanAt: number; + recency: number | null; + recencyAdvanced: boolean; + }> = []; for (const [, projectConfig] of this.config.loadConfigOrDefault().projects) { for (const ws of projectConfig.workspaces) { const id = ws.id ?? ws.name; @@ -157,20 +169,32 @@ export class AgentStatusService { if (isWorkspaceArchived(ws.archivedAt, ws.unarchivedAt)) continue; const state = this.tracked.get(id); if (state?.inFlight) continue; - const interval = pickInterval(snapshots.get(id)?.streaming === true, focused); - if (state && tickStartedAt - state.lastRanAt < interval) continue; - eligible.push({ id, lastRanAt: state?.lastRanAt ?? 0 }); + const snapshot = snapshots.get(id); + const recency = typeof snapshot?.recency === "number" ? snapshot.recency : null; + const recencyAdvanced = hasRecencyAdvanced(state, recency); + const interval = pickInterval(snapshot?.streaming === true, focused); + if (state && !recencyAdvanced && tickStartedAt - state.lastRanAt < interval) continue; + eligible.push({ id, lastRanAt: state?.lastRanAt ?? 0, recency, recencyAdvanced }); } } - eligible.sort((a, b) => a.lastRanAt - b.lastRanAt); + eligible.sort((a, b) => { + if (a.recencyAdvanced !== b.recencyAdvanced) { + // A user message is usually a task pivot. Put those workspaces ahead + // of ordinary cadence refreshes so stale pre-pivot statuses don't + // linger behind background idle work. + return a.recencyAdvanced ? 
-1 : 1; + } + return a.lastRanAt - b.lastRanAt; + }); - for (const { id } of eligible) { + for (const { id, recency } of eligible) { if (this.stopped || this.inFlightPromises.size >= AGENT_STATUS_MAX_CONCURRENT) return; const state = this.ensureState(id); state.inFlight = true; // Set lastRanAt at dispatch time (not after the async transcript // build) so cadence is anchored to tick boundaries β€” see runTick. state.lastRanAt = tickStartedAt; + state.lastObservedRecency = recency; const promise = this.runForWorkspace(id).finally(() => { state.inFlight = false; this.inFlightPromises.delete(promise); @@ -184,8 +208,9 @@ export class AgentStatusService { const transcript = await this.buildTrailingTranscript(workspaceId); const inputHash = computeInputHash(transcript); // dispatch() set lastRanAt to the tick start time before kicking us - // off, so the scheduler already won't reconsider this workspace until - // the next interval boundary regardless of which branch we take below. + // off, so the scheduler won't reconsider this workspace until the next + // interval boundary unless a newer user-recency timestamp indicates the + // chat pivoted again. const state = this.ensureState(workspaceId); // Empty workspace: nothing to summarize. 
Don't blank an existing @@ -276,7 +301,7 @@ export class AgentStatusService { private ensureState(id: string): State { let state = this.tracked.get(id); if (!state) { - state = { lastRanAt: 0, lastInputHash: null, inFlight: false }; + state = { lastRanAt: 0, lastInputHash: null, lastObservedRecency: null, inFlight: false }; this.tracked.set(id, state); } return state; @@ -389,6 +414,14 @@ function isPlaceholderStatus(message: string): boolean { return PLACEHOLDER_STATUS_MESSAGES.has(message.trim().toLowerCase()); } +function hasRecencyAdvanced(state: State | undefined, recency: number | null): boolean { + return ( + state !== undefined && + recency !== null && + (state.lastObservedRecency === null || recency > state.lastObservedRecency) + ); +} + function pickInterval(streaming: boolean, focused: boolean): number { if (streaming) { return focused From 32a90729c5540fbc68d123973b0d0fd10cda3244 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:44:34 -0500 Subject: [PATCH 25/33] fix: avoid consuming recency before transcript updates --- src/node/services/agentStatusService.test.ts | 41 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 23 +++++++++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 0ddbe420bf..304ad171c3 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -335,6 +335,47 @@ describe("AgentStatusService", () => { expect(generateSpy).toHaveBeenCalledTimes(2); }); + test("does not consume a user recency bump until the pivot message reaches history", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Initial request") + ); + + let recency = 100; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([[workspaceId, { streaming: false, recency }]]) + ) + ); + + let now = 1_000_000; 
+ const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + // sendMessage updates workspace recency before the user message is + // durably appended to history. A scheduler tick in that gap sees the + // recency bump but the old transcript hash; it must leave the bump + // unconsumed so the next tick can still bypass cadence once history + // catches up. + now += 5_000; + recency = 200; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "Pivot after recency") + ); + now += 5_000; + await internals.runTick(); + + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(generateSpy.mock.calls[1][0]).toContain("User: Pivot after recency"); + }); + test("streaming workspaces regenerate at the active intervals (10s focused, 30s unfocused)", async () => { // The user-visible reason this test exists: when an agent is actively // working, the sidebar status should refresh fast enough that the user diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 645f835340..b04d33323b 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -194,8 +194,7 @@ export class AgentStatusService { // Set lastRanAt at dispatch time (not after the async transcript // build) so cadence is anchored to tick boundaries β€” see runTick. 
state.lastRanAt = tickStartedAt; - state.lastObservedRecency = recency; - const promise = this.runForWorkspace(id).finally(() => { + const promise = this.runForWorkspace(id, recency).finally(() => { state.inFlight = false; this.inFlightPromises.delete(promise); }); @@ -203,7 +202,10 @@ export class AgentStatusService { } } - private async runForWorkspace(workspaceId: string): Promise { + private async runForWorkspace( + workspaceId: string, + observedRecency: number | null = null + ): Promise { try { const transcript = await this.buildTrailingTranscript(workspaceId); const inputHash = computeInputHash(transcript); @@ -213,10 +215,20 @@ export class AgentStatusService { // chat pivoted again. const state = this.ensureState(workspaceId); + const markRecencyObserved = () => { + if (observedRecency !== null) { + state.lastObservedRecency = observedRecency; + } + }; + // Empty workspace: nothing to summarize. Don't blank an existing // todoStatus β€” that would clobber a status produced before compaction. if (transcript.trim().length === 0) return; - // Idle/frozen: identical trailing window since last successful run. + // Idle/frozen: identical trailing window since last settled run. Do not + // consume observedRecency here: WorkspaceService can bump recency before + // the user message is durably in history, and consuming it against the + // old hash would reintroduce stale pre-pivot statuses until cadence + // expires. 
if (state.lastInputHash === inputHash) return; const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); @@ -247,6 +259,7 @@ export class AgentStatusService { "AgentStatusService: status generation failed at provider; deferring until transcript changes", { workspaceId, error: result.error.error } ); + markRecencyObserved(); state.lastInputHash = inputHash; } else { log.debug( @@ -269,6 +282,7 @@ export class AgentStatusService { workspaceId, message: result.data.status.message, }); + markRecencyObserved(); state.lastInputHash = inputHash; return; } @@ -282,6 +296,7 @@ export class AgentStatusService { result.data.status ); if (this.stopped) return; + markRecencyObserved(); state.lastInputHash = inputHash; this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); } catch (error) { From fd88f3f4a71f6b200a65a7d16f78d47e4f4ae97a Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:52:50 -0500 Subject: [PATCH 26/33] fix: defer first status refresh during recency races --- src/node/services/agentStatusService.test.ts | 35 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 34 +++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 304ad171c3..7246f47c12 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -376,6 +376,41 @@ describe("AgentStatusService", () => { expect(generateSpy.mock.calls[1][0]).toContain("User: Pivot after recency"); }); + test("defers a first recent recency bump so startup cannot settle on stale pre-pivot history", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Old request before restart") + ); + + let now = 1_000_000; + const recency = now - 1_000; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([[workspaceId, { streaming: 
false, recency }]]) + ) + ); + + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + // After a restart the in-memory hash baseline is empty. If this tick is + // racing with sendMessage's recency update, generating now would settle + // on old history and consume the pivot signal before the user message is + // appended. Defer one tick instead. + await internals.runTick(); + expect(generateSpy).not.toHaveBeenCalled(); + + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "Pivot after restart") + ); + now += 10_000; + await internals.runTick(); + + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(generateSpy.mock.calls[0][0]).toContain("User: Pivot after restart"); + }); + test("streaming workspaces regenerate at the active intervals (10s focused, 30s unfocused)", async () => { // The user-visible reason this test exists: when an agent is actively // working, the sidebar status should refresh fast enough that the user diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index b04d33323b..c3b4096105 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -215,6 +215,26 @@ export class AgentStatusService { // chat pivoted again. const state = this.ensureState(workspaceId); + if ( + shouldWaitForFirstRecentRecency( + state, + observedRecency, + this.clock(), + AGENT_STATUS_TICK_INTERVAL_MS + ) + ) { + // We may be seeing WorkspaceService's recency update before the + // corresponding user message is appended to history. With no settled + // hash baseline after startup, generating now could persist a stale + // pre-pivot status and consume the only recency signal. Wait one + // scheduler interval so the history write can catch up. 
+ log.debug("AgentStatusService: waiting for recent recency bump to reach history", { + workspaceId, + observedRecency, + }); + return; + } + const markRecencyObserved = () => { if (observedRecency !== null) { state.lastObservedRecency = observedRecency; @@ -429,6 +449,20 @@ function isPlaceholderStatus(message: string): boolean { return PLACEHOLDER_STATUS_MESSAGES.has(message.trim().toLowerCase()); } +function shouldWaitForFirstRecentRecency( + state: State, + observedRecency: number | null, + now: number, + tickIntervalMs: number +): boolean { + return ( + state.lastInputHash === null && + state.lastObservedRecency === null && + observedRecency !== null && + now - observedRecency < tickIntervalMs + ); +} + function hasRecencyAdvanced(state: State | undefined, recency: number | null): boolean { return ( state !== undefined && From 40d38dd213a9c3eb8472f8a690239a87c91ca531 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 16:58:18 -0500 Subject: [PATCH 27/33] fix: consume recency for empty status transcripts --- src/node/services/agentStatusService.test.ts | 32 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 20 +++++++----- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 7246f47c12..20375e7e46 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -235,6 +235,38 @@ describe("AgentStatusService", () => { expect(setSidebarStatusMock).not.toHaveBeenCalled(); }); + test("empty workspaces consume observed recency so they do not starve populated workspaces", async () => { + const emptyWorkspaceId = "ws-empty"; + projectsConfig = makeProjectsConfig([ + makeWorkspaceEntry({ id: emptyWorkspaceId, name: emptyWorkspaceId } as Partial), + makeWorkspaceEntry(), + ]); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Populated 
workspace") + ); + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([ + [emptyWorkspaceId, { streaming: false, recency: 100 }], + ]) + ) + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await internals.runTick(); + expect(generateSpy).not.toHaveBeenCalled(); + + now += 10_000; + await internals.runTick(); + + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(generateSpy.mock.calls[0][0]).toContain("User: Populated workspace"); + }); + test("idle workspaces regenerate at the idle focused/unfocused intervals", async () => { await historyHandle.historyService.appendToHistory( workspaceId, diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index c3b4096105..41211df022 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -215,6 +215,12 @@ export class AgentStatusService { // chat pivoted again. const state = this.ensureState(workspaceId); + const markRecencyObserved = () => { + if (observedRecency !== null) { + state.lastObservedRecency = observedRecency; + } + }; + if ( shouldWaitForFirstRecentRecency( state, @@ -235,15 +241,15 @@ export class AgentStatusService { return; } - const markRecencyObserved = () => { - if (observedRecency !== null) { - state.lastObservedRecency = observedRecency; - } - }; - // Empty workspace: nothing to summarize. Don't blank an existing // todoStatus — that would clobber a status produced before compaction. - if (transcript.trim().length === 0) return; + // Still consume non-racy recency so an empty workspace doesn't sort as + // "recency advanced" forever and starve other workspaces under the + // single-concurrency scheduler. + if (transcript.trim().length === 0) { + markRecencyObserved(); + return; + } // Idle/frozen: identical trailing window since last settled run.
Do not // consume observedRecency here: WorkspaceService can bump recency before // the user message is durably in history, and consuming it against the From bb21f74090cfeadb035ce15232d55705b862e092 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:04:17 -0500 Subject: [PATCH 28/33] fix: consume recency priority on status config failures --- src/node/services/agentStatusService.test.ts | 50 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 7 ++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 20375e7e46..44535079ef 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -804,6 +804,56 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).toHaveBeenCalledTimes(1); }); + test("pre-provider failures consume recency priority without advancing transcript dedup", async () => { + const misconfiguredWorkspaceId = "ws-misconfigured"; + projectsConfig = makeProjectsConfig([ + makeWorkspaceEntry({ + id: misconfiguredWorkspaceId, + name: misconfiguredWorkspaceId, + } as Partial), + makeWorkspaceEntry(), + ]); + await historyHandle.historyService.appendToHistory( + misconfiguredWorkspaceId, + createMuxMessage("u-bad", "user", "Misconfigured workspace") + ); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u-good", "user", "Healthy workspace") + ); + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([ + [misconfiguredWorkspaceId, { streaming: false, recency: 100 }], + ]) + ) + ); + generateSpy.mockResolvedValueOnce( + Err({ + error: { + type: "authentication", + authKind: "api_key_missing", + provider: "anthropic", + }, + reachedProvider: false, + }) + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await 
internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(generateSpy.mock.calls[0][0]).toContain("User: Misconfigured workspace"); + + now += 10_000; + await internals.runTick(); + + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(generateSpy.mock.calls[1][0]).toContain("User: Healthy workspace"); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 41211df022..9a3a3fee2d 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -289,9 +289,14 @@ export class AgentStatusService { state.lastInputHash = inputHash; } else { log.debug( - "AgentStatusService: status generation failed before reaching provider; will retry next tick", + "AgentStatusService: status generation failed before reaching provider; will retry on cadence", { workspaceId, error: result.error.error } ); + // Consume recency without advancing lastInputHash: credential/config + // fixes should still retry the same transcript, but a misconfigured + // workspace must not retain permanent recency-advanced priority and + // starve other workspaces under max concurrency 1. 
+ markRecencyObserved(); } return; } From 37a9dc9a7202e42482b3650a8b0112f1e65beeb9 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:10:16 -0500 Subject: [PATCH 29/33] fix: track seen transcripts for recency catchup --- src/node/services/agentStatusService.test.ts | 45 ++++++++++++++++++++ src/node/services/agentStatusService.ts | 36 ++++++++++++---- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 44535079ef..64fbe1a282 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -854,6 +854,51 @@ describe("AgentStatusService", () => { expect(generateSpy.mock.calls[1][0]).toContain("User: Healthy workspace"); }); + test("pre-provider retry state does not consume a recency bump before history catches up", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Old misconfigured request") + ); + let recency = 100; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([[workspaceId, { streaming: false, recency }]]) + ) + ); + generateSpy.mockResolvedValueOnce( + Err({ + error: { + type: "authentication", + authKind: "api_key_missing", + provider: "anthropic", + }, + reachedProvider: false, + }) + ); + + let now = 1_000_000; + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + now += 5_000; + recency = now; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u2", "user", "Pivot after config failure") + ); + now += 10_000; + await internals.runTick(); + + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(generateSpy.mock.calls[1][0]).toContain("User: Pivot after config 
failure"); + }); + test("archived workspaces are not regenerated", async () => { projectsConfig = makeProjectsConfig([ makeWorkspaceEntry({ archivedAt: new Date().toISOString() } as Partial), diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 9a3a3fee2d..5467d63a2d 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -56,6 +56,13 @@ interface State { * null if we have never settled on a transcript for this workspace. */ lastInputHash: string | null; + /** + * Hash of the transcript the scheduler last examined, even if that input + * did not settle into a sidebar status (for example, a pre-provider config + * failure). Used to avoid consuming a recency bump while history is still + * catching up to the user message that caused it. + */ + lastSeenInputHash: string | null; /** * Recency timestamp observed the last time the scheduler considered this * workspace. User messages update recency, so an increased value is a @@ -222,16 +229,19 @@ export class AgentStatusService { }; if ( - shouldWaitForFirstRecentRecency( + isRecentRecencyAheadOfHistory( state, + inputHash, observedRecency, this.clock(), AGENT_STATUS_TICK_INTERVAL_MS ) ) { + state.lastSeenInputHash = inputHash; // We may be seeing WorkspaceService's recency update before the - // corresponding user message is appended to history. With no settled - // hash baseline after startup, generating now could persist a stale + // corresponding user message is appended to history. If the transcript + // is unchanged from the last one we examined (or we have no baseline + // immediately after startup), generating now could persist a stale // pre-pivot status and consume the only recency signal. Wait one // scheduler interval so the history write can catch up. 
log.debug("AgentStatusService: waiting for recent recency bump to reach history", { @@ -240,6 +250,7 @@ export class AgentStatusService { }); return; } + state.lastSeenInputHash = inputHash; // Empty workspace: nothing to summarize. Don't blank an existing // todoStatus — that would clobber a status produced before compaction. @@ -347,7 +358,13 @@ export class AgentStatusService { private ensureState(id: string): State { let state = this.tracked.get(id); if (!state) { - state = { lastRanAt: 0, lastInputHash: null, lastObservedRecency: null, inFlight: false }; + state = { + lastRanAt: 0, + lastInputHash: null, + lastSeenInputHash: null, + lastObservedRecency: null, + inFlight: false, + }; this.tracked.set(id, state); } return state; @@ -460,17 +477,18 @@ function isPlaceholderStatus(message: string): boolean { return PLACEHOLDER_STATUS_MESSAGES.has(message.trim().toLowerCase()); } -function shouldWaitForFirstRecentRecency( +function isRecentRecencyAheadOfHistory( state: State, + inputHash: string, observedRecency: number | null, now: number, - tickIntervalMs: number + historyCatchupWindowMs: number ): boolean { return ( - state.lastInputHash === null && - state.lastObservedRecency === null && + hasRecencyAdvanced(state, observedRecency) && + (state.lastSeenInputHash === null || state.lastSeenInputHash === inputHash) && observedRecency !== null && - now - observedRecency < tickIntervalMs + now - observedRecency < historyCatchupWindowMs ); } From be1bf725a0a25af73db3cc1ec2647c9c4fdff643 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:18:37 -0500 Subject: [PATCH 30/33] fix: consume stale recency on status dedup skips --- src/node/services/agentStatusService.test.ts | 50 +++++++++++++++++++- src/node/services/agentStatusService.ts | 14 +++--- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index 64fbe1a282..df1ff78cdc 100644 ---
a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -393,7 +393,7 @@ describe("AgentStatusService", () => { // unconsumed so the next tick can still bypass cadence once history // catches up. now += 5_000; - recency = 200; + recency = now; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(1); @@ -401,7 +401,7 @@ describe("AgentStatusService", () => { workspaceId, createMuxMessage("u2", "user", "Pivot after recency") ); - now += 5_000; + now += 10_000; await internals.runTick(); expect(generateSpy).toHaveBeenCalledTimes(2); @@ -443,6 +443,52 @@ describe("AgentStatusService", () => { expect(generateSpy.mock.calls[0][0]).toContain("User: Pivot after restart"); }); + test("dedup skips consume stale recency priority after the history catchup window", async () => { + const staleWorkspaceId = "ws-stale-recency"; + projectsConfig = makeProjectsConfig([ + makeWorkspaceEntry({ id: staleWorkspaceId, name: staleWorkspaceId } as Partial), + makeWorkspaceEntry(), + ]); + await historyHandle.historyService.appendToHistory( + staleWorkspaceId, + createMuxMessage("u-stale", "user", "Already summarized") + ); + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u-good", "user", "Waiting behind stale recency") + ); + + let now = 1_000_000; + let recency = 100; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([ + [staleWorkspaceId, { streaming: false, recency }], + ]) + ) + ); + const service = createService({ clock: () => now }); + const internals = getInternals(service); + + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(generateSpy.mock.calls[0][0]).toContain("User: Already summarized"); + + now += 5_000; + recency = now; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + now += 10_000; + await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(1); + + now += 10_000; + 
await internals.runTick(); + expect(generateSpy).toHaveBeenCalledTimes(2); + expect(generateSpy.mock.calls[1][0]).toContain("User: Waiting behind stale recency"); + }); + test("streaming workspaces regenerate at the active intervals (10s focused, 30s unfocused)", async () => { // The user-visible reason this test exists: when an agent is actively // working, the sidebar status should refresh fast enough that the user diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 5467d63a2d..4c4ffbdac1 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -261,12 +261,14 @@ export class AgentStatusService { markRecencyObserved(); return; } - // Idle/frozen: identical trailing window since last settled run. Do not - // consume observedRecency here: WorkspaceService can bump recency before - // the user message is durably in history, and consuming it against the - // old hash would reintroduce stale pre-pivot statuses until cadence - // expires. - if (state.lastInputHash === inputHash) return; + // Idle/frozen: identical trailing window since last settled run. The + // recent race path above already handles recency that may be ahead of + // history, so any recency reaching this dedup branch is stale/non-racy: + // consume it to avoid permanent recency-advanced priority. 
+ if (state.lastInputHash === inputHash) { + markRecencyObserved(); + return; + } const candidates = await this.workspaceService.getWorkspaceTitleModelCandidates(workspaceId); if (candidates.length === 0) return; From 07e2ef0c88cea91a25f6091370ea85a1609adba2 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:25:08 -0500 Subject: [PATCH 31/33] fix: run initial sidebar status sweep on start --- src/node/services/agentStatusService.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index 4c4ffbdac1..ca32c70ed8 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -116,9 +116,10 @@ export class AgentStatusService { assert(this.checkInterval === null, "AgentStatusService.start() called while already running"); this.stopped = false; // No startup delay: AGENT_STATUS_MAX_CONCURRENT=1 already serializes - // generation across workspaces, so the first tick can fire immediately - // without risking a thundering herd at launch. + // generation across workspaces, so an immediate first tick won't create a + // thundering herd at launch. 
this.checkInterval = setInterval(() => void this.runTick(), this.tickIntervalMs); + void this.runTick(); log.info("AgentStatusService started", { tickIntervalMs: this.tickIntervalMs }); } From c8f8d35d8068ac4a697aa502f89775d9c467e89a Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:32:55 -0500 Subject: [PATCH 32/33] fix: discard stale status when recency advances in flight --- src/node/services/agentStatusService.test.ts | 54 +++++++++++++++++++- src/node/services/agentStatusService.ts | 22 ++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index df1ff78cdc..bfdd70556a 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -17,7 +17,7 @@ import { createTestHistoryService } from "./testHistoryService"; interface AgentStatusServiceInternals { runTick(): Promise; - runForWorkspace(workspaceId: string): Promise; + runForWorkspace(workspaceId: string, observedRecency?: number | null): Promise; } interface ActivitySnapshotForTest { @@ -44,6 +44,9 @@ describe("AgentStatusService", () => { let getAllSnapshotsMock: ReturnType< typeof mock<() => Promise>> >; + let getSnapshotMock: ReturnType< + typeof mock<(workspaceId: string) => Promise<{ recency: number } | null>> + >; let emitWorkspaceActivityMock: ReturnType< typeof mock<(workspaceId: string, snapshot: unknown) => void> >; @@ -112,9 +115,11 @@ describe("AgentStatusService", () => { // Default: no snapshots → no workspaces are streaming → idle intervals. // Tests that exercise the active intervals override this per-test.
getAllSnapshotsMock = mock(() => Promise.resolve(new Map())); + getSnapshotMock = mock((_workspaceId: string) => Promise.resolve(null)); mockExtensionMetadata = { setSidebarStatus: setSidebarStatusMock, getAllSnapshots: getAllSnapshotsMock, + getSnapshot: getSnapshotMock, } as unknown as ExtensionMetadataService; mockTokenizer = { @@ -657,6 +662,53 @@ describe("AgentStatusService", () => { expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); }); + test("drops a generated status if workspace recency advances while provider call is in flight", async () => { + await historyHandle.historyService.appendToHistory( + workspaceId, + createMuxMessage("u1", "user", "Old task") + ); + + let recency = 100; + getAllSnapshotsMock.mockImplementation(() => + Promise.resolve( + new Map([[workspaceId, { streaming: false, recency }]]) + ) + ); + getSnapshotMock.mockImplementation(() => Promise.resolve({ recency })); + + let signalStarted!: () => void; + const startedSignal = new Promise((resolve) => { + signalStarted = resolve; + }); + let releaseGenerate!: () => void; + const gate = new Promise((resolve) => { + releaseGenerate = resolve; + }); + generateSpy.mockImplementationOnce(async () => { + signalStarted(); + await gate; + return Ok({ + status: { emoji: "🛠️", message: "Summarizing old task" }, + modelUsed: "anthropic:claude-haiku-4-5", + }); + }); + + const service = createService(); + const inFlight = getInternals(service).runForWorkspace(workspaceId, recency); + await startedSignal; + + // A user message can advance recency while the provider is still working + // on the old transcript. The old result must not be written after that + // pivot, or the sidebar can resurrect stale pre-pivot status.
+ recency = 200; + releaseGenerate(); + await inFlight; + + expect(generateSpy).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock).not.toHaveBeenCalled(); + expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); + }); + test("a failed persistence write does not update the dedup hash, so the next tick retries", async () => { // Only update lastInputHash AFTER a successful persist. Otherwise a // transient I/O failure would leave us dedup'ing against a hash that diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index ca32c70ed8..cc4d6c8bc7 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -283,6 +283,16 @@ export class AgentStatusService { // Re-check after the generator returns: the same hazard at a later // await boundary. if (this.stopped) return; + if (await this.hasNewerRecency(workspaceId, observedRecency)) { + // A user turn landed while the provider was generating against the + // previous transcript. Drop the old result so it cannot resurrect a + // pre-pivot sidebar status after the user message cleared/changed it. + log.debug("AgentStatusService: dropping generated status after newer recency", { + workspaceId, + observedRecency, + }); + return; + } if (!result.success) { // Only advance the dedup hash when at least one candidate actually // reached the provider. 
If every candidate failed during model @@ -358,6 +368,18 @@ export class AgentStatusService { } } + private async hasNewerRecency( + workspaceId: string, + observedRecency: number | null + ): Promise { + const snapshot = await this.extensionMetadata.getSnapshot(workspaceId); + const currentRecency = snapshot?.recency; + return ( + typeof currentRecency === "number" && + (observedRecency === null || currentRecency > observedRecency) + ); + } + private ensureState(id: string): State { let state = this.tracked.get(id); if (!state) { From 5c13f054aacf5b88257d6668df261a606f22fcdd Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 9 May 2026 17:39:53 -0500 Subject: [PATCH 33/33] fix: atomically skip stale sidebar status writes --- src/node/services/ExtensionMetadataService.ts | 13 +++++- src/node/services/agentStatusService.test.ts | 41 +++++++++++++++++-- src/node/services/agentStatusService.ts | 37 +++++++---------- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/node/services/ExtensionMetadataService.ts b/src/node/services/ExtensionMetadataService.ts index 6714ba9b95..cd1272e8e1 100644 --- a/src/node/services/ExtensionMetadataService.ts +++ b/src/node/services/ExtensionMetadataService.ts @@ -259,8 +259,9 @@ export class ExtensionMetadataService { */ async setSidebarStatus( workspaceId: string, - status: ExtensionAgentStatus | null - ): Promise { + status: ExtensionAgentStatus | null, + options: { skipIfRecencyAdvancedSince?: number | null } = {} + ): Promise { return this.withSerializedMutation(async () => { const data = await this.load(); const existing = coerceExtensionMetadata(data.workspaces[workspaceId]); @@ -273,6 +274,14 @@ export class ExtensionMetadataService { displayStatus: null, lastStatusUrl: null, }; + if ( + options.skipIfRecencyAdvancedSince !== undefined && + existing && + (options.skipIfRecencyAdvancedSince === null || + existing.recency > options.skipIfRecencyAdvancedSince) + ) { + return null; + } if (status) { workspace.todoStatus 
= status; } else { diff --git a/src/node/services/agentStatusService.test.ts b/src/node/services/agentStatusService.test.ts index bfdd70556a..133ebb7b9e 100644 --- a/src/node/services/agentStatusService.test.ts +++ b/src/node/services/agentStatusService.test.ts @@ -39,7 +39,13 @@ describe("AgentStatusService", () => { let windowService: WindowService; let isFocused = true; let setSidebarStatusMock: ReturnType< - typeof mock<(workspaceId: string, status: unknown) => Promise<{ recency: number }>> + typeof mock< + ( + workspaceId: string, + status: unknown, + options?: { skipIfRecencyAdvancedSince?: number | null } + ) => Promise<{ recency: number } | null> + > >; let getAllSnapshotsMock: ReturnType< typeof mock<() => Promise>> @@ -109,7 +115,7 @@ describe("AgentStatusService", () => { emitWorkspaceActivity: emitWorkspaceActivityMock, } as unknown as WorkspaceService; - setSidebarStatusMock = mock((_workspaceId: string, _status: unknown) => + setSidebarStatusMock = mock((_workspaceId: string, _status: unknown, _options?: unknown) => Promise.resolve({ recency: 0 }) ); // Default: no snapshots → no workspaces are streaming → idle intervals. @@ -674,7 +680,13 @@ describe("AgentStatusService", () => { new Map([[workspaceId, { streaming: false, recency }]]) ) ); - getSnapshotMock.mockImplementation(() => Promise.resolve({ recency })); + setSidebarStatusMock.mockImplementation((_workspaceId, _status, options) => + Promise.resolve( + options?.skipIfRecencyAdvancedSince != null && recency > options.skipIfRecencyAdvancedSince + ?
null + : { recency } + ) + ); let signalStarted!: () => void; const startedSignal = new Promise((resolve) => { @@ -705,7 +717,8 @@ describe("AgentStatusService", () => { await inFlight; expect(generateSpy).toHaveBeenCalledTimes(1); - expect(setSidebarStatusMock).not.toHaveBeenCalled(); + expect(setSidebarStatusMock).toHaveBeenCalledTimes(1); + expect(setSidebarStatusMock.mock.calls[0][2]).toEqual({ skipIfRecencyAdvancedSince: 100 }); expect(emitWorkspaceActivityMock).not.toHaveBeenCalled(); }); @@ -755,6 +768,26 @@ describe("AgentStatusService", () => { } }); + test("setSidebarStatus can atomically skip when recency advanced", async () => { + const dir = mkdtempSync(join(tmpdir(), "mux-recency-skip-")); + try { + const svc = new ExtensionMetadataService(join(dir, "metadata.json")); + await svc.updateRecency("ws", 200); + const skipped = await svc.setSidebarStatus( + "ws", + { emoji: "🛠️", message: "Old status" }, + { skipIfRecencyAdvancedSince: 100 } + ); + const after = await svc.getSnapshot("ws"); + + expect(skipped).toBeNull(); + expect(after?.todoStatus).toBeUndefined(); + expect(after?.recency).toBe(200); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + test("rejects generic placeholder messages and advances dedup so we don't loop", async () => { // Codex review: even with the prompt steering away from "Awaiting next // task" et al., small models can still emit them. We must reject them diff --git a/src/node/services/agentStatusService.ts b/src/node/services/agentStatusService.ts index cc4d6c8bc7..8464581d3a 100644 --- a/src/node/services/agentStatusService.ts +++ b/src/node/services/agentStatusService.ts @@ -283,16 +283,6 @@ export class AgentStatusService { // Re-check after the generator returns: the same hazard at a later // await boundary. if (this.stopped) return; - if (await this.hasNewerRecency(workspaceId, observedRecency)) { - // A user turn landed while the provider was generating against the - // previous transcript.
Drop the old result so it cannot resurrect a - // pre-pivot sidebar status after the user message cleared/changed it. - log.debug("AgentStatusService: dropping generated status after newer recency", { - workspaceId, - observedRecency, - }); - return; - } if (!result.success) { // Only advance the dedup hash when at least one candidate actually // reached the provider. If every candidate failed during model @@ -348,9 +338,22 @@ export class AgentStatusService { try { const snapshot = await this.extensionMetadata.setSidebarStatus( workspaceId, - result.data.status + result.data.status, + { skipIfRecencyAdvancedSince: observedRecency } ); if (this.stopped) return; + if (!snapshot) { + // The recency check happens inside ExtensionMetadataService's + // serialized mutation queue, immediately before the status write. + // That makes it atomic with fire-and-forget user-recency writes: + // a slow provider response cannot resurrect a pre-pivot status + // after a newer user turn has queued or committed its recency bump. + log.debug("AgentStatusService: dropping generated status after newer recency", { + workspaceId, + observedRecency, + }); + return; + } markRecencyObserved(); state.lastInputHash = inputHash; this.workspaceService.emitWorkspaceActivity(workspaceId, snapshot); @@ -368,18 +371,6 @@ export class AgentStatusService { } } - private async hasNewerRecency( - workspaceId: string, - observedRecency: number | null - ): Promise { - const snapshot = await this.extensionMetadata.getSnapshot(workspaceId); - const currentRecency = snapshot?.recency; - return ( - typeof currentRecency === "number" && - (observedRecency === null || currentRecency > observedRecency) - ); - } - private ensureState(id: string): State { let state = this.tracked.get(id); if (!state) {