From f26b27ac377fad208d9a5aad3aad043fd682e45c Mon Sep 17 00:00:00 2001 From: zerob13 Date: Tue, 17 Mar 2026 22:30:27 +0800 Subject: [PATCH 1/4] refactor(browser): unify yobrowser sessions --- docs/architecture/tool-system.md | 13 +- docs/specs/yobrowser-optimization/plan.md | 42 + docs/specs/yobrowser-optimization/spec.md | 82 +- docs/specs/yobrowser-optimization/tasks.md | 9 + electron.vite.config.ts | 2 - .../agentPresenter/acp/agentToolManager.ts | 6 +- .../agentPresenter/loop/toolCallProcessor.ts | 2 +- .../browser/BrowserContextBuilder.ts | 20 +- .../presenter/browser/YoBrowserPresenter.ts | 1182 ++++++----------- .../browser/YoBrowserToolDefinitions.ts | 41 +- .../presenter/browser/YoBrowserToolHandler.ts | 152 +-- .../deepchatAgentPresenter/toolOutputGuard.ts | 4 +- src/main/presenter/newAgentPresenter/index.ts | 6 + src/main/presenter/tabPresenter.ts | 22 - src/main/presenter/toolPresenter/index.ts | 24 +- src/main/presenter/windowPresenter/index.ts | 114 +- src/renderer/browser/App.vue | 79 -- src/renderer/browser/components/AppBar.vue | 173 --- .../browser/components/BrowserToolbar.vue | 151 --- .../browser/components/icons/CloseIcon.vue | 19 - .../browser/components/icons/MaximizeIcon.vue | 20 - .../browser/components/icons/MinimizeIcon.vue | 19 - .../browser/components/icons/RestoreIcon.vue | 18 - src/renderer/browser/index.html | 14 - src/renderer/browser/lib/events.ts | 20 - src/renderer/browser/main.ts | 27 - src/renderer/browser/stores/window.ts | 90 -- .../src/components/sidepanel/BrowserPanel.vue | 291 ++-- .../sidepanel}/BrowserPlaceholder.vue | 6 +- .../components/sidepanel/ChatSidePanel.vue | 5 +- src/shared/types/browser.ts | 31 +- .../types/presenters/legacy.presenters.d.ts | 66 +- .../main/presenter/YoBrowserPresenter.test.ts | 143 +- .../browser/YoBrowserToolHandler.test.ts | 59 + .../deepchatAgentPresenter.test.ts | 16 +- .../deepchatAgentPresenter/dispatch.test.ts | 34 +- .../deepchatAgentPresenter/process.test.ts | 12 +- .../newAgentPresenter.test.ts | 19 +- .../toolPresenter/toolPresenter.test.ts | 4 +- test/renderer/components/BrowserPanel.test.ts | 73 +- test/renderer/components/McpIndicator.test.ts | 2 +- .../renderer/components/NewThreadPage.test.ts | 4 +- 42 files changed, 1056 insertions(+), 2060 deletions(-) create mode 100644 docs/specs/yobrowser-optimization/plan.md create mode 100644 docs/specs/yobrowser-optimization/tasks.md delete mode 100644 src/renderer/browser/App.vue delete mode 100644 src/renderer/browser/components/AppBar.vue delete mode 100644 src/renderer/browser/components/BrowserToolbar.vue delete mode 100644 src/renderer/browser/components/icons/CloseIcon.vue delete mode 100644 src/renderer/browser/components/icons/MaximizeIcon.vue delete mode 100644 src/renderer/browser/components/icons/MinimizeIcon.vue delete mode 100644 src/renderer/browser/components/icons/RestoreIcon.vue delete mode 100644 src/renderer/browser/index.html delete mode 100644 src/renderer/browser/lib/events.ts delete mode 100644 src/renderer/browser/main.ts delete mode 100644 src/renderer/browser/stores/window.ts rename src/renderer/{browser/components => src/components/sidepanel}/BrowserPlaceholder.vue (55%) create mode 100644 test/main/presenter/browser/YoBrowserToolHandler.test.ts diff --git a/docs/architecture/tool-system.md b/docs/architecture/tool-system.md index 9886a72f1..64d9edaf7 100644 --- a/docs/architecture/tool-system.md +++ b/docs/architecture/tool-system.md @@ -624,16 +624,15 @@ class AgentFileSystemHandler { ### YoBrowser CDP 工具 -YoBrowser 提供基于 Chrome DevTools Protocol (CDP) 的最小工具集,在 agent 模式下直接可用。 +YoBrowser 在 agent 模式下直接提供 session 级单实例 browser 能力,每个 session 最多绑定一个 sidepanel browser。 **可用工具**: -- `yo_browser_tab_list` - 列出所有浏览器 tabs -- `yo_browser_tab_new` - 创建新 tab -- `yo_browser_tab_activate` - 激活指定 tab -- `yo_browser_tab_close` - 关闭 tab -- `yo_browser_cdp_send` - 发送 CDP 命令 +- `load_url` - 懒创建当前 session 的 browser 并导航到目标 URL +- `get_browser_status` - 返回当前 session browser 的页面、导航和可见性状态 +- `cdp_send` - 向当前 session browser 发送 CDP 命令 -**安全约束**: +**约束**: +- `cdp_send` 不会自动创建 browser;必须先调用 `load_url` - `local://` URL 禁止 CDP attach(在 `BrowserTab.ensureSession()` 中检查) - 所有 CDP 命令通过 `webContents.debugger.sendCommand()` 执行 diff --git a/docs/specs/yobrowser-optimization/plan.md b/docs/specs/yobrowser-optimization/plan.md new file mode 100644 index 000000000..03432b02d --- /dev/null +++ b/docs/specs/yobrowser-optimization/plan.md @@ -0,0 +1,42 @@ +# YoBrowser Session 单实例实施计划 + +## 1. 主进程模型 + +1. `YoBrowserPresenter` 用 `Map` 替代全局单状态。 +2. 每个 `SessionBrowserState` 仅包含一个 `WebContentsView`、一个 `BrowserTab`、attach 信息、可见性和最后一次 bounds。 +3. `load_url` 负责首次懒加载:创建 browser、发起 sidepanel open、等待 host ready、再导航。 + +## 2. 工具路由 + +1. `YoBrowserToolDefinitions` 仅注册 `load_url`、`get_browser_status`、`cdp_send`。 +2. `YoBrowserToolHandler.callTool` 必须接收 `conversationId` 并据此路由 session。 +3. `AgentToolManager` 和 `ToolPresenter` 把这 3 个名字视为内建 YoBrowser 工具。 +4. MCP 同名工具在定义收集阶段直接过滤。 + +## 3. Renderer 行为 + +1. `BrowserPanel` 接收 `sessionId`,所有 presenter 调用都显式带 sessionId。 +2. 切换 session 时先 detach 旧 session browser。 +3. 若旧 session 状态不是 `working`,立即 destroy。 +4. 若旧 session 状态是 `working`,加入 `pendingBrowserDestroySessionIds`,等状态变更后再 destroy。 +5. YoBrowser 事件 payload 带 `sessionId`,只更新当前 panel 对应的会话。 + +## 4. 独立 browser 下线 + +1. 删除 `src/renderer/browser` 旧壳入口。 +2. `windowPresenter` 中旧 `browser` window 类型不再创建独立窗口,统一回退到 chat window。 +3. 清理 `tabPresenter` 中依赖 `browserTabId` 的 YoBrowser 分支。 + +## 5. 测试策略 + +1. main: + - tool definitions 只剩 3 个新工具。 + - 旧工具名报 unknown tool。 + - `load_url` 懒加载与 host-ready 流程成立。 + - session 间 browser state 隔离。 +2. renderer: + - `BrowserPanel` 仅响应当前 session 事件。 + - session 切换时的 detach / destroy / pending destroy 成立。 +3. 回归: + - `cdp_send` 仍走 offload。 + - disabled tools 存储和展示使用新工具名。 diff --git a/docs/specs/yobrowser-optimization/spec.md b/docs/specs/yobrowser-optimization/spec.md index 1180753e0..958b66848 100644 --- a/docs/specs/yobrowser-optimization/spec.md +++ b/docs/specs/yobrowser-optimization/spec.md @@ -1,64 +1,66 @@ -# YoBrowser Optimization(UI + CDP 工具) +# YoBrowser Session 单实例收敛 ## 背景 -当前 YoBrowser 在 Workspace 侧边栏存在 UI 问题: -- `src/renderer/src/components/workspace/WorkspaceView.vue` 在 `agent` 模式下总会渲染 `WorkspaceBrowserTabs` 分区,即便没有任何 tab,也会出现一块空区域。 +当前 YoBrowser 还保留了多 window / 多 tab 的旧抽象,但实际运行时已经收敛为单个 sidepanel browser host。继续暴露 `open / close / focus / list`、windowId、tabId 等接口,只会增加状态分叉和错误恢复成本。 -## 目标(Goals) +同时,session 切换后 browser 的回收策略需要和会话状态对齐:如果旧 session 仍在 `working`,切走时只能先 detach,不能立刻销毁,否则会打断本轮工具调用。 -1. **UI**:只有存在 YoBrowser tabs 时,Workspace 侧边栏才显示 Browser Tabs 分区。 -2. **Agent 工具直接注入**:YoBrowser 工具(`yo_browser_*`)在 agent 模式下直接可用,无需激活任何 skill。 +## 目标 -## 非目标(Non-Goals) +1. 每个 session 最多持有一个 YoBrowser `webContents`。 +2. agent 仅暴露 3 个裸工具名:`load_url`、`get_browser_status`、`cdp_send`。 +3. `load_url` 首次调用时懒创建 browser,并自动完成 sidepanel attach 流程。 +4. session 切换时按会话状态销毁: + - 非 `working`:立即 detach 并销毁。 + - `working`:先 detach,待状态结束后再销毁。 +5. 下线旧独立 browser shell,只保留聊天右侧 sidepanel 的 YoBrowser。 -- 不调整 YoBrowser window 的 UI、尺寸、布局、位置策略。 -- 不修改 `BrowserContextBuilder.buildSystemPrompt` 的注入策略(不做减少/压缩/裁剪)。 -- 不改造其他 agent 工具(filesystem/bash/mcp 等)。 -- 不使用 skills 系统来控制 YoBrowser 工具的可见性。 +## 非目标 -## 用户故事(User Stories) +- 不扩成通用多窗口 browser 系统。 +- 不保留旧 `yo_browser_*` 别名兼容。 +- 不让 `cdp_send` 自动创建 browser;必须先 `load_url`。 +- 不额外重构通用 window presenter 架构。 -- 作为用户,我不希望在没有任何浏览器 tab 的情况下,Workspace 侧边栏仍出现空的 Browser Tabs 分区。 -- 作为 agent 用户,我希望 YoBrowser 自动化能力以 CDP 为核心,工具在 agent 模式下直接可用。 +## 用户故事 -## 约束与假设(Constraints & Assumptions) +- 作为 agent 用户,我希望 browser 工具是直接、稳定、少状态的,不需要理解 window/tab 多实体模型。 +- 作为使用多会话的用户,我希望切换 session 时前一个 session 的 browser 不串到当前会话。 +- 作为正在执行 browser 工具的用户,我希望切走会话不会打断仍在运行中的 browser 操作。 -- YoBrowser 现有实现已经基于 Electron Debugger/CDP(`CDPManager`, `BrowserTab.ensureSession()`)。 -- 安全边界:`local://` URL 禁止绑定 CDP(`BrowserTab` 现有逻辑已做限制)。 +## 约束与假设 -## 验收标准(Acceptance Criteria) +- “正在 loading” 统一按当前 session 状态 `working` 处理。 +- 一个 session 同时只允许一个 sidepanel browser 实例。 +- `cdp_send` 永远绑定当前 tool call 的 `conversationId`。 +- `load_url`、`get_browser_status`、`cdp_send` 视为内建保留工具名,MCP 不得覆盖。 -### A. UI:Workspace Browser Tabs 展示逻辑 +## 验收标准 -- [ ] `src/renderer/src/components/workspace/WorkspaceView.vue` 仅在 `chatMode === 'agent' && yoBrowserStore.tabCount > 0` 时渲染 `WorkspaceBrowserTabs`。 -- [ ] 当 `tabCount === 0` 时,不显示 Browser Tabs 分区(不保留空白区域)。 +### A. 工具面 -### B. 工具:YoBrowser CDP 工具直接注入(agent 模式) +- [ ] agent tool definitions 仅包含 `load_url`、`get_browser_status`、`cdp_send`。 +- [ ] 旧 `yo_browser_*` 名称调用时返回 unknown tool。 +- [ ] `cdp_send` 若 session browser 尚未初始化,返回明确错误,要求先 `load_url`。 -- [ ] agent tool definitions 中包含 `yo_browser_*` 工具(agent 模式下直接可用)。 -- [ ] agent 的 tool call 路由正确处理 `yo_browser_*` 工具(`toolName.startsWith('yo_browser_')`)。 -- [ ] 不依赖 skills 系统(不检查 `activeSkills`)。 +### B. Session 生命周期 -### C. 工具实现:CDP 方式 + 合适的参数定义 +- [ ] `load_url` 首次调用时才创建对应 session 的 browser。 +- [ ] 不同 session 持有各自独立 browser state,不共享 page / visibility / attach 状态。 +- [ ] session 切换时,旧 session 若非 `working`,立即 destroy。 +- [ ] session 切换时,旧 session 若为 `working`,仅 detach;该 session 结束后再 destroy。 -- [ ] 工具集合: - - `yo_browser_tab_list`:列出 tabs 与 active tab。 - - `yo_browser_tab_new`:创建新 tab(可选 url)。 - - `yo_browser_tab_activate`:激活 tab。 - - `yo_browser_tab_close`:关闭 tab。 - - `yo_browser_cdp_send`:向指定/当前 tab 的 CDP session 发送 `{ method, params }`。 -- [ ] 参数 schema 符合 CDP 使用方式(method、params 等)。 -- [ ] 保留安全边界:`local://` 禁止 CDP attach。 +### C. UI 与事件 -### D. Prompt/Context +- [ ] Renderer 仅响应当前 `sessionId` 的 YoBrowser 事件。 +- [ ] 切换 session 后,browser panel 不显示前一个 session 的状态。 +- [ ] 旧独立 browser shell 入口不再可用。 -- [ ] `BrowserContextBuilder.buildSystemPrompt` 的注入保持现状(不做减少/压缩/裁剪)。 +### D. 文档与接口 -### E. 兼容性 - -- [ ] 不涉及数据迁移。 -- [ ] 现有 YoBrowser UI/窗口/Tab 生命周期保持可用。 +- [ ] `IYoBrowserPresenter` 与共享类型收敛到 session-aware 单实例接口。 +- [ ] 架构文档与本 spec 使用新工具名与新生命周期语义。 ## Open Questions diff --git a/docs/specs/yobrowser-optimization/tasks.md b/docs/specs/yobrowser-optimization/tasks.md new file mode 100644 index 000000000..e0fa7202f --- /dev/null +++ b/docs/specs/yobrowser-optimization/tasks.md @@ -0,0 +1,9 @@ +# YoBrowser Session 单实例任务拆分 + +1. 收敛共享类型与 presenter 接口到 session-aware 单实例模型。 +2. 重写 `YoBrowserPresenter` 的 session 状态管理、attach、detach、destroy 流程。 +3. 将 tool definitions / handler / agent routing 切到 `load_url`、`get_browser_status`、`cdp_send`。 +4. 在 renderer sidepanel 中按 `sessionId` 驱动 browser panel,并实现 `working` 态延迟销毁。 +5. 删除旧独立 browser shell 与 `browserTabId` 相关残留。 +6. 更新 main / renderer / agent presenter 测试到新工具名和新生命周期。 +7. 更新规格与架构文档,并跑格式化、i18n、lint、关键测试。 diff --git a/electron.vite.config.ts b/electron.vite.config.ts index 6d793be81..4dee57b91 100644 --- a/electron.vite.config.ts +++ b/electron.vite.config.ts @@ -64,7 +64,6 @@ export default defineConfig({ resolve: { alias: { '@': resolve('src/renderer/src'), - '@browser': resolve('src/renderer/browser'), '@shared': resolve('src/shared'), "@shadcn": resolve('src/shadcn'), vue: 'vue/dist/vue.esm-bundler.js' @@ -107,7 +106,6 @@ export default defineConfig({ cssCodeSplit: false, rollupOptions: { input: { - browser: resolve('src/renderer/browser/index.html'), index: resolve('src/renderer/index.html'), floating: resolve('src/renderer/floating/index.html'), splash: resolve('src/renderer/splash/index.html'), diff --git a/src/main/presenter/agentPresenter/acp/agentToolManager.ts b/src/main/presenter/agentPresenter/acp/agentToolManager.ts index d3bd2c8ca..ee2b20704 100644 --- a/src/main/presenter/agentPresenter/acp/agentToolManager.ts +++ b/src/main/presenter/agentPresenter/acp/agentToolManager.ts @@ -19,6 +19,7 @@ import { CHAT_SETTINGS_TOOL_NAMES } from './chatSettingsTools' import type { AgentToolRuntimePort } from '../runtimePorts' +import { YO_BROWSER_TOOL_NAMES } from '../../browser/YoBrowserToolDefinitions' // Consider moving to a shared handlers location in future refactoring import { @@ -66,6 +67,7 @@ interface AgentToolManagerOptions { } export class AgentToolManager { + private static readonly YO_BROWSER_TOOL_NAME_SET = new Set(YO_BROWSER_TOOL_NAMES) private agentWorkspacePath: string | null private fileSystemHandler: AgentFileSystemHandler | null = null private bashHandler: AgentBashHandler | null = null @@ -393,8 +395,8 @@ export class AgentToolManager { } // Route to YoBrowser CDP tools - if (toolName.startsWith('yo_browser_')) { - const response = await this.getYoBrowserToolHandler().callTool(toolName, args) + if (AgentToolManager.YO_BROWSER_TOOL_NAME_SET.has(toolName)) { + const response = await this.getYoBrowserToolHandler().callTool(toolName, args, conversationId) return { content: response } diff --git a/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts b/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts index e3ffc0bb2..50a93f979 100644 --- a/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts +++ b/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts @@ -93,7 +93,7 @@ const QUESTION_ERROR_KEY = 'common.error.invalidQuestionRequest' // Tools that require offload when output exceeds threshold // Tools not in this list will never trigger offload (e.g., read has its own pagination) -const TOOLS_REQUIRING_OFFLOAD = new Set(['exec', 'ls', 'find', 'grep', 'yo_browser_cdp_send']) +const TOOLS_REQUIRING_OFFLOAD = new Set(['exec', 'ls', 'find', 'grep', 'cdp_send']) export class ToolCallProcessor { constructor(private readonly options: ToolCallProcessorOptions) {} diff --git a/src/main/presenter/browser/BrowserContextBuilder.ts b/src/main/presenter/browser/BrowserContextBuilder.ts index b7e19ce71..e16f05c7c 100644 --- a/src/main/presenter/browser/BrowserContextBuilder.ts +++ b/src/main/presenter/browser/BrowserContextBuilder.ts @@ -1,21 +1,13 @@ -import type { BrowserToolDefinition, BrowserWindowInfo } from '@shared/types/browser' +import type { BrowserToolDefinition, YoBrowserStatus } from '@shared/types/browser' export class BrowserContextBuilder { - static buildSystemPrompt(windows: BrowserWindowInfo[], activeWindowId: number | null): string { - const activeWindow = windows.find((browserWindow) => browserWindow.id === activeWindowId) - const windowLines = - windows.length === 0 - ? ['- No browser windows open.'] - : windows.map((browserWindow) => { - const marker = browserWindow.id === activeWindowId ? '*' : ' ' - const title = browserWindow.page.title || browserWindow.page.url || 'Untitled' - return `${marker} ${title} (${browserWindow.page.url || 'about:blank'})` - }) + static buildSystemPrompt(status: YoBrowserStatus): string { + const page = status.page + const pageLine = page ? `${page.title || page.url || 'Untitled'} (${page.url})` : 'none' + return [ 'Yo Browser is available for web exploration.', - `Active window: ${activeWindow ? `${activeWindow.page.title || activeWindow.page.url} (${activeWindow.id})` : 'none'}`, - 'Open browser windows:', - ...windowLines, + `Current page: ${pageLine}`, 'Use Yo Browser to browse, extract DOM, run scripts, capture screenshots, and download files.' ].join('\n') } diff --git a/src/main/presenter/browser/YoBrowserPresenter.ts b/src/main/presenter/browser/YoBrowserPresenter.ts index 671f0b3b8..325878d69 100644 --- a/src/main/presenter/browser/YoBrowserPresenter.ts +++ b/src/main/presenter/browser/YoBrowserPresenter.ts @@ -4,36 +4,33 @@ import { is } from '@electron-toolkit/utils' import { eventBus, SendTarget } from '@/eventbus' import { YO_BROWSER_EVENTS } from '@/events' import logger from '@shared/logger' -import type { - BrowserContextSnapshot, - BrowserTabInfo, - BrowserWindowInfo, - ScreenshotOptions +import { + BrowserPageStatus, + type BrowserPageInfo, + type ScreenshotOptions, + type YoBrowserStatus } from '@shared/types/browser' import type { DownloadInfo, IWindowPresenter, IYoBrowserPresenter } from '@shared/presenter' import { BrowserTab as BrowserPage } from './BrowserTab' import { CDPManager } from './CDPManager' -import { ScreenshotManager } from './ScreenshotManager' import { DownloadManager } from './DownloadManager' +import { ScreenshotManager } from './ScreenshotManager' import { clearYoBrowserSessionData, getYoBrowserSession } from './yoBrowserSession' import { YoBrowserToolHandler } from './YoBrowserToolHandler' -type BrowserWindowState = { - id: number - viewId: number +type SessionBrowserState = { + sessionId: string + view: WebContentsView page: BrowserPage createdAt: number updatedAt: number - isEmbedded?: boolean - view?: WebContentsView - visible?: boolean - attachedWindowId?: number | null - lastBounds?: Rectangle | null - lastVisible?: boolean - hostReady?: boolean + visible: boolean + attachedWindowId: number | null + lastBounds: Rectangle | null + hostReady: boolean } -type BrowserWindowListeners = { +type HostWindowListeners = { focus: () => void show: () => void hide: () => void @@ -41,6 +38,7 @@ type BrowserWindowListeners = { } type HostReadyWaiter = { + sessionId: string hostWindowId: number timeoutId: NodeJS.Timeout stableTimerId: NodeJS.Timeout | null @@ -49,20 +47,15 @@ type HostReadyWaiter = { } export class YoBrowserPresenter implements IYoBrowserPresenter { - private readonly browserWindows = new Map() - private readonly viewIdToWindowId = new Map() - private readonly pageIdToWindowId = new Map() - private readonly attachedWindowIds = new Set() - private readonly windowListeners = new Map() - private embeddedState: BrowserWindowState | null = null - private activeWindowId: number | null = null + private readonly sessionBrowsers = new Map() + private readonly hostWindowListeners = new Map() + private readonly hostReadyWaiters = new Map() private readonly cdpManager = new CDPManager() private readonly screenshotManager = new ScreenshotManager(this.cdpManager) private readonly downloadManager = new DownloadManager() private readonly windowPresenter: IWindowPresenter private readonly embeddedHostReadyTimeoutMs = 2000 private readonly embeddedHostReadyStableMs = 120 - private hostReadyWaiter: HostReadyWaiter | null = null readonly toolHandler: YoBrowserToolHandler constructor(windowPresenter: IWindowPresenter) { @@ -74,370 +67,186 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { // Lazy initialization only. } - async ensureWindow(): Promise { - const existing = this.getResolvedWindowState() - if (existing) { - return existing.id - } - - const created = await this.ensureEmbeddedState() - return created?.id ?? null + async getBrowserStatus(sessionId: string): Promise { + return this.toStatus(this.sessionBrowsers.get(sessionId) ?? null) } - async openWindow(url?: string): Promise { + async loadUrl(sessionId: string, url: string, timeoutMs?: number): Promise { + const normalizedSessionId = sessionId.trim() + if (!normalizedSessionId) { + throw new Error('sessionId is required') + } + if (!url.trim()) { + throw new Error('url is required') + } + const hostWindowId = this.resolveHostWindowId() if (hostWindowId == null) { - return null + throw new Error('No host window available for YoBrowser') } + const state = this.ensureSessionBrowserState(normalizedSessionId) + this.markHostNotReady(state) this.logLifecycle('open requested', { + sessionId: normalizedSessionId, windowId: hostWindowId, - url: url ?? 'about:blank' - }) - - const state = await this.ensureEmbeddedState(undefined, hostWindowId) - if (!state) { - return null - } - - this.markEmbeddedHostNotReady(state) - this.logLifecycle('embedded state ready', { - windowId: state.id, - pageId: state.page.pageId, - url: state.page.url - }) - - this.emitOpenRequested(hostWindowId, state.page.pageId, url ?? state.page.url) - this.logLifecycle('panel open requested', { - windowId: hostWindowId, - pageId: state.page.pageId, - url: url ?? state.page.url + url }) + this.emitOpenRequested(normalizedSessionId, hostWindowId, url) this.windowPresenter.show(hostWindowId, true) - this.setActiveWindowId(hostWindowId) - - this.logLifecycle('host ready waiting', { - windowId: hostWindowId, - pageId: state.page.pageId, - url: url ?? state.page.url - }) - await this.waitForEmbeddedHostReady(hostWindowId, state) - - if (url && url !== 'about:blank') { - this.logLifecycle('navigation started', { - windowId: state.id, - pageId: state.page.pageId, - url - }) - - try { - await state.page.navigateUntilDomReady(url) - } catch (error) { - state.updatedAt = Date.now() - this.emitWindowUpdated(state) - throw error - } - } + await this.waitForSessionHostReady(normalizedSessionId, hostWindowId, state) + await state.page.navigateUntilDomReady(url, timeoutMs ?? 30000) state.updatedAt = Date.now() - this.emitWindowUpdated(state) - return this.toWindowInfo(state) + this.emitWindowUpdated(normalizedSessionId) + return this.toStatus(state) } - async attachEmbeddedToWindow(windowId: number): Promise { - const state = await this.ensureEmbeddedState(undefined, windowId) - if (!state?.view) { - return null + async attachSessionBrowser(sessionId: string, hostWindowId: number): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { + return false } - const window = BrowserWindow.fromId(windowId) - if (!window || window.isDestroyed()) { - return null + const hostWindow = BrowserWindow.fromId(hostWindowId) + if (!hostWindow || hostWindow.isDestroyed()) { + return false } - if (state.attachedWindowId != null && state.attachedWindowId !== windowId) { - const previousWindowId = state.attachedWindowId - const previousWindow = BrowserWindow.fromId(previousWindowId) - this.detachWindowListeners(previousWindowId) - if (previousWindow && !previousWindow.isDestroyed()) { - try { - previousWindow.contentView.removeChildView(state.view) - } catch { - // Ignore already detached view. - } - } - } + this.detachOtherSessionBrowsers(hostWindowId, sessionId) - if (state.attachedWindowId !== windowId) { - this.markEmbeddedHostNotReady(state) + if (state.attachedWindowId != null && state.attachedWindowId !== hostWindowId) { + this.detachFromWindow(state, state.attachedWindowId) } - if (state.attachedWindowId !== windowId) { + if (state.attachedWindowId !== hostWindowId) { + this.markHostNotReady(state) try { - window.contentView.addChildView(state.view) + hostWindow.contentView.addChildView(state.view) } catch { try { - window.contentView.removeChildView(state.view) + hostWindow.contentView.removeChildView(state.view) } catch { // Ignore already detached view. } - window.contentView.addChildView(state.view) + hostWindow.contentView.addChildView(state.view) } } - if (state.id !== windowId) { - state.id = windowId - this.viewIdToWindowId.set(state.viewId, windowId) - this.pageIdToWindowId.set(state.page.pageId, windowId) - } - - this.attachWindowListeners(windowId) - state.attachedWindowId = windowId + this.attachHostWindowListeners(hostWindowId) + state.attachedWindowId = hostWindowId state.updatedAt = Date.now() - this.setActiveWindowId(windowId) - this.emitWindowUpdated(state) - return state.id + this.emitWindowUpdated(sessionId) + return true } - async updateEmbeddedBounds(windowId: number, bounds: Rectangle, visible: boolean): Promise { - const state = await this.ensureEmbeddedState(undefined, windowId) - if (!state?.view) { + async updateSessionBrowserBounds( + sessionId: string, + hostWindowId: number, + bounds: Rectangle, + visible: boolean + ): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { return } const normalizedBounds = this.normalizeBounds(bounds) state.lastBounds = normalizedBounds - state.lastVisible = visible state.updatedAt = Date.now() if (!visible || normalizedBounds.width <= 0 || normalizedBounds.height <= 0) { - this.markEmbeddedHostNotReady(state) - this.setWindowVisibility(state, false) + this.markHostNotReady(state) + this.setSessionVisibility(state, false) return } - if (state.attachedWindowId !== windowId) { - const attachedWindowId = await this.attachEmbeddedToWindow(windowId) - if (attachedWindowId == null) { + if (state.attachedWindowId !== hostWindowId) { + const attached = await this.attachSessionBrowser(sessionId, hostWindowId) + if (!attached) { return } } state.view.setBounds(normalizedBounds) - this.setWindowVisibility(state, true) - this.scheduleEmbeddedHostReady(windowId, normalizedBounds) + this.setSessionVisibility(state, true) + this.scheduleSessionHostReady(sessionId, hostWindowId, normalizedBounds) } - async detachEmbedded(): Promise { - const state = this.embeddedState - if (!state?.view) { + async detachSessionBrowser(sessionId: string): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state || state.attachedWindowId == null) { return } - const attachedWindowId = state.attachedWindowId - if (attachedWindowId != null) { - const window = BrowserWindow.fromId(attachedWindowId) - this.detachWindowListeners(attachedWindowId) - if (window && !window.isDestroyed()) { - try { - window.contentView.removeChildView(state.view) - } catch { - // Ignore already detached view. - } - } - } - - state.attachedWindowId = null - this.markEmbeddedHostNotReady(state) + this.detachFromWindow(state, state.attachedWindowId) + this.markHostNotReady(state) state.updatedAt = Date.now() - this.setWindowVisibility(state, false) - } - - async focusWindow(windowId: number): Promise { - if (this.embeddedState?.id === windowId) { - this.windowPresenter.show(windowId, true) - this.setActiveWindowId(windowId) - this.emitWindowUpdated(this.embeddedState) - return - } - - const state = this.browserWindows.get(windowId) - if (!state) return - this.windowPresenter.show(windowId, true) - this.setActiveWindowId(windowId) - this.emitWindowVisibility(windowId, true) - this.emitWindowUpdated(state) - } - - async closeWindow(windowId: number): Promise { - if (this.embeddedState?.id === windowId) { - await this.destroyEmbeddedState(true) - return - } - - if (!this.browserWindows.has(windowId)) return - await this.windowPresenter.closeWindow(windowId, true) + this.setSessionVisibility(state, false) } - async listWindows(): Promise { - const windows = [ - ...(this.embeddedState ? [this.embeddedState] : []), - ...Array.from(this.browserWindows.values()) - ] - - return windows - .sort((left, right) => right.updatedAt - left.updatedAt) - .map((state) => this.toWindowInfo(state)) - } - - async getActiveWindow(): Promise { - const state = this.getResolvedWindowState() - return state ? this.toWindowInfo(state) : null - } - - async getWindowById(windowId: number): Promise { - const state = this.getWindowStateById(windowId) - return state ? this.toWindowInfo(state) : null - } - - async hasWindow(): Promise { - return this.browserWindows.size > 0 || this.embeddedState != null - } - - async show(shouldFocus: boolean = true): Promise { - const existing = this.getResolvedWindowState() - if (existing) { - if (existing.isEmbedded) { - if (shouldFocus) { - this.windowPresenter.show(existing.id, true) - this.setActiveWindowId(existing.id) - } - - const canShowEmbedded = - existing.attachedWindowId != null && - Boolean(existing.view && !existing.view.webContents.isDestroyed()) - this.setWindowVisibility(existing, canShowEmbedded) - } else { - this.windowPresenter.show(existing.id, shouldFocus) - if (shouldFocus) { - this.setActiveWindowId(existing.id) - } - this.setWindowVisibility(existing, true) - } + async destroySessionBrowser(sessionId: string): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { return } - await this.openWindow('about:blank') - } - - async hide(): Promise { - const state = this.getResolvedWindowState() - if (!state) return - if (state.isEmbedded) { - await this.detachEmbedded() - return - } - this.windowPresenter.hide(state.id) - this.emitWindowVisibility(state.id, false) - } - - async toggleVisibility(): Promise { - const state = this.getResolvedWindowState() - if (!state) { - await this.openWindow('about:blank') - return true - } + this.resolveOrRejectHostReadyWait( + sessionId, + new Error(`Session browser ${sessionId} was destroyed before it became ready`) + ) + await this.detachSessionBrowser(sessionId) + state.page.destroy() + this.sessionBrowsers.delete(sessionId) - if (state.isEmbedded) { - const canShowEmbedded = - state.attachedWindowId != null && - Boolean(state.view && !state.view.webContents.isDestroyed()) - if (!canShowEmbedded) { - this.setWindowVisibility(state, false) - return false + if (!state.view.webContents.isDestroyed()) { + try { + state.view.webContents.close() + } catch { + // Ignore view shutdown failures. } - - const nextVisible = !state.visible - this.setWindowVisibility(state, nextVisible) - return nextVisible - } - - const window = BrowserWindow.fromId(state.id) - if (!window || window.isDestroyed()) { - await this.openWindow('about:blank') - return true - } - - if (window.isVisible()) { - await this.hide() - return false } - await this.focusWindow(state.id) - return true - } - - async isVisible(): Promise { - const state = this.getResolvedWindowState() - if (!state) return false - if (state.isEmbedded) { - return Boolean( - state.visible && - state.attachedWindowId != null && - state.view && - !state.view.webContents.isDestroyed() - ) - } - const window = BrowserWindow.fromId(state.id) - return Boolean(window && !window.isDestroyed() && window.isVisible()) + this.emitWindowClosed(sessionId) + this.emitWindowCount() } - async navigateWindow(windowId: number, url: string, timeoutMs?: number): Promise { - const state = this.getResolvedWindowState(windowId) + async goBack(sessionId: string): Promise { + const state = this.sessionBrowsers.get(sessionId) if (!state) { - throw new Error(`Browser window ${windowId} not found`) + return } - - this.logLifecycle('navigation started', { - windowId: state.id, - pageId: state.page.pageId, - url - }) - await state.page.navigate(url, timeoutMs) - state.updatedAt = Date.now() - this.emitWindowUpdated(state) - } - - async goBack(target?: number | string): Promise { - const state = this.getResolvedWindowState(target) - if (!state) return await state.page.goBack() state.updatedAt = Date.now() - this.emitWindowUpdated(state) + this.emitWindowUpdated(sessionId) } - async goForward(target?: number | string): Promise { - const state = this.getResolvedWindowState(target) - if (!state) return + async goForward(sessionId: string): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { + return + } await state.page.goForward() state.updatedAt = Date.now() - this.emitWindowUpdated(state) + this.emitWindowUpdated(sessionId) } - async reload(target?: number | string): Promise { - const state = this.getResolvedWindowState(target) - if (!state) return + async reload(sessionId: string): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { + return + } await state.page.reload() state.updatedAt = Date.now() - this.emitWindowUpdated(state) + this.emitWindowUpdated(sessionId) } - async getNavigationState(target?: number | string): Promise<{ + async getNavigationState(sessionId: string): Promise<{ canGoBack: boolean canGoForward: boolean }> { - const state = this.getResolvedWindowState(target) + const state = this.sessionBrowsers.get(sessionId) if (!state || state.page.contents.isDestroyed()) { return { canGoBack: false, @@ -451,55 +260,54 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { } } - async getBrowserContext(): Promise { - return { - activeWindowId: this.getResolvedWindowState()?.id ?? null, - windows: await this.listWindows() + async captureScreenshot(sessionId: string, options?: ScreenshotOptions): Promise { + const state = this.sessionBrowsers.get(sessionId) + if (!state) { + throw new Error(`Session browser ${sessionId} not found`) } - } - async captureScreenshot(target: string | number, options?: ScreenshotOptions): Promise { - const state = this.getResolvedWindowState(target) - if (!state) { - throw new Error(`Browser target ${String(target)} not found`) + try { + return await state.page.takeScreenshot(options) + } catch (error) { + if (error instanceof Error && error.name === 'YoBrowserNotReadyError') { + this.logLifecycle('tool blocked:not-ready', { + sessionId, + url: state.page.url, + status: state.page.status, + action: 'capture screenshot' + }) + } + throw error } - return await this.runPageAction(state, 'capture screenshot', () => - state.page.takeScreenshot(options) - ) } - async extractDom(target: string | number, selector?: string): Promise { - const state = this.getResolvedWindowState(target) - if (!state) { - throw new Error(`Browser target ${String(target)} not found`) - } - return await this.runPageAction(state, 'extract DOM', () => state.page.extractDOM(selector)) + async getBrowserPage(sessionId: string): Promise { + return this.sessionBrowsers.get(sessionId)?.page.toPageInfo() ?? null } - async evaluateScript(target: string | number, script: string): Promise { - const state = this.getResolvedWindowState(target) + async sendCdpCommand( + sessionId: string, + method: string, + params?: Record + ): Promise { + const state = this.sessionBrowsers.get(sessionId) if (!state) { - throw new Error(`Browser target ${String(target)} not found`) + throw new Error(`Session browser ${sessionId} is not initialized`) } - return await this.runPageAction(state, 'evaluate script', () => - state.page.evaluateScript(script) - ) + return await state.page.sendCdpCommand(method, params) } async startDownload(url: string, savePath?: string): Promise { - const state = this.getResolvedWindowState() + const state = this.findPreferredSessionState() if (!state || state.page.contents.isDestroyed()) { - throw new Error('No active browser window available') + throw new Error('No active session browser available') } return await this.downloadManager.downloadFile(url, savePath, state.page.contents) } async clearSandboxData(): Promise { await clearYoBrowserSessionData() - if (this.embeddedState && !this.embeddedState.page.contents.isDestroyed()) { - this.embeddedState.page.contents.reloadIgnoringCache() - } - for (const state of this.browserWindows.values()) { + for (const state of this.sessionBrowsers.values()) { if (!state.page.contents.isDestroyed()) { state.page.contents.reloadIgnoringCache() } @@ -507,116 +315,15 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { } async shutdown(): Promise { - await this.destroyEmbeddedState(false) - const windowIds = Array.from(this.browserWindows.keys()) - for (const windowId of windowIds) { - await this.windowPresenter.closeWindow(windowId, true) - } - } - - // Deprecated wrappers kept temporarily while callers migrate to window semantics. - async listTabs(): Promise { - return (await this.listWindows()).map((browserWindow) => ({ - ...browserWindow.page, - isActive: browserWindow.id === this.activeWindowId - })) - } - - async getActiveTab(): Promise { - const activeWindow = await this.getActiveWindow() - if (!activeWindow) { - return null - } - return { - ...activeWindow.page, - isActive: true - } - } - - async getTabById(pageId: string): Promise { - const state = this.getResolvedWindowState(pageId) - if (!state) { - return null - } - return { - ...state.page.toPageInfo(), - isActive: state.id === this.activeWindowId + for (const sessionId of Array.from(this.sessionBrowsers.keys())) { + await this.destroySessionBrowser(sessionId) } } - async createTab(url?: string): Promise { - const browserWindow = await this.openWindow(url ?? 'about:blank') - if (!browserWindow) { - return null - } - return { - ...browserWindow.page, - isActive: true - } - } - - async navigateTab(pageId: string, url: string, timeoutMs?: number): Promise { - const state = this.getResolvedWindowState(pageId) - if (!state) { - throw new Error(`Browser page ${pageId} not found`) - } - await this.navigateWindow(state.id, url, timeoutMs) - } - - async activateTab(pageId: string): Promise { - const state = this.getResolvedWindowState(pageId) - if (!state) return - await this.focusWindow(state.id) - } - - async closeTab(pageId: string): Promise { - const state = this.getResolvedWindowState(pageId) - if (!state) return - await this.closeWindow(state.id) - } - - async reuseTab(url: string): Promise { - const existing = this.findReusableWindow(url) + private ensureSessionBrowserState(sessionId: string): SessionBrowserState { + const existing = this.sessionBrowsers.get(sessionId) if (existing) { - await this.navigateWindow(existing.id, url) - await this.focusWindow(existing.id) - return { - ...existing.page.toPageInfo(), - isActive: true - } - } - return await this.createTab(url) - } - - async getTabIdByViewId(viewId: number): Promise { - const windowId = this.viewIdToWindowId.get(viewId) - if (windowId == null) { - return null - } - const state = this.getResolvedWindowState(windowId) - return state?.page.pageId ?? null - } - - async getBrowserTab(target?: string | number): Promise { - return this.getResolvedWindowState(target)?.page ?? null - } - - private async ensureEmbeddedState( - _url?: string, - preferredWindowId?: number - ): Promise { - const hostWindowId = this.resolveHostWindowId(preferredWindowId) - if (hostWindowId == null) { - return null - } - - if (this.embeddedState) { - if (this.embeddedState.id !== hostWindowId) { - this.embeddedState.id = hostWindowId - this.viewIdToWindowId.set(this.embeddedState.viewId, hostWindowId) - this.pageIdToWindowId.set(this.embeddedState.page.pageId, hostWindowId) - } - return this.embeddedState + return existing } const view = new WebContentsView({ @@ -632,174 +339,86 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { const page = new BrowserPage(view.webContents, this.cdpManager, this.screenshotManager) const now = Date.now() - const state: BrowserWindowState = { - id: hostWindowId, - viewId: view.webContents.id, + const state: SessionBrowserState = { + sessionId, + view, page, createdAt: now, updatedAt: now, - isEmbedded: true, - view, visible: false, attachedWindowId: null, lastBounds: null, - lastVisible: false, hostReady: false } - this.embeddedState = state - this.viewIdToWindowId.set(state.viewId, hostWindowId) - this.pageIdToWindowId.set(page.pageId, hostWindowId) - this.setupPageListeners(hostWindowId, page, view.webContents, true) - - this.setActiveWindowId(hostWindowId) - this.emitWindowCreated(state) + this.sessionBrowsers.set(sessionId, state) + this.setupPageListeners(state, view.webContents) + this.emitWindowCreated(sessionId) this.emitWindowCount() return state } - private attachWindowListeners(windowId: number): void { - if (this.attachedWindowIds.has(windowId)) { - return - } - - const window = BrowserWindow.fromId(windowId) - if (!window || window.isDestroyed()) { - return - } - - this.attachedWindowIds.add(windowId) - - const focus = () => { - this.setActiveWindowId(windowId) - const state = this.getAttachedWindowState(windowId) - if (state) { - state.updatedAt = Date.now() - this.emitWindowUpdated(state) - } - } - - const show = () => { - const state = this.getAttachedWindowState(windowId) - if (state?.isEmbedded) { - this.setWindowVisibility(state, true) - return - } - this.emitWindowVisibility(windowId, true) - } + private setupPageListeners(state: SessionBrowserState, contents: WebContents): void { + const sessionId = state.sessionId + const getState = () => this.sessionBrowsers.get(sessionId) - const hide = () => { - const state = this.getAttachedWindowState(windowId) - if (state?.isEmbedded) { - this.setWindowVisibility(state, false) + contents.on('did-navigate', (_event, url) => { + const current = getState() + if (!current) { return } - this.emitWindowVisibility(windowId, false) - } - - const closed = () => { - this.detachWindowListeners(windowId) - if (this.embeddedState?.attachedWindowId === windowId) { - void this.destroyEmbeddedState(false) - } - this.cleanupWindow(windowId, true) - } - - this.windowListeners.set(windowId, { focus, show, hide, closed }) - - window.on('focus', focus) - window.on('show', show) - window.on('hide', hide) - window.on('closed', closed) - } - - private detachWindowListeners(windowId: number): void { - const listeners = this.windowListeners.get(windowId) - if (!listeners) { - this.attachedWindowIds.delete(windowId) - return - } - - const window = BrowserWindow.fromId(windowId) - if (window && !window.isDestroyed()) { - window.removeListener('focus', listeners.focus) - window.removeListener('show', listeners.show) - window.removeListener('hide', listeners.hide) - window.removeListener('closed', listeners.closed) - } - - this.windowListeners.delete(windowId) - this.attachedWindowIds.delete(windowId) - } - - private getAttachedWindowState(windowId: number): BrowserWindowState | null { - if (this.embeddedState?.attachedWindowId === windowId) { - return this.embeddedState - } - return this.browserWindows.get(windowId) ?? null - } - - private getWindowStateById(windowId: number): BrowserWindowState | null { - if (this.embeddedState?.id === windowId) { - return this.embeddedState - } - - return this.browserWindows.get(windowId) ?? null - } - - private setupPageListeners( - windowId: number, - page: BrowserPage, - contents: WebContents, - isEmbedded: boolean = false - ): void { - const getState = () => (isEmbedded ? this.embeddedState : this.browserWindows.get(windowId)) - - contents.on('did-navigate', (_event, url) => { - const state = getState() - if (!state) return - page.url = url - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + current.page.url = url + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) }) contents.on('page-title-updated', (_event, title) => { - const state = getState() - if (!state) return - page.title = title || page.url - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current) { + return + } + current.page.title = title || current.page.url + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) }) contents.on('page-favicon-updated', (_event, favicons) => { - const state = getState() - if (!state || favicons.length === 0) return - if (page.favicon !== favicons[0]) { - page.favicon = favicons[0] - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current || favicons.length === 0) { + return + } + if (current.page.favicon !== favicons[0]) { + current.page.favicon = favicons[0] + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) } }) contents.on('did-start-loading', () => { - const state = getState() - if (!state) return - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current) { + return + } + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) }) contents.on('dom-ready', () => { - const state = getState() - if (!state) return - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current) { + return + } + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) }) contents.on('did-finish-load', () => { - const state = getState() - if (!state) return - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current) { + return + } + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) }) contents.on( @@ -815,100 +434,156 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { return } - const state = getState() - if (!state) return - state.updatedAt = Date.now() - this.emitWindowUpdated(state) + const current = getState() + if (!current) { + return + } + current.updatedAt = Date.now() + this.emitWindowUpdated(sessionId) } ) contents.on('destroyed', () => { - if (isEmbedded) { - void this.destroyEmbeddedState(false) - } else { - this.cleanupWindow(windowId, false) - } + this.handleDestroyedContents(sessionId) }) } - private cleanupWindow(windowId: number, emitClosed: boolean): void { - const state = this.browserWindows.get(windowId) + private handleDestroyedContents(sessionId: string): void { + const state = this.sessionBrowsers.get(sessionId) if (!state) { - this.detachWindowListeners(windowId) return } + this.resolveOrRejectHostReadyWait( + sessionId, + new Error(`Session browser ${sessionId} was destroyed before it became ready`) + ) state.page.destroy() - this.browserWindows.delete(windowId) - this.viewIdToWindowId.delete(state.viewId) - this.pageIdToWindowId.delete(state.page.pageId) - this.detachWindowListeners(windowId) - - if (this.activeWindowId === windowId) { - this.activeWindowId = this.getResolvedWindowState()?.id ?? null - this.emitWindowFocused(this.activeWindowId) + state.attachedWindowId = null + state.visible = false + state.hostReady = false + this.sessionBrowsers.delete(sessionId) + this.emitWindowClosed(sessionId) + this.emitWindowCount() + } + + private attachHostWindowListeners(windowId: number): void { + if (this.hostWindowListeners.has(windowId)) { + return } - if (emitClosed) { - this.emitWindowClosed(windowId) + const window = BrowserWindow.fromId(windowId) + if (!window || window.isDestroyed()) { + return } - this.emitWindowCount() - } + const focus = () => { + const state = this.findAttachedStateByWindowId(windowId) + if (!state) { + return + } + state.updatedAt = Date.now() + this.emitWindowFocused(state.sessionId, windowId) + this.emitWindowUpdated(state.sessionId) + } - private getResolvedWindowState(target?: number | string): BrowserWindowState | null { - if (this.embeddedState) { - if (typeof target === 'number' && target === this.embeddedState.id) { - return this.embeddedState + const show = () => { + const state = this.findAttachedStateByWindowId(windowId) + if (!state) { + return } + this.setSessionVisibility(state, true) + } - if ( - typeof target === 'string' && - target.trim() && - target === this.embeddedState.page.pageId - ) { - return this.embeddedState + const hide = () => { + const state = this.findAttachedStateByWindowId(windowId) + if (!state) { + return } + this.setSessionVisibility(state, false) } - if (typeof target === 'number') { - return this.getWindowStateById(target) + const closed = () => { + const state = this.findAttachedStateByWindowId(windowId) + if (state) { + state.attachedWindowId = null + state.hostReady = false + this.setSessionVisibility(state, false) + } + this.detachHostWindowListeners(windowId) } - if (typeof target === 'string' && target.trim()) { - const windowId = this.pageIdToWindowId.get(target) - return windowId != null ? this.getWindowStateById(windowId) : null + this.hostWindowListeners.set(windowId, { focus, show, hide, closed }) + window.on('focus', focus) + window.on('show', show) + window.on('hide', hide) + window.on('closed', closed) + } + + private detachHostWindowListeners(windowId: number): void { + const listeners = this.hostWindowListeners.get(windowId) + if (!listeners) { + return + } + + const window = BrowserWindow.fromId(windowId) + if (window && !window.isDestroyed()) { + window.removeListener('focus', listeners.focus) + window.removeListener('show', listeners.show) + window.removeListener('hide', listeners.hide) + window.removeListener('closed', listeners.closed) } - const activeFromFocused = this.findFocusedBrowserWindow() - if (activeFromFocused) { - this.activeWindowId = activeFromFocused.id - return activeFromFocused + this.hostWindowListeners.delete(windowId) + } + + private detachOtherSessionBrowsers(hostWindowId: number, exceptSessionId: string): void { + for (const state of this.sessionBrowsers.values()) { + if (state.sessionId === exceptSessionId || state.attachedWindowId !== hostWindowId) { + continue + } + + this.detachFromWindow(state, hostWindowId) + this.markHostNotReady(state) + this.setSessionVisibility(state, false) + state.updatedAt = Date.now() + this.emitWindowUpdated(state.sessionId) } + } - if (this.activeWindowId != null) { - const activeState = this.getWindowStateById(this.activeWindowId) - if (activeState) { - return activeState + private detachFromWindow(state: SessionBrowserState, hostWindowId: number): void { + const window = BrowserWindow.fromId(hostWindowId) + if (window && !window.isDestroyed()) { + try { + window.contentView.removeChildView(state.view) + } catch { + // Ignore already detached view. } } + state.attachedWindowId = null + } - const [latest] = [ - ...(this.embeddedState ? [this.embeddedState] : []), - ...Array.from(this.browserWindows.values()) - ].sort((left, right) => right.updatedAt - left.updatedAt) - return latest ?? null + private findAttachedStateByWindowId(windowId: number): SessionBrowserState | null { + for (const state of this.sessionBrowsers.values()) { + if (state.attachedWindowId === windowId) { + return state + } + } + return null } - private findFocusedBrowserWindow(): BrowserWindowState | null { - const focusedWindow = this.windowPresenter.getFocusedWindow() - if (!focusedWindow || focusedWindow.isDestroyed()) { + private findPreferredSessionState(): SessionBrowserState | null { + const states = [...this.sessionBrowsers.values()] + if (states.length === 0) { return null } - if (this.embeddedState?.id === focusedWindow.id) { - return this.embeddedState + + const visibleState = states.find((state) => state.visible) + if (visibleState) { + return visibleState } - return this.browserWindows.get(focusedWindow.id) ?? null + + return states.sort((left, right) => right.updatedAt - left.updatedAt)[0] ?? null } private resolveHostWindowId(preferredWindowId?: number): number | null { @@ -928,32 +603,10 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { return firstWindow && !firstWindow.isDestroyed() ? firstWindow.id : null } - private findReusableWindow(url: string): BrowserWindowState | null { - if (!url) { - return this.getResolvedWindowState() - } - - try { - const targetHost = new URL(url).hostname - for (const state of this.browserWindows.values()) { - try { - if (new URL(state.page.url).hostname === targetHost) { - return state - } - } catch { - // Ignore invalid URL parsing for existing pages. - } - } - } catch { - // Ignore invalid URL parsing for requested URL. - } - - return this.getResolvedWindowState() - } - - private async waitForEmbeddedHostReady( + private async waitForSessionHostReady( + sessionId: string, hostWindowId: number, - state: BrowserWindowState + state: SessionBrowserState ): Promise { if ( state.hostReady && @@ -967,34 +620,39 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { } this.resolveOrRejectHostReadyWait( - null, + sessionId, new Error( - `Embedded browser host wait was interrupted before host ${hostWindowId} became ready` + `Session browser host wait was interrupted before host ${hostWindowId} became ready` ) ) await new Promise((resolve, reject) => { const timeoutId = setTimeout(() => { const error = new Error( - `Embedded browser host ${hostWindowId} did not become ready within ${this.embeddedHostReadyTimeoutMs}ms` + `Session browser host ${hostWindowId} did not become ready within ${this.embeddedHostReadyTimeoutMs}ms` ) - this.resolveOrRejectHostReadyWait(null, error) + this.resolveOrRejectHostReadyWait(sessionId, error) }, this.embeddedHostReadyTimeoutMs) - this.hostReadyWaiter = { + this.hostReadyWaiters.set(sessionId, { + sessionId, hostWindowId, timeoutId, stableTimerId: null, resolve, reject - } + }) }) } - private scheduleEmbeddedHostReady(windowId: number, bounds: Rectangle): void { - const state = this.embeddedState - const waiter = this.hostReadyWaiter - if (!state || !waiter || waiter.hostWindowId !== windowId) { + private scheduleSessionHostReady( + sessionId: string, + hostWindowId: number, + bounds: Rectangle + ): void { + const state = this.sessionBrowsers.get(sessionId) + const waiter = this.hostReadyWaiters.get(sessionId) + if (!state || !waiter || waiter.hostWindowId !== hostWindowId) { return } @@ -1005,14 +663,14 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { const expectedBoundsKey = this.boundsKey(bounds) waiter.stableTimerId = setTimeout(() => { - const currentState = this.embeddedState - const currentWaiter = this.hostReadyWaiter + const currentState = this.sessionBrowsers.get(sessionId) + const currentWaiter = this.hostReadyWaiters.get(sessionId) if ( !currentState || !currentWaiter || currentWaiter !== waiter || - currentWaiter.hostWindowId !== windowId || - currentState.attachedWindowId !== windowId || + currentWaiter.hostWindowId !== hostWindowId || + currentState.attachedWindowId !== hostWindowId || !currentState.visible || this.boundsKey(currentState.lastBounds) !== expectedBoundsKey ) { @@ -1021,46 +679,72 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { currentState.hostReady = true this.logLifecycle('host ready', { - windowId, + sessionId, + windowId: hostWindowId, pageId: currentState.page.pageId, url: currentState.page.url }) - this.resolveOrRejectHostReadyWait(currentWaiter) + this.resolveOrRejectHostReadyWait(sessionId) }, this.embeddedHostReadyStableMs) } - private markEmbeddedHostNotReady(state: BrowserWindowState): void { + private markHostNotReady(state: SessionBrowserState): void { state.hostReady = false - const waiter = this.hostReadyWaiter + const waiter = this.hostReadyWaiters.get(state.sessionId) if (waiter?.stableTimerId) { clearTimeout(waiter.stableTimerId) waiter.stableTimerId = null } } - private resolveOrRejectHostReadyWait(waiter: HostReadyWaiter | null, error?: Error): void { - const targetWaiter = waiter ?? this.hostReadyWaiter - if (!targetWaiter) { + private resolveOrRejectHostReadyWait(sessionId: string, error?: Error): void { + const waiter = this.hostReadyWaiters.get(sessionId) + if (!waiter) { return } - if (targetWaiter.timeoutId) { - clearTimeout(targetWaiter.timeoutId) + clearTimeout(waiter.timeoutId) + if (waiter.stableTimerId) { + clearTimeout(waiter.stableTimerId) } - if (targetWaiter.stableTimerId) { - clearTimeout(targetWaiter.stableTimerId) + this.hostReadyWaiters.delete(sessionId) + + if (error) { + waiter.reject(error) + return } - if (this.hostReadyWaiter === targetWaiter) { - this.hostReadyWaiter = null + waiter.resolve() + } + + private toStatus(state: SessionBrowserState | null): YoBrowserStatus { + if (!state || state.page.contents.isDestroyed()) { + return { + initialized: false, + page: null, + canGoBack: false, + canGoForward: false, + visible: false, + loading: false + } } - if (error) { - targetWaiter.reject(error) - return + return { + initialized: true, + page: state.page.toPageInfo(), + canGoBack: state.page.contents.navigationHistory.canGoBack(), + canGoForward: state.page.contents.navigationHistory.canGoForward(), + visible: state.visible, + loading: state.page.status === BrowserPageStatus.Loading } + } - targetWaiter.resolve() + private setSessionVisibility(state: SessionBrowserState, visible: boolean): void { + if (state.visible === visible) { + return + } + state.visible = visible + this.emitWindowVisibility(state.sessionId, visible) } private normalizeBounds(bounds: Rectangle): Rectangle { @@ -1079,84 +763,41 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { return `${bounds.x}:${bounds.y}:${bounds.width}:${bounds.height}` } - private async runPageAction( - state: BrowserWindowState, - action: string, - operation: () => Promise - ): Promise { - try { - return await operation() - } catch (error) { - if (error instanceof Error && error.name === 'YoBrowserNotReadyError') { - this.logLifecycle('tool blocked:not-ready', { - windowId: state.id, - pageId: state.page.pageId, - url: state.page.url, - status: state.page.status, - action - }) - } - throw error - } - } - private logLifecycle(message: string, context: Record): void { logger.info(`[YoBrowser] ${message}`, context) } - private toWindowInfo(state: BrowserWindowState): BrowserWindowInfo { - const window = BrowserWindow.fromId(state.id) - return { - id: state.id, - page: state.page.toPageInfo(), - isFocused: Boolean(window && !window.isDestroyed() && window.isFocused()), - isVisible: state.isEmbedded - ? Boolean(state.visible) - : Boolean(window && !window.isDestroyed() && window.isVisible()), - createdAt: state.createdAt, - updatedAt: state.updatedAt - } - } - - private setWindowVisibility(state: BrowserWindowState, visible: boolean): void { - if (state.visible === visible) { - return - } - state.visible = visible - this.emitWindowVisibility(state.id, visible) - } - - private setActiveWindowId(windowId: number | null): void { - this.activeWindowId = windowId - this.emitWindowFocused(windowId) - } - - private emitWindowCreated(state: BrowserWindowState): void { + private emitWindowCreated(sessionId: string): void { eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_CREATED, SendTarget.ALL_WINDOWS, { - window: this.toWindowInfo(state) + sessionId, + status: this.toStatus(this.sessionBrowsers.get(sessionId) ?? null) }) } - private emitOpenRequested(windowId: number, pageId: string, url: string): void { + private emitOpenRequested(sessionId: string, windowId: number, url: string): void { eventBus.sendToRenderer(YO_BROWSER_EVENTS.OPEN_REQUESTED, SendTarget.ALL_WINDOWS, { + sessionId, windowId, - pageId, url }) } - private emitWindowUpdated(state: BrowserWindowState): void { + private emitWindowUpdated(sessionId: string): void { eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_UPDATED, SendTarget.ALL_WINDOWS, { - window: this.toWindowInfo(state) + sessionId, + status: this.toStatus(this.sessionBrowsers.get(sessionId) ?? null) }) } - private emitWindowClosed(windowId: number): void { - eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_CLOSED, SendTarget.ALL_WINDOWS, { windowId }) + private emitWindowClosed(sessionId: string): void { + eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_CLOSED, SendTarget.ALL_WINDOWS, { + sessionId + }) } - private emitWindowFocused(windowId: number | null): void { + private emitWindowFocused(sessionId: string, windowId: number): void { eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_FOCUSED, SendTarget.ALL_WINDOWS, { + sessionId, windowId }) } @@ -1165,53 +806,14 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { eventBus.sendToRenderer( YO_BROWSER_EVENTS.WINDOW_COUNT_CHANGED, SendTarget.ALL_WINDOWS, - this.browserWindows.size + (this.embeddedState ? 1 : 0) + this.sessionBrowsers.size ) } - private emitWindowVisibility(windowId: number, visible: boolean): void { + private emitWindowVisibility(sessionId: string, visible: boolean): void { eventBus.sendToRenderer(YO_BROWSER_EVENTS.WINDOW_VISIBILITY_CHANGED, SendTarget.ALL_WINDOWS, { - windowId, + sessionId, visible }) } - - private async destroyEmbeddedState(emitClosed: boolean): Promise { - const state = this.embeddedState - if (!state) { - return - } - - this.resolveOrRejectHostReadyWait( - null, - new Error(`Embedded browser window ${state.id} was destroyed before it became ready`) - ) - - await this.detachEmbedded() - state.page.destroy() - this.viewIdToWindowId.delete(state.viewId) - this.pageIdToWindowId.delete(state.page.pageId) - - if (state.view && !state.view.webContents.isDestroyed()) { - try { - state.view.webContents.close() - } catch { - // Ignore view shutdown failures. - } - } - - const closedWindowId = state.id - this.embeddedState = null - - if (this.activeWindowId === closedWindowId) { - this.activeWindowId = this.getResolvedWindowState()?.id ?? null - this.emitWindowFocused(this.activeWindowId) - } - - if (emitClosed) { - this.emitWindowClosed(closedWindowId) - } - - this.emitWindowCount() - } } diff --git a/src/main/presenter/browser/YoBrowserToolDefinitions.ts b/src/main/presenter/browser/YoBrowserToolDefinitions.ts index b9716500b..40b373509 100644 --- a/src/main/presenter/browser/YoBrowserToolDefinitions.ts +++ b/src/main/presenter/browser/YoBrowserToolDefinitions.ts @@ -3,18 +3,11 @@ import { zodToJsonSchema } from 'zod-to-json-schema' import type { MCPToolDefinition } from '@shared/presenter' const yoBrowserSchemas = { - window_list: z.object({}), - window_open: z.object({ - url: z.string().url().optional().describe('Optional URL to open in the new browser window') - }), - window_focus: z.object({ - windowId: z.number().int().positive().describe('Browser window ID') - }), - window_close: z.object({ - windowId: z.number().int().positive().describe('Browser window ID') + get_browser_status: z.object({}), + load_url: z.object({ + url: z.string().url().describe('URL to load in the session browser') }), cdp_send: z.object({ - windowId: z.number().int().positive().optional().describe('Optional browser window ID'), method: z .enum([ 'Page.navigate', @@ -36,6 +29,8 @@ const yoBrowserSchemas = { }) } +export const YO_BROWSER_TOOL_NAMES = ['load_url', 'get_browser_status', 'cdp_send'] as const + function asParameters(schema: z.ZodTypeAny) { return zodToJsonSchema(schema) as { type: string @@ -63,28 +58,18 @@ function toDefinition(name: string, description: string, schema: z.ZodTypeAny): export function getYoBrowserToolDefinitions(): MCPToolDefinition[] { return [ toDefinition( - 'yo_browser_window_list', - 'List all browser windows and identify the active window', - yoBrowserSchemas.window_list - ), - toDefinition( - 'yo_browser_window_open', - 'Open the embedded browser in the current chat window side panel and optionally navigate to a URL', - yoBrowserSchemas.window_open - ), - toDefinition( - 'yo_browser_window_focus', - 'Focus an existing browser window', - yoBrowserSchemas.window_focus + 'get_browser_status', + 'Get the current session browser status', + yoBrowserSchemas.get_browser_status ), toDefinition( - 'yo_browser_window_close', - 'Close an existing browser window', - yoBrowserSchemas.window_close + 'load_url', + 'Create the session browser on demand and load a URL into it', + yoBrowserSchemas.load_url ), toDefinition( - 'yo_browser_cdp_send', - 'Send a Chrome DevTools Protocol (CDP) command to a browser window page', + 'cdp_send', + 'Send a Chrome DevTools Protocol (CDP) command to the current session browser page', yoBrowserSchemas.cdp_send ) ] diff --git a/src/main/presenter/browser/YoBrowserToolHandler.ts b/src/main/presenter/browser/YoBrowserToolHandler.ts index 3c4653e7a..0ab16363d 100644 --- a/src/main/presenter/browser/YoBrowserToolHandler.ts +++ b/src/main/presenter/browser/YoBrowserToolHandler.ts @@ -13,68 +13,55 @@ export class YoBrowserToolHandler { return getYoBrowserToolDefinitions() } - async callTool(toolName: string, args: Record): Promise { + async callTool( + toolName: string, + args: Record, + conversationId?: string + ): Promise { try { + const sessionId = conversationId?.trim() + if (!sessionId) { + throw new Error('conversationId is required for YoBrowser tools') + } + switch (toolName) { - case 'yo_browser_window_list': - case 'yo_browser_tab_list': - return await this.handleWindowList() - case 'yo_browser_window_open': - case 'yo_browser_tab_new': { - const url = typeof args.url === 'string' ? args.url : undefined - return await this.handleWindowOpen(url) - } - case 'yo_browser_window_focus': { - const windowId = typeof args.windowId === 'number' ? args.windowId : null - if (windowId == null) { - throw new Error('windowId is required') + case 'get_browser_status': + return JSON.stringify(await this.presenter.getBrowserStatus(sessionId)) + case 'load_url': { + const url = typeof args.url === 'string' ? args.url : '' + if (!url) { + throw new Error('url is required') } - return await this.handleWindowFocus(windowId) + return JSON.stringify(await this.presenter.loadUrl(sessionId, url)) } - case 'yo_browser_window_close': { - const windowId = typeof args.windowId === 'number' ? args.windowId : null - if (windowId == null) { - throw new Error('windowId is required') + case 'cdp_send': { + const method = typeof args.method === 'string' ? args.method : '' + if (!method) { + throw new Error('CDP method is required') } - return await this.handleWindowClose(windowId) - } - case 'yo_browser_tab_activate': { - const pageId = - typeof args.pageId === 'string' - ? args.pageId - : typeof args.tabId === 'string' - ? args.tabId - : '' - if (!pageId) { - throw new Error('pageId is required') + + const page = await this.presenter.getBrowserPage(sessionId) + if (!page) { + throw new Error(`Session browser for ${sessionId} is not initialized`) } - await this.presenter.activateTab(pageId) - return JSON.stringify({ success: true, pageId }) - } - case 'yo_browser_tab_close': { - const pageId = - typeof args.pageId === 'string' - ? args.pageId - : typeof args.tabId === 'string' - ? args.tabId - : '' - if (!pageId) { - throw new Error('pageId is required') + + try { + const params = this.normalizeCdpParams(args.params) + const response = await this.presenter.sendCdpCommand(sessionId, method, params) + return JSON.stringify(response ?? {}) + } catch (error) { + if (error instanceof Error && error.name === 'YoBrowserNotReadyError') { + logger.warn('[YoBrowser] tool blocked:not-ready', { + toolName: 'cdp_send', + sessionId, + method, + pageId: page.id, + url: page.url, + status: page.status + }) + } + throw error } - await this.presenter.closeTab(pageId) - return JSON.stringify({ success: true, pageId }) - } - case 'yo_browser_cdp_send': { - const windowId = typeof args.windowId === 'number' ? args.windowId : undefined - const pageId = - typeof args.pageId === 'string' - ? args.pageId - : typeof args.tabId === 'string' - ? args.tabId - : undefined - const method = typeof args.method === 'string' ? args.method : '' - const params = this.normalizeCdpParams(args.params) - return await this.handleCdpSend(windowId ?? pageId, method, params) } default: throw new Error(`Unknown YoBrowser tool: ${toolName}`) @@ -85,61 +72,6 @@ export class YoBrowserToolHandler { } } - private async handleWindowList(): Promise { - const snapshot = await this.presenter.getBrowserContext() - return JSON.stringify(snapshot) - } - - private async handleWindowOpen(url?: string): Promise { - const browserWindow = await this.presenter.openWindow(url) - if (!browserWindow) { - throw new Error('Failed to open browser window') - } - return JSON.stringify(browserWindow) - } - - private async handleWindowFocus(windowId: number): Promise { - await this.presenter.focusWindow(windowId) - return JSON.stringify({ success: true, windowId }) - } - - private async handleWindowClose(windowId: number): Promise { - await this.presenter.closeWindow(windowId) - return JSON.stringify({ success: true, windowId }) - } - - private async handleCdpSend( - target: number | string | undefined, - method: string, - params: Record - ): Promise { - if (!method) { - throw new Error('CDP method is required') - } - - const browserPage = await this.presenter.getBrowserTab(target) - if (!browserPage) { - throw new Error(`Browser target ${String(target)} not found`) - } - - try { - const response = await browserPage.sendCdpCommand(method, params) - return JSON.stringify(response ?? {}) - } catch (error) { - if (error instanceof Error && error.name === 'YoBrowserNotReadyError') { - logger.warn('[YoBrowser] tool blocked:not-ready', { - toolName: 'yo_browser_cdp_send', - target: target ?? 'active', - method, - pageId: browserPage.pageId, - url: browserPage.url, - status: browserPage.status - }) - } - throw error - } - } - private normalizeCdpParams(value: unknown): Record { if (typeof value === 'object' && value !== null && !Array.isArray(value)) { return value as Record diff --git a/src/main/presenter/deepchatAgentPresenter/toolOutputGuard.ts b/src/main/presenter/deepchatAgentPresenter/toolOutputGuard.ts index c8c603885..a9e44b255 100644 --- a/src/main/presenter/deepchatAgentPresenter/toolOutputGuard.ts +++ b/src/main/presenter/deepchatAgentPresenter/toolOutputGuard.ts @@ -8,7 +8,7 @@ import { estimateMessagesTokens } from './contextBuilder' const TOOL_OUTPUT_OFFLOAD_THRESHOLD = 5000 const TOOL_OUTPUT_PREVIEW_LENGTH = 1024 -const TOOLS_REQUIRING_OFFLOAD = new Set(['exec', 'ls', 'find', 'grep']) +const TOOLS_REQUIRING_OFFLOAD = new Set(['exec', 'ls', 'find', 'grep', 'cdp_send']) type ToolMessageUpdateMode = 'append' | 'replace' @@ -208,7 +208,7 @@ export class ToolOutputGuard { } private requiresOffload(toolName: string): boolean { - return TOOLS_REQUIRING_OFFLOAD.has(toolName) || toolName.startsWith('yo_browser_') + return TOOLS_REQUIRING_OFFLOAD.has(toolName) } private withToolMessage( diff --git a/src/main/presenter/newAgentPresenter/index.ts b/src/main/presenter/newAgentPresenter/index.ts index 72aa06947..7949c14bd 100644 --- a/src/main/presenter/newAgentPresenter/index.ts +++ b/src/main/presenter/newAgentPresenter/index.ts @@ -57,6 +57,11 @@ import { import { rtkRuntimeService } from '@/lib/agentRuntime/rtkRuntimeService' const RETIRED_DEFAULT_AGENT_TOOLS = new Set(['find', 'grep', 'ls']) +const LEGACY_AGENT_TOOL_NAME_MAP: Record = { + yo_browser_cdp_send: 'cdp_send', + yo_browser_window_open: 'load_url', + yo_browser_window_list: 'get_browser_status' +} export class NewAgentPresenter { private agentRegistry: AgentRegistry @@ -1668,6 +1673,7 @@ export class NewAgentPresenter { disabledAgentTools .filter((item): item is string => typeof item === 'string') .map((item) => item.trim()) + .map((item) => LEGACY_AGENT_TOOL_NAME_MAP[item] ?? item) .filter((item) => Boolean(item) && !RETIRED_DEFAULT_AGENT_TOOLS.has(item)) ) ).sort((left, right) => left.localeCompare(right)) diff --git a/src/main/presenter/tabPresenter.ts b/src/main/presenter/tabPresenter.ts index b570e32b4..1c4e005ca 100644 --- a/src/main/presenter/tabPresenter.ts +++ b/src/main/presenter/tabPresenter.ts @@ -73,19 +73,6 @@ export class TabPresenter implements ITabPresenter { }) } - setTabBrowserId(tabId: number, browserTabId: string): void { - const state = this.tabState.get(tabId) - if (state) { - state.browserTabId = browserTabId - const windowId = this.tabWindowMap.get(tabId) - if (windowId !== undefined) { - this.notifyWindowTabsUpdate(windowId).catch((error) => { - console.warn(`Failed to sync browser tab id for window ${windowId}:`, error) - }) - } - } - } - private onWindowSizeChange(windowId: number) { const views = this.windowTabs.get(windowId) const window = BrowserWindow.fromId(windowId) @@ -638,15 +625,6 @@ export class TabPresenter implements ITabPresenter { ): void { // 处理外部链接 webContents.setWindowOpenHandler(({ url }) => { - const state = this.tabState.get(tabId) - // 如果是 browser tab,在当前 tab 导航 - if (state?.browserTabId) { - presenter.yoBrowserPresenter.navigateTab(state.browserTabId, url).catch((error: Error) => { - console.error(`[TabPresenter] Failed to navigate browser tab:`, error) - }) - return { action: 'deny' } - } - // Chat tab: 使用系统默认浏览器打开链接 shell.openExternal(url) return { action: 'deny' } }) diff --git a/src/main/presenter/toolPresenter/index.ts b/src/main/presenter/toolPresenter/index.ts index 68fc9bdd1..09c99bdc6 100644 --- a/src/main/presenter/toolPresenter/index.ts +++ b/src/main/presenter/toolPresenter/index.ts @@ -12,6 +12,7 @@ import { AgentToolManager, type AgentToolCallResult } from '../agentPresenter/ac import type { AgentToolRuntimePort } from '../agentPresenter/runtimePorts' import { jsonrepair } from 'jsonrepair' import { CommandPermissionService } from '../permission' +import { YO_BROWSER_TOOL_NAMES } from '../browser/YoBrowserToolDefinitions' interface PreCheckedPermissionResult { needsPermission: true @@ -64,7 +65,8 @@ interface ToolPresenterOptions { } const FILESYSTEM_TOOL_ORDER = ['read', 'write', 'edit', 'exec', 'process'] -const OFFLOAD_TOOL_NAMES = new Set(['exec', 'yo_browser_cdp_send']) +const OFFLOAD_TOOL_NAMES = new Set(['exec', 'cdp_send']) +const RESERVED_AGENT_TOOL_NAMES = new Set(YO_BROWSER_TOOL_NAMES) const withToolSource = (tools: MCPToolDefinition[], source: 'mcp' | 'agent'): MCPToolDefinition[] => tools.map((tool) => ({ @@ -122,7 +124,9 @@ export class ToolPresenter implements IToolPresenter { // 1. Get MCP tools const mcpDefs = withToolSource( - await this.options.mcpPresenter.getAllToolDefinitions(context.enabledMcpTools), + (await this.options.mcpPresenter.getAllToolDefinitions(context.enabledMcpTools)).filter( + (tool) => !RESERVED_AGENT_TOOL_NAMES.has(tool.function.name) + ), 'mcp' ) defs.push(...mcpDefs) @@ -450,18 +454,14 @@ export class ToolPresenter implements IToolPresenter { `Available YoBrowser tools: ${tools.map((tool) => `\`${tool.function.name}\``).join(', ')}.` ] - if (toolNames.has('yo_browser_window_list')) { - lines.push('- Use `yo_browser_window_list` to inspect current browser windows before acting.') + if (toolNames.has('get_browser_status')) { + lines.push('- Use `get_browser_status` to inspect the current session browser state.') } - if (toolNames.has('yo_browser_window_open')) { - lines.push( - '- Use `yo_browser_window_open` when you need a browser window for web exploration.' - ) + if (toolNames.has('load_url')) { + lines.push('- Use `load_url` to lazily create the session browser and navigate to a page.') } - if (toolNames.has('yo_browser_cdp_send')) { - lines.push( - '- Use `yo_browser_cdp_send` for DOM inspection, scripted interaction, and screenshots.' - ) + if (toolNames.has('cdp_send')) { + lines.push('- Use `cdp_send` for DOM inspection, scripted interaction, and screenshots.') } return lines.join('\n') diff --git a/src/main/presenter/windowPresenter/index.ts b/src/main/presenter/windowPresenter/index.ts index bae060e41..09860a09e 100644 --- a/src/main/presenter/windowPresenter/index.ts +++ b/src/main/presenter/windowPresenter/index.ts @@ -520,7 +520,7 @@ export class WindowPresenter implements IWindowPresenter { public async createBrowserWindow(options?: { x?: number; y?: number }): Promise { return await this.createManagedWindow({ - windowType: 'browser', + windowType: 'chat', x: options?.x, y: options?.y }) @@ -558,13 +558,11 @@ export class WindowPresenter implements IWindowPresenter { x?: number // 初始 X 坐标 y?: number // 初始 Y 坐标 }): Promise { - const windowType = options?.windowType ?? 'chat' - // 根据平台选择图标 const iconFile = nativeImage.createFromPath(process.platform === 'win32' ? iconWin : icon) - // 根据窗口类型设置默认宽度 - const defaultWidth = windowType === 'browser' ? 600 : 800 + // Standalone browser shell has been removed. All managed windows now use chat shell sizing. + const defaultWidth = 800 const defaultHeight = 620 // 使用窗口状态管理器恢复位置和尺寸 @@ -625,10 +623,6 @@ export class WindowPresenter implements IWindowPresenter { const windowId = appWindow.id this.windows.set(windowId, appWindow) // 将窗口实例存入 Map - // For browser windows, register type with TabPresenter - if (windowType === 'browser') { - ;(presenter.tabPresenter as TabPresenter).setWindowType(windowId, windowType) - } managedWindowState.manage(appWindow) // 管理窗口状态 @@ -647,14 +641,8 @@ export class WindowPresenter implements IWindowPresenter { appWindow.on('ready-to-show', () => { console.log(`Window ${windowId} is ready to show.`) if (!appWindow.isDestroyed()) { - // For browser windows, don't auto-show/focus to prevent stealing focus from chat windows - // Browser windows should only be shown when explicitly requested by user (e.g., clicking browser button) - const shouldAutoShow = windowType !== 'browser' || options?.forMovedTab === true - - if (shouldAutoShow) { - appWindow.show() - appWindow.focus() - } + appWindow.show() + appWindow.focus() eventBus.sendToMain(WINDOW_EVENTS.WINDOW_CREATED, windowId) } else { console.warn(`Window ${windowId} was destroyed before ready-to-show.`) @@ -842,89 +830,17 @@ export class WindowPresenter implements IWindowPresenter { }) // --- 加载 Renderer HTML 文件 --- - if (windowType === 'chat') { - // Chat windows load the main renderer directly with #/chat hash route - if (is.dev && process.env['ELECTRON_RENDERER_URL']) { - console.log( - `Loading main renderer URL in dev mode: ${process.env['ELECTRON_RENDERER_URL']}#/chat` - ) - appWindow.loadURL(process.env['ELECTRON_RENDERER_URL'] + '#/chat') - } else { - console.log( - `Loading packaged main renderer file: ${join(__dirname, '../renderer/index.html')}` - ) - appWindow.loadFile(join(__dirname, '../renderer/index.html'), { hash: '/chat' }) - } + // Standalone browser renderer has been removed. All windows load the main chat shell. + if (is.dev && process.env['ELECTRON_RENDERER_URL']) { + console.log( + `Loading main renderer URL in dev mode: ${process.env['ELECTRON_RENDERER_URL']}#/chat` + ) + appWindow.loadURL(process.env['ELECTRON_RENDERER_URL'] + '#/chat') } else { - // Browser windows load the dedicated browser renderer - if (is.dev && process.env['ELECTRON_RENDERER_URL']) { - console.log( - `Loading renderer URL in dev mode: ${process.env['ELECTRON_RENDERER_URL']}/browser/index.html` - ) - appWindow.loadURL(process.env['ELECTRON_RENDERER_URL'] + '/browser/index.html') - } else { - console.log( - `Loading packaged renderer file: ${join(__dirname, '../renderer/browser/index.html')}` - ) - appWindow.loadFile(join(__dirname, '../renderer/browser/index.html')) - } - } - - // --- 处理 browser 窗口的初始标签页创建或激活 --- - // Only browser windows need initial tab / activateTab handling via TabPresenter - if (windowType === 'browser') { - if (options?.initialTab) { - appWindow.webContents.once('did-finish-load', async () => { - console.log(`Window ${windowId} did-finish-load, checking for initial tab creation.`) - if (appWindow.isDestroyed()) { - console.warn( - `Window ${windowId} was destroyed before did-finish-load callback, cannot create initial tab.` - ) - return - } - appWindow.focus() - try { - console.log(`Creating initial browser view, URL: ${options.initialTab!.url}`) - const viewId = await (presenter.tabPresenter as TabPresenter).createTab( - windowId, - options.initialTab!.url, - { active: true } - ) - if (viewId === null) { - console.error(`Failed to create initial browser view in new window ${windowId}.`) - } else { - console.log(`Created initial browser view ${viewId} in window ${windowId}.`) - } - } catch (error) { - console.error(`Error creating initial browser view:`, error) - } - }) - } - - if (options?.activateTabId !== undefined && !options?.forMovedTab) { - appWindow.webContents.once('did-finish-load', async () => { - console.log( - `Window ${windowId} did-finish-load, attempting to activate tab ${options.activateTabId}.` - ) - if (appWindow.isDestroyed()) { - console.warn( - `Window ${windowId} was destroyed before did-finish-load callback, cannot activate tab ${options.activateTabId}.` - ) - return - } - try { - await (presenter.tabPresenter as TabPresenter).switchTab( - options.activateTabId as number - ) - console.log(`Requested to switch to tab ${options.activateTabId}.`) - } catch (error) { - console.error( - `Failed to activate tab ${options.activateTabId} after window ${windowId} load:`, - error - ) - } - }) - } + console.log( + `Loading packaged main renderer file: ${join(__dirname, '../renderer/index.html')}` + ) + appWindow.loadFile(join(__dirname, '../renderer/index.html'), { hash: '/chat' }) } // DevTools 不再自动打开,需要手动通过菜单或快捷键打开 diff --git a/src/renderer/browser/App.vue b/src/renderer/browser/App.vue deleted file mode 100644 index 510ce1a9c..000000000 --- a/src/renderer/browser/App.vue +++ /dev/null @@ -1,79 +0,0 @@ - - - - - diff --git a/src/renderer/browser/components/AppBar.vue b/src/renderer/browser/components/AppBar.vue deleted file mode 100644 index 1693b753b..000000000 --- a/src/renderer/browser/components/AppBar.vue +++ /dev/null @@ -1,173 +0,0 @@ - - - - - diff --git a/src/renderer/browser/components/BrowserToolbar.vue b/src/renderer/browser/components/BrowserToolbar.vue deleted file mode 100644 index 583279f6d..000000000 --- a/src/renderer/browser/components/BrowserToolbar.vue +++ /dev/null @@ -1,151 +0,0 @@ - - - diff --git a/src/renderer/browser/components/icons/CloseIcon.vue b/src/renderer/browser/components/icons/CloseIcon.vue deleted file mode 100644 index 4f25c7cc5..000000000 --- a/src/renderer/browser/components/icons/CloseIcon.vue +++ /dev/null @@ -1,19 +0,0 @@ - - - diff --git a/src/renderer/browser/components/icons/MaximizeIcon.vue b/src/renderer/browser/components/icons/MaximizeIcon.vue deleted file mode 100644 index 0ac671a62..000000000 --- a/src/renderer/browser/components/icons/MaximizeIcon.vue +++ /dev/null @@ -1,20 +0,0 @@ - - - diff --git a/src/renderer/browser/components/icons/MinimizeIcon.vue b/src/renderer/browser/components/icons/MinimizeIcon.vue deleted file mode 100644 index 77f72b0b7..000000000 --- a/src/renderer/browser/components/icons/MinimizeIcon.vue +++ /dev/null @@ -1,19 +0,0 @@ - - - diff --git a/src/renderer/browser/components/icons/RestoreIcon.vue b/src/renderer/browser/components/icons/RestoreIcon.vue deleted file mode 100644 index be39ef86c..000000000 --- a/src/renderer/browser/components/icons/RestoreIcon.vue +++ /dev/null @@ -1,18 +0,0 @@ - - diff --git a/src/renderer/browser/index.html b/src/renderer/browser/index.html deleted file mode 100644 index 79b94675f..000000000 --- a/src/renderer/browser/index.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - - DeepChat - Browser - - - - - -
- - - diff --git a/src/renderer/browser/lib/events.ts b/src/renderer/browser/lib/events.ts deleted file mode 100644 index bbcd710b8..000000000 --- a/src/renderer/browser/lib/events.ts +++ /dev/null @@ -1,20 +0,0 @@ -export const WINDOW_EVENTS = { - READY_TO_SHOW: 'window:ready-to-show', // 替代 main-window-ready-to-show - FORCE_QUIT_APP: 'window:force-quit-app', // 替代 force-quit-app - SET_APPLICATION_QUITTING: 'window:set-application-quitting', // 设置应用退出状态 - APP_FOCUS: 'app:focus', - APP_BLUR: 'app:blur', - WINDOW_MAXIMIZED: 'window:maximized', - WINDOW_UNMAXIMIZED: 'window:unmaximized', - WINDOW_RESIZED: 'window:resized', - WINDOW_RESIZE: 'window:resize', - WINDOW_CLOSE: 'window:close', - WINDOW_CREATED: 'window:created', - WINDOW_FOCUSED: 'window:focused', - WINDOW_BLURRED: 'window:blurred', - WINDOW_ENTER_FULL_SCREEN: 'window:enter-full-screen', - WINDOW_LEAVE_FULL_SCREEN: 'window:leave-full-screen', - WINDOW_CLOSED: 'window:closed', - FIRST_CONTENT_LOADED: 'window:first-content-loaded', // 新增:首次内容加载完成事件 - WINDOW_RESTORED: 'window:restored' -} diff --git a/src/renderer/browser/main.ts b/src/renderer/browser/main.ts deleted file mode 100644 index fd2b5ca2d..000000000 --- a/src/renderer/browser/main.ts +++ /dev/null @@ -1,27 +0,0 @@ -import '@/assets/main.css' -import { addCollection } from '@iconify/vue' -import lucideIcons from '@iconify-json/lucide/icons.json' -import vscodeIcons from '@iconify-json/vscode-icons/icons.json' -import { createPinia } from 'pinia' -import { createApp } from 'vue' -import App from './App.vue' - -import { createI18n } from 'vue-i18n' -import locales from '@/i18n' - -const i18n = createI18n({ - locale: 'zh-CN', - fallbackLocale: 'en-US', - legacy: false, - messages: locales -}) -// Add icon collections to local registry -addCollection(lucideIcons) -addCollection(vscodeIcons) -const pinia = createPinia() - -const app = createApp(App) - -app.use(pinia) -app.use(i18n) -app.mount('#app') diff --git a/src/renderer/browser/stores/window.ts b/src/renderer/browser/stores/window.ts deleted file mode 100644 index 265dd0334..000000000 --- a/src/renderer/browser/stores/window.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { computed, ref } from 'vue' -import { defineStore } from 'pinia' -import type { BrowserWindowInfo } from '@shared/types/browser' -import { YO_BROWSER_EVENTS } from '@/events' -import { usePresenter } from '@/composables/usePresenter' - -const WINDOW_EVENT_CHANNELS = [ - YO_BROWSER_EVENTS.WINDOW_CREATED, - YO_BROWSER_EVENTS.WINDOW_UPDATED, - YO_BROWSER_EVENTS.WINDOW_CLOSED, - YO_BROWSER_EVENTS.WINDOW_FOCUSED, - YO_BROWSER_EVENTS.WINDOW_VISIBILITY_CHANGED, - YO_BROWSER_EVENTS.WINDOW_COUNT_CHANGED -] - -function resolveWindowId(payload: unknown): number | null { - if (typeof payload === 'number') { - return payload - } - - if (!payload || typeof payload !== 'object') { - return null - } - - if ('windowId' in payload && typeof payload.windowId === 'number') { - return payload.windowId - } - - if ('id' in payload && typeof payload.id === 'number') { - return payload.id - } - - if ( - 'window' in payload && - payload.window && - typeof payload.window === 'object' && - 'id' in payload.window && - typeof payload.window.id === 'number' - ) { - return payload.window.id - } - - return null -} - -export const useBrowserWindowStore = defineStore('browserWindow', () => { - const yoBrowserPresenter = usePresenter('yoBrowserPresenter') - const windowId = ref(null) - const browserWindow = ref(null) - const initialized = ref(false) - - const page = computed(() => browserWindow.value?.page ?? null) - const isAboutBlank = computed(() => page.value?.url === 'about:blank') - - const loadState = async () => { - if (windowId.value == null) return - browserWindow.value = await yoBrowserPresenter.getWindowById(windowId.value) - } - - const handleWindowEvent = async (_event: unknown, payload: unknown) => { - const changedWindowId = resolveWindowId(payload) - if (changedWindowId === null || changedWindowId === windowId.value) { - await loadState() - } - } - - const init = async () => { - if (initialized.value) return - initialized.value = true - windowId.value = window.api.getWindowId?.() ?? null - await loadState() - - if (!window?.electron?.ipcRenderer) { - return - } - - WINDOW_EVENT_CHANNELS.forEach((channel) => { - window.electron.ipcRenderer.on(channel, handleWindowEvent) - }) - } - - return { - windowId, - browserWindow, - page, - isAboutBlank, - init, - loadState - } -}) diff --git a/src/renderer/src/components/sidepanel/BrowserPanel.vue b/src/renderer/src/components/sidepanel/BrowserPanel.vue index 2ecddceb5..bed3ebc82 100644 --- a/src/renderer/src/components/sidepanel/BrowserPanel.vue +++ b/src/renderer/src/components/sidepanel/BrowserPanel.vue @@ -57,30 +57,47 @@ import { Icon } from '@iconify/vue' import { useI18n } from 'vue-i18n' import { Button } from '@shadcn/components/ui/button' import { Input } from '@shadcn/components/ui/input' -import BrowserPlaceholder from '@browser/components/BrowserPlaceholder.vue' -import type { BrowserWindowInfo } from '@shared/types/browser' +import BrowserPlaceholder from './BrowserPlaceholder.vue' +import type { YoBrowserStatus } from '@shared/types/browser' import { usePresenter } from '@/composables/usePresenter' import { YO_BROWSER_EVENTS } from '@/events' import { useSidepanelStore } from '@/stores/ui/sidepanel' +import { useSessionStore } from '@/stores/ui/session' + +const props = defineProps<{ + sessionId: string | null +}>() const { t } = useI18n() const sidepanelStore = useSidepanelStore() +const sessionStore = useSessionStore() const yoBrowserPresenter = usePresenter('yoBrowserPresenter') const containerRef = ref(null) const hostWindowId = ref(null) -const browserWindowId = ref(null) +const browserStatus = ref({ + initialized: false, + page: null, + canGoBack: false, + canGoForward: false, + visible: false, + loading: false +}) const currentUrl = ref('about:blank') const urlInput = ref('') const canGoBack = ref(false) const canGoForward = ref(false) const lastSyncedBounds = ref(null) +const pendingBrowserDestroySessionIds = new Set() let visibilityRunId = 0 const STABLE_RECT_SAMPLE_MS = 48 const STABLE_RECT_TIMEOUT_MS = 1500 -const showPlaceholder = computed(() => currentUrl.value === 'about:blank') +const currentSessionId = computed(() => props.sessionId?.trim() || '') +const showPlaceholder = computed( + () => !browserStatus.value.initialized || currentUrl.value === 'about:blank' +) const isBrowserPanelVisible = computed( () => sidepanelStore.open && sidepanelStore.activeTab === 'browser' ) @@ -107,34 +124,41 @@ const callPresenter = async ( return result as T | null } -const resolveWindowId = (payload: unknown): number | null => { - if (typeof payload === 'number') { - return payload +const resolvePayloadSessionId = (payload: unknown): string => { + if (!payload || typeof payload !== 'object') { + return '' } + const typedPayload = payload as { sessionId?: unknown } + return typeof typedPayload.sessionId === 'string' ? typedPayload.sessionId : '' +} + +const resolvePayloadWindowId = (payload: unknown): number | null => { if (!payload || typeof payload !== 'object') { return null } - if ('windowId' in payload && typeof payload.windowId === 'number') { - return payload.windowId - } - - if ( - 'window' in payload && - payload.window && - typeof payload.window === 'object' && - 'id' in payload.window && - typeof payload.window.id === 'number' - ) { - return payload.window.id - } + const typedPayload = payload as { windowId?: unknown } + return typeof typedPayload.windowId === 'number' ? typedPayload.windowId : null +} - return null +const getSessionUiStatus = (sessionId: string) => { + return sessionStore.sessions.find((session) => session.id === sessionId)?.status ?? null } -const isCurrentHostWindow = (windowId: number | null) => { - return windowId != null && hostWindowId.value != null && windowId === hostWindowId.value +const resetBrowserState = () => { + browserStatus.value = { + initialized: false, + page: null, + canGoBack: false, + canGoForward: false, + visible: false, + loading: false + } + currentUrl.value = 'about:blank' + urlInput.value = '' + canGoBack.value = false + canGoForward.value = false } const captureContainerBounds = (): Rectangle | null => { @@ -187,35 +211,39 @@ const waitForStableRect = async (runId: number): Promise => { return null } -const loadState = async () => { - if (hostWindowId.value == null) { +const loadState = async (sessionId: string = currentSessionId.value) => { + if (!sessionId) { + resetBrowserState() return } - const browserWindow = await callPresenter( - 'getWindowById', - yoBrowserPresenter.getWindowById(hostWindowId.value) + const status = await callPresenter( + 'getBrowserStatus', + yoBrowserPresenter.getBrowserStatus(sessionId) ) - browserWindowId.value = browserWindow?.id ?? null - currentUrl.value = browserWindow?.page.url || 'about:blank' - urlInput.value = currentUrl.value === 'about:blank' ? '' : currentUrl.value + if (sessionId !== currentSessionId.value) { + return + } - if (browserWindowId.value == null) { - canGoBack.value = false - canGoForward.value = false + if (!status) { + resetBrowserState() return } - const navigationState = await callPresenter<{ canGoBack: boolean; canGoForward: boolean }>( - 'getNavigationState', - yoBrowserPresenter.getNavigationState(browserWindowId.value) - ) - canGoBack.value = Boolean(navigationState?.canGoBack) - canGoForward.value = Boolean(navigationState?.canGoForward) + browserStatus.value = status + currentUrl.value = status.page?.url || 'about:blank' + urlInput.value = currentUrl.value === 'about:blank' ? '' : currentUrl.value + canGoBack.value = status.canGoBack + canGoForward.value = status.canGoForward } const syncVisibleBounds = async () => { - if (hostWindowId.value == null || browserWindowId.value == null || !isBrowserPanelVisible.value) { + if ( + hostWindowId.value == null || + !currentSessionId.value || + !browserStatus.value.initialized || + !isBrowserPanelVisible.value + ) { return } @@ -226,15 +254,20 @@ const syncVisibleBounds = async () => { lastSyncedBounds.value = rect await callPresenter( - 'updateEmbeddedBounds', - yoBrowserPresenter.updateEmbeddedBounds(hostWindowId.value, rect, true) + 'updateSessionBrowserBounds', + yoBrowserPresenter.updateSessionBrowserBounds( + currentSessionId.value, + hostWindowId.value, + rect, + true + ) ) } -const hideEmbedded = async () => { +const hideEmbedded = async (sessionId: string = currentSessionId.value) => { visibilityRunId += 1 - if (hostWindowId.value == null || browserWindowId.value == null) { + if (!sessionId) { return } @@ -246,15 +279,27 @@ const hideEmbedded = async () => { height: 0 } - await callPresenter( - 'updateEmbeddedBounds(hidden)', - yoBrowserPresenter.updateEmbeddedBounds(hostWindowId.value, hiddenBounds, false) - ) - await callPresenter('detachEmbedded', yoBrowserPresenter.detachEmbedded()) + if (hostWindowId.value != null) { + await callPresenter( + 'updateSessionBrowserBounds(hidden)', + yoBrowserPresenter.updateSessionBrowserBounds( + sessionId, + hostWindowId.value, + hiddenBounds, + false + ) + ) + } + await callPresenter('detachSessionBrowser', yoBrowserPresenter.detachSessionBrowser(sessionId)) } const ensureVisibleAttachment = async () => { - if (hostWindowId.value == null || !isBrowserPanelVisible.value) { + if ( + hostWindowId.value == null || + !currentSessionId.value || + !browserStatus.value.initialized || + !isBrowserPanelVisible.value + ) { return } @@ -271,41 +316,51 @@ const ensureVisibleAttachment = async () => { return } - const attachedWindowId = await callPresenter( - 'attachEmbeddedToWindow', - yoBrowserPresenter.attachEmbeddedToWindow(hostWindowId.value) + const attached = await callPresenter( + 'attachSessionBrowser', + yoBrowserPresenter.attachSessionBrowser(currentSessionId.value, hostWindowId.value) ) - if (attachedWindowId == null || runId !== visibilityRunId) { + if (!attached || runId !== visibilityRunId) { return } - browserWindowId.value = attachedWindowId lastSyncedBounds.value = stableRect await callPresenter( - 'updateEmbeddedBounds(visible)', - yoBrowserPresenter.updateEmbeddedBounds(hostWindowId.value, stableRect, true) + 'updateSessionBrowserBounds(visible)', + yoBrowserPresenter.updateSessionBrowserBounds( + currentSessionId.value, + hostWindowId.value, + stableRect, + true + ) ) - await loadState() + await loadState(currentSessionId.value) } const handleBrowserEvent = async (_event: unknown, payload: unknown) => { - if (!isBrowserPanelVisible.value || !isCurrentHostWindow(resolveWindowId(payload))) { + if (resolvePayloadSessionId(payload) !== currentSessionId.value) { return } - await loadState() + await loadState(currentSessionId.value) } const handleOpenRequested = async (_event: unknown, payload: unknown) => { - if (!isCurrentHostWindow(resolveWindowId(payload)) || !isBrowserPanelVisible.value) { + if ( + resolvePayloadSessionId(payload) !== currentSessionId.value || + hostWindowId.value == null || + resolvePayloadWindowId(payload) !== hostWindowId.value + ) { return } console.info('[BrowserPanel] panel open requested', { windowId: hostWindowId.value }) - await loadState() - await ensureVisibleAttachment() + await loadState(currentSessionId.value) + if (isBrowserPanelVisible.value) { + await ensureVisibleAttachment() + } } const normalizeUrl = (value: string) => { @@ -320,7 +375,7 @@ const normalizeUrl = (value: string) => { } const navigate = async () => { - if (hostWindowId.value == null) { + if (!currentSessionId.value) { return } @@ -329,75 +384,97 @@ const navigate = async () => { return } - if (browserWindowId.value == null) { - await ensureVisibleAttachment() - } - - if (browserWindowId.value == null) { - return - } - - const result = await callPresenter( - 'navigateWindow', - yoBrowserPresenter.navigateWindow(browserWindowId.value, nextUrl) + const result = await callPresenter( + 'loadUrl', + yoBrowserPresenter.loadUrl(currentSessionId.value, nextUrl) ) if (result === null) { return } - await loadState() + browserStatus.value = result + await loadState(currentSessionId.value) } const goBack = async () => { - if (browserWindowId.value == null) { + if (!currentSessionId.value || !browserStatus.value.initialized) { return } const result = await callPresenter( 'goBack', - yoBrowserPresenter.goBack(browserWindowId.value) + yoBrowserPresenter.goBack(currentSessionId.value) ) if (result === null) { return } - await loadState() + await loadState(currentSessionId.value) } const goForward = async () => { - if (browserWindowId.value == null) { + if (!currentSessionId.value || !browserStatus.value.initialized) { return } const result = await callPresenter( 'goForward', - yoBrowserPresenter.goForward(browserWindowId.value) + yoBrowserPresenter.goForward(currentSessionId.value) ) if (result === null) { return } - await loadState() + await loadState(currentSessionId.value) } const reloadPage = async () => { - if (browserWindowId.value == null) { + if (!currentSessionId.value || !browserStatus.value.initialized) { return } const result = await callPresenter( 'reload', - yoBrowserPresenter.reload(browserWindowId.value) + yoBrowserPresenter.reload(currentSessionId.value) ) if (result === null) { return } - await loadState() + await loadState(currentSessionId.value) +} + +const cleanupInactiveSession = async (sessionId: string) => { + if (!sessionId) { + return + } + + await hideEmbedded(sessionId) + if (getSessionUiStatus(sessionId) === 'working') { + pendingBrowserDestroySessionIds.add(sessionId) + return + } + + pendingBrowserDestroySessionIds.delete(sessionId) + await callPresenter('destroySessionBrowser', yoBrowserPresenter.destroySessionBrowser(sessionId)) +} + +const flushPendingSessionDestroys = async () => { + for (const sessionId of Array.from(pendingBrowserDestroySessionIds)) { + if (getSessionUiStatus(sessionId) === 'working') { + continue + } + + pendingBrowserDestroySessionIds.delete(sessionId) + await callPresenter( + 'destroySessionBrowser', + yoBrowserPresenter.destroySessionBrowser(sessionId) + ) + } } useResizeObserver(containerRef, () => { - if (!isBrowserPanelVisible.value || browserWindowId.value == null) { + if (!isBrowserPanelVisible.value || !browserStatus.value.initialized) { return } @@ -406,14 +483,44 @@ useResizeObserver(containerRef, () => { watch(isBrowserPanelVisible, (visible) => { if (visible) { - void loadState() + void loadState(currentSessionId.value) void ensureVisibleAttachment() return } - void hideEmbedded() + void hideEmbedded(currentSessionId.value) }) +watch( + () => props.sessionId, + (nextSessionId, previousSessionId) => { + if (previousSessionId && previousSessionId !== nextSessionId) { + void cleanupInactiveSession(previousSessionId) + } + + if (!nextSessionId) { + resetBrowserState() + return + } + + void loadState(nextSessionId) + if (isBrowserPanelVisible.value) { + void ensureVisibleAttachment() + } + }, + { immediate: true } +) + +watch( + () => sessionStore.sessions.map((session) => `${session.id}:${session.status}`).join('|'), + () => { + void flushPendingSessionDestroys() + if (currentSessionId.value) { + void loadState(currentSessionId.value) + } + } +) + onMounted(async () => { hostWindowId.value = window.api.getWindowId?.() ?? null window.electron.ipcRenderer.on(YO_BROWSER_EVENTS.OPEN_REQUESTED, handleOpenRequested) @@ -423,14 +530,16 @@ onMounted(async () => { window.electron.ipcRenderer.on(YO_BROWSER_EVENTS.WINDOW_FOCUSED, handleBrowserEvent) window.electron.ipcRenderer.on(YO_BROWSER_EVENTS.WINDOW_VISIBILITY_CHANGED, handleBrowserEvent) - await loadState() + if (currentSessionId.value) { + await loadState(currentSessionId.value) + } if (isBrowserPanelVisible.value) { await ensureVisibleAttachment() } }) onBeforeUnmount(() => { - void hideEmbedded() + void hideEmbedded(currentSessionId.value) window.electron.ipcRenderer.removeListener(YO_BROWSER_EVENTS.OPEN_REQUESTED, handleOpenRequested) window.electron.ipcRenderer.removeListener(YO_BROWSER_EVENTS.WINDOW_CREATED, handleBrowserEvent) window.electron.ipcRenderer.removeListener(YO_BROWSER_EVENTS.WINDOW_UPDATED, handleBrowserEvent) diff --git a/src/renderer/browser/components/BrowserPlaceholder.vue b/src/renderer/src/components/sidepanel/BrowserPlaceholder.vue similarity index 55% rename from src/renderer/browser/components/BrowserPlaceholder.vue rename to src/renderer/src/components/sidepanel/BrowserPlaceholder.vue index c4ce2e839..37a5309c1 100644 --- a/src/renderer/browser/components/BrowserPlaceholder.vue +++ b/src/renderer/src/components/sidepanel/BrowserPlaceholder.vue @@ -1,10 +1,10 @@ diff --git a/src/renderer/src/components/sidepanel/ChatSidePanel.vue b/src/renderer/src/components/sidepanel/ChatSidePanel.vue index d03ee556d..1288ee43f 100644 --- a/src/renderer/src/components/sidepanel/ChatSidePanel.vue +++ b/src/renderer/src/components/sidepanel/ChatSidePanel.vue @@ -57,7 +57,7 @@ :session-id="props.sessionId" :workspace-path="props.workspacePath" /> - + @@ -87,9 +87,12 @@ const handleBrowserOpenRequested = (_event: unknown, payload: unknown) => { const currentWindowId = window.api.getWindowId?.() ?? null const requestedWindowId = payload && typeof payload === 'object' && 'windowId' in payload ? payload.windowId : null + const requestedSessionId = + payload && typeof payload === 'object' && 'sessionId' in payload ? payload.sessionId : null if ( !props.sessionId || + requestedSessionId !== props.sessionId || typeof requestedWindowId !== 'number' || requestedWindowId !== currentWindowId ) { diff --git a/src/shared/types/browser.ts b/src/shared/types/browser.ts index 46d2ade29..d5a01f55c 100644 --- a/src/shared/types/browser.ts +++ b/src/shared/types/browser.ts @@ -19,18 +19,13 @@ export interface BrowserPageInfo { updatedAt: number } -// Deprecated alias kept temporarily while in-tree callers migrate to page/window semantics. -export interface BrowserTabInfo extends BrowserPageInfo { - isActive?: boolean -} - -export interface BrowserWindowInfo { - id: number - page: BrowserPageInfo - isFocused: boolean - isVisible: boolean - createdAt: number - updatedAt: number +export interface YoBrowserStatus { + initialized: boolean + page: BrowserPageInfo | null + canGoBack: boolean + canGoForward: boolean + visible: boolean + loading: boolean } export interface ScreenshotOptions { @@ -63,15 +58,3 @@ export interface BrowserToolDefinition { inputSchema: Record requiresVision?: boolean } - -export type BrowserEvent = - | { type: 'window-created'; window: BrowserWindowInfo } - | { type: 'window-updated'; window: BrowserWindowInfo } - | { type: 'window-focused'; windowId: number | null } - | { type: 'window-closed'; windowId: number } - | { type: 'window-visibility-changed'; windowId: number; visible: boolean } - -export interface BrowserContextSnapshot { - activeWindowId: number | null - windows: BrowserWindowInfo[] -} diff --git a/src/shared/types/presenters/legacy.presenters.d.ts b/src/shared/types/presenters/legacy.presenters.d.ts index c852ec0ed..0a618c1fe 100644 --- a/src/shared/types/presenters/legacy.presenters.d.ts +++ b/src/shared/types/presenters/legacy.presenters.d.ts @@ -21,13 +21,7 @@ import type { ISkillPresenter } from '../skill' import type { ISkillSyncPresenter } from '../skillSync' import type { INewAgentPresenter } from './new-agent.presenter' import type { IProjectPresenter } from './project.presenter' -import type { - BrowserTabInfo, - BrowserContextSnapshot, - BrowserWindowInfo, - DownloadInfo, - ScreenshotOptions -} from '../browser' +import type { BrowserPageInfo, DownloadInfo, ScreenshotOptions, YoBrowserStatus } from '../browser' export type SQLITE_MESSAGE = { id: string @@ -193,21 +187,16 @@ export interface TabData { closable: boolean url: string icon?: string - browserTabId?: string -} - -export interface BrowserContextSnapshot { - activeWindowId: number | null - windows: BrowserWindowInfo[] } export interface IYoBrowserPresenter { initialize(): Promise - ensureWindow(): Promise - openWindow(url?: string): Promise - attachEmbeddedToWindow(windowId: number): Promise - updateEmbeddedBounds( - windowId: number, + getBrowserStatus(sessionId: string): Promise + loadUrl(sessionId: string, url: string, timeoutMs?: number): Promise + attachSessionBrowser(sessionId: string, hostWindowId: number): Promise + updateSessionBrowserBounds( + sessionId: string, + hostWindowId: number, bounds: { x: number y: number @@ -216,41 +205,27 @@ export interface IYoBrowserPresenter { }, visible: boolean ): Promise - detachEmbedded(): Promise - focusWindow(windowId: number): Promise - closeWindow(windowId: number): Promise - listWindows(): Promise - getActiveWindow(): Promise - getWindowById(windowId: number): Promise - navigateWindow(windowId: number, url: string, timeoutMs?: number): Promise - hasWindow(): Promise - show(shouldFocus?: boolean): Promise - hide(): Promise - toggleVisibility(): Promise - isVisible(): Promise - listTabs(): Promise - getActiveTab(): Promise - createTab(url?: string): Promise - navigateTab(tabId: string, url: string): Promise - activateTab(tabId: string): Promise - closeTab(tabId: string): Promise - reuseTab(url: string): Promise - goBack(target?: string | number): Promise - goForward(target?: string | number): Promise - reload(target?: string | number): Promise - getBrowserContext(): Promise - getNavigationState(target?: string | number): Promise<{ + detachSessionBrowser(sessionId: string): Promise + destroySessionBrowser(sessionId: string): Promise + goBack(sessionId: string): Promise + goForward(sessionId: string): Promise + reload(sessionId: string): Promise + getNavigationState(sessionId: string): Promise<{ canGoBack: boolean canGoForward: boolean }> - getTabIdByViewId(viewId: number): Promise - captureScreenshot(target: string | number, options?: ScreenshotOptions): Promise + captureScreenshot(sessionId: string, options?: ScreenshotOptions): Promise + getBrowserPage(sessionId: string): Promise startDownload(url: string, savePath?: string): Promise clearSandboxData(): Promise shutdown(): Promise readonly toolHandler: { getToolDefinitions(): any[] - callTool(toolName: string, args: Record): Promise + callTool( + toolName: string, + args: Record, + conversationId?: string + ): Promise } } @@ -353,7 +328,6 @@ export interface ITabPresenter { registerFloatingWindow(webContentsId: number, webContents: Electron.WebContents): void unregisterFloatingWindow(webContentsId: number): void resetTabToBlank(tabId: number): Promise - setTabBrowserId(tabId: number, browserTabId: string): void destroy(): Promise } diff --git a/test/main/presenter/YoBrowserPresenter.test.ts b/test/main/presenter/YoBrowserPresenter.test.ts index a5c0f6d81..e18968e5c 100644 --- a/test/main/presenter/YoBrowserPresenter.test.ts +++ b/test/main/presenter/YoBrowserPresenter.test.ts @@ -220,87 +220,109 @@ describe('YoBrowserPresenter', () => { const presenter = new YoBrowserPresenter(windowPresenter as any) - const getEmbeddedWebContents = () => { - return ((presenter as any).embeddedState?.view?.webContents ?? null) as MockWebContents | null + const getSessionWebContents = (sessionId: string) => { + return ((presenter as any).sessionBrowsers.get(sessionId)?.view?.webContents ?? + null) as MockWebContents | null } return { presenter, windows, viewConfigs, - windowPresenter, - getEmbeddedWebContents + getSessionWebContents } } - it('does not start embedded navigation before the renderer reports a stable host', async () => { - const { presenter, windows, getEmbeddedWebContents } = await setupPresenter() + it('does not start session navigation before the renderer reports a stable host', async () => { + const { presenter, windows, getSessionWebContents } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) - const openPromise = presenter.openWindow('https://example.com') + const loadPromise = presenter.loadUrl('session-a', 'https://example.com') await Promise.resolve() - const webContents = getEmbeddedWebContents() + const webContents = getSessionWebContents('session-a') expect(webContents?.loadURL).not.toHaveBeenCalled() - await presenter.attachEmbeddedToWindow(1) - await presenter.updateEmbeddedBounds(1, { x: 12, y: 18, width: 320, height: 480 }, true) + await presenter.attachSessionBrowser('session-a', 1) + await presenter.updateSessionBrowserBounds( + 'session-a', + 1, + { x: 12, y: 18, width: 320, height: 480 }, + true + ) await vi.advanceTimersByTimeAsync(130) await Promise.resolve() expect(webContents?.loadURL).toHaveBeenCalledWith('https://example.com') webContents?.emitDomReady() - await openPromise + await loadPromise webContents?.finishLoad() }) - it('resolves openWindow only after host-ready and the first dom-ready', async () => { - const { presenter, windows, getEmbeddedWebContents } = await setupPresenter() + it('resolves loadUrl only after host-ready and the first dom-ready', async () => { + const { presenter, windows, getSessionWebContents } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) let settled = false - const openPromise = presenter.openWindow('https://example.com').then(() => { + const loadPromise = presenter.loadUrl('session-a', 'https://example.com').then(() => { settled = true }) await Promise.resolve() - await presenter.attachEmbeddedToWindow(1) - await presenter.updateEmbeddedBounds(1, { x: 10, y: 20, width: 300, height: 400 }, true) + await presenter.attachSessionBrowser('session-a', 1) + await presenter.updateSessionBrowserBounds( + 'session-a', + 1, + { x: 10, y: 20, width: 300, height: 400 }, + true + ) await vi.advanceTimersByTimeAsync(130) await Promise.resolve() expect(settled).toBe(false) - const webContents = getEmbeddedWebContents() + const webContents = getSessionWebContents('session-a') webContents?.emitDomReady() - await openPromise + await loadPromise expect(settled).toBe(true) webContents?.finishLoad() }) - it('returns a clear error when host-ready never arrives', async () => { + it('returns a clear error when session host-ready never arrives', async () => { const { presenter, windows } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) - const openPromise = presenter.openWindow('https://example.com') - const rejection = expect(openPromise).rejects.toThrow( - 'Embedded browser host 1 did not become ready within 2000ms' + const loadPromise = presenter.loadUrl('session-a', 'https://example.com') + const rejection = expect(loadPromise).rejects.toThrow( + 'Session browser host 1 did not become ready within 2000ms' ) await vi.advanceTimersByTimeAsync(2050) await rejection }) - it('does not emit WINDOW_UPDATED for pure embedded bounds changes', async () => { + it('does not emit WINDOW_UPDATED for pure bounds changes', async () => { const { presenter, windows } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) - await presenter.attachEmbeddedToWindow(1) + void presenter.loadUrl('session-a', 'https://example.com') + await Promise.resolve() + await presenter.attachSessionBrowser('session-a', 1) sendToRendererMock.mockClear() - await presenter.updateEmbeddedBounds(1, { x: 0, y: 0, width: 240, height: 360 }, true) - await presenter.updateEmbeddedBounds(1, { x: 8, y: 16, width: 256, height: 384 }, true) + await presenter.updateSessionBrowserBounds( + 'session-a', + 1, + { x: 0, y: 0, width: 240, height: 360 }, + true + ) + await presenter.updateSessionBrowserBounds( + 'session-a', + 1, + { x: 8, y: 16, width: 256, height: 384 }, + true + ) const updatedEvents = sendToRendererMock.mock.calls.filter( ([event]) => event === 'yo-browser:window-updated' @@ -308,54 +330,51 @@ describe('YoBrowserPresenter', () => { expect(updatedEvents).toHaveLength(0) }) - it('navigates embedded windows directly instead of reopening them', async () => { - const { presenter, windows, getEmbeddedWebContents } = await setupPresenter() + it('keeps session browsers isolated when switching the attached session', async () => { + const { presenter, windows, getSessionWebContents } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) - await presenter.attachEmbeddedToWindow(1) - const openWindowSpy = vi.spyOn(presenter, 'openWindow') - - const navigatePromise = presenter.navigateWindow(1, 'https://example.com') + const firstLoad = presenter.loadUrl('session-a', 'https://example.com/a') await Promise.resolve() + await presenter.attachSessionBrowser('session-a', 1) + await presenter.updateSessionBrowserBounds( + 'session-a', + 1, + { x: 10, y: 10, width: 300, height: 400 }, + true + ) + await vi.advanceTimersByTimeAsync(130) + getSessionWebContents('session-a')?.emitDomReady() + await firstLoad - expect(openWindowSpy).not.toHaveBeenCalled() - - const webContents = getEmbeddedWebContents() - webContents?.finishLoad() - await navigatePromise - }) - - it('reattaches embedded listeners to the new host window and cleans up the previous host', async () => { - const { presenter, windows } = await setupPresenter() - const firstWindow = new MockBrowserWindow(1) - const secondWindow = new MockBrowserWindow(2) - windows.set(1, firstWindow) - windows.set(2, secondWindow) - - await presenter.attachEmbeddedToWindow(1) - const state = (presenter as any).embeddedState + const secondLoad = presenter.loadUrl('session-b', 'https://example.com/b') + await Promise.resolve() + await presenter.attachSessionBrowser('session-b', 1) + await presenter.updateSessionBrowserBounds( + 'session-b', + 1, + { x: 10, y: 10, width: 300, height: 400 }, + true + ) + await vi.advanceTimersByTimeAsync(130) + getSessionWebContents('session-b')?.emitDomReady() + await secondLoad - await presenter.attachEmbeddedToWindow(2) - expect(firstWindow.contentView.removeChildView).toHaveBeenCalledWith(state.view) + const firstStatus = await presenter.getBrowserStatus('session-a') + const secondStatus = await presenter.getBrowserStatus('session-b') - sendToRendererMock.mockClear() - firstWindow.emit('focus') - expect(sendToRendererMock).not.toHaveBeenCalled() - - secondWindow.emit('focus') - expect( - sendToRendererMock.mock.calls.some( - ([event, _target, payload]) => - event === 'yo-browser:window-focused' && payload?.windowId === secondWindow.id - ) - ).toBe(true) + expect(firstStatus.initialized).toBe(true) + expect(firstStatus.visible).toBe(false) + expect(secondStatus.initialized).toBe(true) + expect(secondStatus.visible).toBe(true) }) it('creates the embedded WebContentsView with sandbox enabled', async () => { const { presenter, windows, viewConfigs } = await setupPresenter() windows.set(1, new MockBrowserWindow(1)) - await presenter.attachEmbeddedToWindow(1) + void presenter.loadUrl('session-a', 'https://example.com') + await Promise.resolve() expect(viewConfigs).toHaveLength(1) expect(viewConfigs[0]?.webPreferences).toMatchObject({ diff --git a/test/main/presenter/browser/YoBrowserToolHandler.test.ts b/test/main/presenter/browser/YoBrowserToolHandler.test.ts new file mode 100644 index 000000000..f2cd18f81 --- /dev/null +++ b/test/main/presenter/browser/YoBrowserToolHandler.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it, vi } from 'vitest' +import { YoBrowserToolHandler } from '@/presenter/browser/YoBrowserToolHandler' + +vi.mock('@shared/logger', () => ({ + default: { + warn: vi.fn(), + error: vi.fn() + } +})) + +describe('YoBrowserToolHandler', () => { + const createPresenter = () => + ({ + getBrowserStatus: vi.fn().mockResolvedValue({ initialized: false }), + loadUrl: vi.fn().mockResolvedValue({ initialized: true }), + getBrowserPage: vi.fn().mockResolvedValue({ + id: 'page-1', + url: 'https://example.com', + status: 'ready' + }), + sendCdpCommand: vi.fn().mockResolvedValue({ ok: true }) + }) as any + + it('exposes only the simplified YoBrowser tool names', () => { + const handler = new YoBrowserToolHandler(createPresenter()) + + const toolNames = handler.getToolDefinitions().map((tool) => tool.function.name) + + expect(toolNames).toEqual(['get_browser_status', 'load_url', 'cdp_send']) + }) + + it('routes load_url through the conversation session id', async () => { + const presenter = createPresenter() + const handler = new YoBrowserToolHandler(presenter) + + const result = await handler.callTool('load_url', { url: 'https://example.com' }, 'session-a') + + expect(presenter.loadUrl).toHaveBeenCalledWith('session-a', 'https://example.com') + expect(result).toBe(JSON.stringify({ initialized: true })) + }) + + it('rejects old tool names as unknown tools', async () => { + const handler = new YoBrowserToolHandler(createPresenter()) + + await expect(handler.callTool('yo_browser_cdp_send', {}, 'session-a')).rejects.toThrow( + 'Unknown YoBrowser tool: yo_browser_cdp_send' + ) + }) + + it('requires an initialized session browser before cdp_send', async () => { + const presenter = createPresenter() + presenter.getBrowserPage.mockResolvedValue(null) + const handler = new YoBrowserToolHandler(presenter) + + await expect( + handler.callTool('cdp_send', { method: 'Page.reload' }, 'session-a') + ).rejects.toThrow('Session browser for session-a is not initialized') + }) +}) diff --git a/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts b/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts index f3562f05c..fd1bb56cb 100644 --- a/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts @@ -1992,7 +1992,7 @@ describe('DeepChatAgentPresenter', () => { timestamp: 1, tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -2005,7 +2005,7 @@ describe('DeepChatAgentPresenter', () => { content: 'Need permission', tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}' }, extra: { @@ -2014,7 +2014,7 @@ describe('DeepChatAgentPresenter', () => { permissionRequest: JSON.stringify({ permissionType: 'write', description: 'Need permission', - toolName: 'yo_browser_cdp_send', + toolName: 'cdp_send', serverName: 'yo-browser' }) } @@ -2025,7 +2025,7 @@ describe('DeepChatAgentPresenter', () => { { type: 'function', function: { - name: 'yo_browser_cdp_send', + name: 'cdp_send', description: 'CDP send', parameters: { type: 'object', properties: {} } }, @@ -2066,7 +2066,7 @@ describe('DeepChatAgentPresenter', () => { timestamp: 1, tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -2079,7 +2079,7 @@ describe('DeepChatAgentPresenter', () => { content: 'Need permission', tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}' }, extra: { @@ -2088,7 +2088,7 @@ describe('DeepChatAgentPresenter', () => { permissionRequest: JSON.stringify({ permissionType: 'write', description: 'Need permission', - toolName: 'yo_browser_cdp_send', + toolName: 'cdp_send', serverName: 'yo-browser' }) } @@ -2099,7 +2099,7 @@ describe('DeepChatAgentPresenter', () => { { type: 'function', function: { - name: 'yo_browser_cdp_send', + name: 'cdp_send', description: 'CDP send', parameters: { type: 'object', properties: {} } }, diff --git a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts index d8024d03e..eba6f92d7 100644 --- a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts @@ -542,9 +542,9 @@ describe('dispatch', () => { tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-offload-')) getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) - const tools = [makeTool('yo_browser_cdp_send')] + const tools = [makeTool('cdp_send')] const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const conversation: any[] = [] state.blocks.push({ @@ -554,7 +554,7 @@ describe('dispatch', () => { timestamp: Date.now(), tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -562,7 +562,7 @@ describe('dispatch', () => { state.completedToolCalls = [ { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', arguments: '{"method":"Page.captureScreenshot"}' } ] @@ -595,9 +595,9 @@ describe('dispatch', () => { getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) const writeFileSpy = vi.spyOn(fs, 'writeFile').mockRejectedValueOnce(new Error('disk full')) - const tools = [makeTool('yo_browser_cdp_send')] + const tools = [makeTool('cdp_send')] const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const conversation: any[] = [] state.blocks.push({ @@ -607,7 +607,7 @@ describe('dispatch', () => { timestamp: Date.now(), tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -615,7 +615,7 @@ describe('dispatch', () => { state.completedToolCalls = [ { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', arguments: '{"method":"Page.captureScreenshot"}' } ] @@ -645,9 +645,9 @@ describe('dispatch', () => { tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-offload-clean-')) getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) - const tools = [makeTool('yo_browser_cdp_send')] + const tools = [makeTool('cdp_send')] const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const conversation: any[] = [] state.blocks.push({ @@ -657,7 +657,7 @@ describe('dispatch', () => { timestamp: Date.now(), tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -665,7 +665,7 @@ describe('dispatch', () => { state.completedToolCalls = [ { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', arguments: '{"method":"Page.captureScreenshot"}' } ] @@ -696,9 +696,9 @@ describe('dispatch', () => { tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-terminal-clean-')) getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) - const tools = [makeTool('yo_browser_cdp_send')] + const tools = [makeTool('cdp_send')] const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const conversation: any[] = [] const hooks = { onPreToolUse: vi.fn(), @@ -714,7 +714,7 @@ describe('dispatch', () => { timestamp: Date.now(), tool_call: { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' } @@ -722,7 +722,7 @@ describe('dispatch', () => { state.completedToolCalls = [ { id: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', arguments: '{"method":"Page.captureScreenshot"}' } ] @@ -747,7 +747,7 @@ describe('dispatch', () => { expect(state.blocks[0].status).toBe('error') expect(hooks.onPostToolUseFailure).toHaveBeenCalledWith({ callId: 'tc1', - name: 'yo_browser_cdp_send', + name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', error: expect.stringContaining('remaining context window is too small') }) diff --git a/test/main/presenter/deepchatAgentPresenter/process.test.ts b/test/main/presenter/deepchatAgentPresenter/process.test.ts index 3949472c7..e69aea7f7 100644 --- a/test/main/presenter/deepchatAgentPresenter/process.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/process.test.ts @@ -218,7 +218,7 @@ describe('processStream', () => { yield { type: 'tool_call_start', tool_call_id: 'tc1', - tool_call_name: 'yo_browser_cdp_send' + tool_call_name: 'cdp_send' } as LLMCoreStreamEvent yield { type: 'tool_call_end', @@ -234,11 +234,11 @@ describe('processStream', () => { })() }) as unknown as ProcessParams['coreStream'] - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const params = createParams({ coreStream, toolPresenter, - tools: [makeTool('yo_browser_cdp_send')] + tools: [makeTool('cdp_send')] }) const promise = processStream(params) @@ -533,7 +533,7 @@ describe('processStream', () => { yield { type: 'tool_call_start', tool_call_id: 'tc1', - tool_call_name: 'yo_browser_cdp_send' + tool_call_name: 'cdp_send' } as LLMCoreStreamEvent yield { type: 'tool_call_end', @@ -545,11 +545,11 @@ describe('processStream', () => { }) as unknown as ProcessParams['coreStream'] const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) - const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const toolPresenter = createMockToolPresenter({ cdp_send: longScreenshot }) const params = createParams({ coreStream, toolPresenter, - tools: [makeTool('yo_browser_cdp_send')], + tools: [makeTool('cdp_send')], modelConfig: { contextLength: 1 } as any, maxTokens: 1 }) diff --git a/test/main/presenter/newAgentPresenter/newAgentPresenter.test.ts b/test/main/presenter/newAgentPresenter/newAgentPresenter.test.ts index 8dd07ab60..a59f4eb82 100644 --- a/test/main/presenter/newAgentPresenter/newAgentPresenter.test.ts +++ b/test/main/presenter/newAgentPresenter/newAgentPresenter.test.ts @@ -359,7 +359,7 @@ describe('NewAgentPresenter', () => { { agentId: 'deepchat', message: 'Hi', - disabledAgentTools: ['exec', 'exec', 'yo_browser_cdp_send'] + disabledAgentTools: ['exec', 'exec', 'cdp_send'] }, 1 ) @@ -371,7 +371,7 @@ describe('NewAgentPresenter', () => { null, expect.objectContaining({ isDraft: false, - disabledAgentTools: ['exec', 'yo_browser_cdp_send'] + disabledAgentTools: expect.arrayContaining(['cdp_send', 'exec']) }) ) }) @@ -1012,14 +1012,11 @@ describe('NewAgentPresenter', () => { created_at: 1000, updated_at: 1000 }) - sqlitePresenter.newSessionsTable.getDisabledAgentTools.mockReturnValue([ - 'exec', - 'yo_browser_cdp_send' - ]) + sqlitePresenter.newSessionsTable.getDisabledAgentTools.mockReturnValue(['exec', 'cdp_send']) const disabledTools = await presenter.getSessionDisabledAgentTools('s1') - expect(disabledTools).toEqual(['exec', 'yo_browser_cdp_send']) + expect(disabledTools).toEqual(['exec', 'cdp_send']) }) it('updates disabled agent tools and invalidates the deepchat prompt cache', async () => { @@ -1036,15 +1033,15 @@ describe('NewAgentPresenter', () => { const disabledTools = await presenter.updateSessionDisabledAgentTools('s1', [ 'grep', 'ls', - 'yo_browser_cdp_send', + 'cdp_send', 'exec', 'exec' ]) - expect(disabledTools).toEqual(['exec', 'yo_browser_cdp_send']) + expect(disabledTools).toEqual(['cdp_send', 'exec']) expect(sqlitePresenter.newSessionsTable.updateDisabledAgentTools).toHaveBeenCalledWith('s1', [ - 'exec', - 'yo_browser_cdp_send' + 'cdp_send', + 'exec' ]) expect(deepChatAgent.invalidateSessionSystemPromptCache).toHaveBeenCalledWith('s1') }) diff --git a/test/main/presenter/toolPresenter/toolPresenter.test.ts b/test/main/presenter/toolPresenter/toolPresenter.test.ts index 3f500a63e..0b0d26f59 100644 --- a/test/main/presenter/toolPresenter/toolPresenter.test.ts +++ b/test/main/presenter/toolPresenter/toolPresenter.test.ts @@ -294,7 +294,7 @@ describe('ToolPresenter', () => { source: 'agent' }, { - ...buildToolDefinition('yo_browser_cdp_send', 'yobrowser'), + ...buildToolDefinition('cdp_send', 'yobrowser'), source: 'agent' } ] @@ -302,7 +302,7 @@ describe('ToolPresenter', () => { expect(withoutYoBrowser).not.toContain('YoBrowser') expect(withYoBrowser).toContain('YoBrowser') - expect(withYoBrowser).toContain('yo_browser_cdp_send') + expect(withYoBrowser).toContain('cdp_send') }) it('includes question guidance only when deepchat_question is enabled', () => { diff --git a/test/renderer/components/BrowserPanel.test.ts b/test/renderer/components/BrowserPanel.test.ts index 04108f854..62b03abcb 100644 --- a/test/renderer/components/BrowserPanel.test.ts +++ b/test/renderer/components/BrowserPanel.test.ts @@ -18,6 +18,21 @@ const makeRect = (x: number, y: number, width: number, height: number): DOMRect } as DOMRect } +const defaultBrowserStatus = { + initialized: true, + page: { + id: 'page-1', + url: 'about:blank', + status: 'idle' as const, + createdAt: 1, + updatedAt: 1 + }, + canGoBack: false, + canGoForward: false, + visible: false, + loading: false +} + describe('BrowserPanel', () => { beforeEach(() => { vi.useFakeTimers() @@ -31,7 +46,9 @@ describe('BrowserPanel', () => { const setup = async (options?: { open?: boolean activeTab?: 'browser' | 'workspace' - getWindowByIdResult?: unknown + sessionId?: string + browserStatus?: typeof defaultBrowserStatus + sessions?: Array<{ id: string; status: string }> }) => { vi.resetModules() @@ -40,27 +57,20 @@ describe('BrowserPanel', () => { open: options?.open ?? true, activeTab: options?.activeTab ?? 'browser' } + const sessionStore = { + sessions: options?.sessions ?? [{ id: options?.sessionId ?? 'session-a', status: 'none' }] + } const yoBrowserPresenter = { - attachEmbeddedToWindow: vi.fn().mockResolvedValue(1), - getWindowById: vi.fn().mockResolvedValue( - options?.getWindowByIdResult ?? { - id: 1, - page: { - url: 'about:blank' - } - } - ), - getNavigationState: vi.fn().mockResolvedValue({ - canGoBack: false, - canGoForward: false - }), - updateEmbeddedBounds: vi.fn().mockResolvedValue(undefined), - navigateWindow: vi.fn().mockResolvedValue(undefined), + getBrowserStatus: vi.fn().mockResolvedValue(options?.browserStatus ?? defaultBrowserStatus), + attachSessionBrowser: vi.fn().mockResolvedValue(true), + updateSessionBrowserBounds: vi.fn().mockResolvedValue(undefined), + loadUrl: vi.fn().mockResolvedValue(options?.browserStatus ?? defaultBrowserStatus), goBack: vi.fn().mockResolvedValue(undefined), goForward: vi.fn().mockResolvedValue(undefined), reload: vi.fn().mockResolvedValue(undefined), - detachEmbedded: vi.fn().mockResolvedValue(undefined) + detachSessionBrowser: vi.fn().mockResolvedValue(undefined), + destroySessionBrowser: vi.fn().mockResolvedValue(undefined) } vi.doMock('vue-i18n', () => ({ @@ -77,6 +87,10 @@ describe('BrowserPanel', () => { useSidepanelStore: () => sidepanelStore })) + vi.doMock('@/stores/ui/session', () => ({ + useSessionStore: () => sessionStore + })) + vi.doMock('@/composables/usePresenter', () => ({ usePresenter: () => yoBrowserPresenter })) @@ -97,6 +111,9 @@ describe('BrowserPanel', () => { const BrowserPanel = (await import('@/components/sidepanel/BrowserPanel.vue')).default const wrapper = mount(BrowserPanel, { + props: { + sessionId: options?.sessionId ?? 'session-a' + }, global: { stubs: { Button: defineComponent({ @@ -123,7 +140,7 @@ describe('BrowserPanel', () => { }) await flushPromises() - return { wrapper, yoBrowserPresenter, sidepanelStore, handlers } + return { wrapper, yoBrowserPresenter, handlers } } it('adds accessible labels to browser toolbar controls', async () => { @@ -145,13 +162,14 @@ describe('BrowserPanel', () => { const { yoBrowserPresenter } = await setup() - expect(yoBrowserPresenter.attachEmbeddedToWindow).not.toHaveBeenCalled() + expect(yoBrowserPresenter.attachSessionBrowser).not.toHaveBeenCalled() await vi.advanceTimersByTimeAsync(160) await flushPromises() - expect(yoBrowserPresenter.attachEmbeddedToWindow).toHaveBeenCalledWith(1) - expect(yoBrowserPresenter.updateEmbeddedBounds).toHaveBeenCalledWith( + expect(yoBrowserPresenter.attachSessionBrowser).toHaveBeenCalledWith('session-a', 1) + expect(yoBrowserPresenter.updateSessionBrowserBounds).toHaveBeenCalledWith( + 'session-a', 1, expect.objectContaining({ x: 24, @@ -163,22 +181,23 @@ describe('BrowserPanel', () => { ) }) - it('ignores open requests for a different host window', async () => { + it('ignores open requests for a different host window or session', async () => { vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue( makeRect(10, 10, 300, 400) ) const { yoBrowserPresenter, handlers } = await setup() - yoBrowserPresenter.attachEmbeddedToWindow.mockClear() - yoBrowserPresenter.updateEmbeddedBounds.mockClear() + yoBrowserPresenter.attachSessionBrowser.mockClear() + yoBrowserPresenter.updateSessionBrowserBounds.mockClear() const openRequestedHandler = handlers.get('yo-browser:open-requested') expect(openRequestedHandler).toBeTypeOf('function') - await openRequestedHandler?.({}, { windowId: 2 }) + await openRequestedHandler?.({}, { sessionId: 'session-b', windowId: 1 }) + await openRequestedHandler?.({}, { sessionId: 'session-a', windowId: 2 }) await flushPromises() - expect(yoBrowserPresenter.attachEmbeddedToWindow).not.toHaveBeenCalled() - expect(yoBrowserPresenter.updateEmbeddedBounds).not.toHaveBeenCalled() + expect(yoBrowserPresenter.attachSessionBrowser).not.toHaveBeenCalled() + expect(yoBrowserPresenter.updateSessionBrowserBounds).not.toHaveBeenCalled() }) }) diff --git a/test/renderer/components/McpIndicator.test.ts b/test/renderer/components/McpIndicator.test.ts index 17a354be8..4c96731f1 100644 --- a/test/renderer/components/McpIndicator.test.ts +++ b/test/renderer/components/McpIndicator.test.ts @@ -96,7 +96,7 @@ const setup = async (options?: { buildTool('read', 'agent-filesystem'), buildTool('exec', 'agent-filesystem'), buildTool('deepchat_question', 'agent-core'), - buildTool('yo_browser_cdp_send', 'yobrowser'), + buildTool('cdp_send', 'yobrowser'), buildTool('mcp_tool', 'demo-server', 'mcp') ]) } diff --git a/test/renderer/components/NewThreadPage.test.ts b/test/renderer/components/NewThreadPage.test.ts index 75c6c8d77..e282d572e 100644 --- a/test/renderer/components/NewThreadPage.test.ts +++ b/test/renderer/components/NewThreadPage.test.ts @@ -214,7 +214,7 @@ describe('NewThreadPage ACP draft session bootstrap', () => { ] draftStore.providerId = 'openai' draftStore.modelId = 'gpt-4' - draftStore.disabledAgentTools = ['exec', 'yo_browser_cdp_send'] + draftStore.disabledAgentTools = ['exec', 'cdp_send'] ;(draftStore.toGenerationSettings as unknown as ReturnType).mockReturnValue({ systemPrompt: 'Preset prompt', temperature: 1.2, @@ -233,7 +233,7 @@ describe('NewThreadPage ACP draft session bootstrap', () => { message: 'hello deepchat', files: [{ name: 'plan.md', path: '/tmp/workspace/plan.md', mimeType: 'text/markdown' }], agentId: 'deepchat', - disabledAgentTools: ['exec', 'yo_browser_cdp_send'], + disabledAgentTools: ['exec', 'cdp_send'], generationSettings: { systemPrompt: 'Preset prompt', temperature: 1.2, From d7721246932b1703c6c0553fe3af5a96afcede2b Mon Sep 17 00:00:00 2001 From: zerob13 Date: Tue, 17 Mar 2026 22:45:03 +0800 Subject: [PATCH 2/4] fix(agent): sanitize offload filenames --- src/main/lib/agentRuntime/sessionPaths.ts | 20 ++++++++++- .../lib/agentRuntime/sessionPaths.test.ts | 35 +++++++++++++++++++ .../loop/toolCallProcessor.test.ts | 4 +-- .../deepchatAgentPresenter/dispatch.test.ts | 7 ++-- .../deepchatAgentPresenter/process.test.ts | 7 ++-- 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 test/main/lib/agentRuntime/sessionPaths.test.ts diff --git a/src/main/lib/agentRuntime/sessionPaths.ts b/src/main/lib/agentRuntime/sessionPaths.ts index 96563a22d..92dcf6be7 100644 --- a/src/main/lib/agentRuntime/sessionPaths.ts +++ b/src/main/lib/agentRuntime/sessionPaths.ts @@ -1,6 +1,9 @@ import { app } from 'electron' import path from 'path' +const INVALID_WINDOWS_SEGMENT_CHARS = new Set(['<', '>', ':', '"', '/', '\\', '|', '?', '*']) +const TRAILING_WINDOWS_SEGMENT_CHARS = /[. ]+$/g + export function getSessionsRoot(): string { return path.resolve(app.getPath('home'), '.deepchat', 'sessions') } @@ -29,7 +32,7 @@ export function resolveToolOffloadPath(conversationId: string, toolCallId: strin return null } - const safeToolCallId = toolCallId.replace(/[\\/]/g, '_') + const safeToolCallId = sanitizeToolCallIdForOffload(toolCallId) return path.join(sessionDir, `tool_${safeToolCallId}.offload`) } @@ -41,3 +44,18 @@ export function resolveToolOffloadTemplatePath(conversationId: string): string | return path.join(sessionDir, 'tool_.offload') } + +function sanitizeToolCallIdForOffload(toolCallId: string): string { + const sanitized = Array.from(toolCallId.trim(), (char) => { + const charCode = char.charCodeAt(0) + if (charCode <= 0x1f || INVALID_WINDOWS_SEGMENT_CHARS.has(char)) { + return '_' + } + + return char + }) + .join('') + .replace(TRAILING_WINDOWS_SEGMENT_CHARS, '') + + return sanitized || 'tool_call' +} diff --git a/test/main/lib/agentRuntime/sessionPaths.test.ts b/test/main/lib/agentRuntime/sessionPaths.test.ts new file mode 100644 index 000000000..69f8a0d22 --- /dev/null +++ b/test/main/lib/agentRuntime/sessionPaths.test.ts @@ -0,0 +1,35 @@ +import path from 'path' +import { app } from 'electron' +import { afterEach, describe, expect, it, vi } from 'vitest' +import { resolveToolOffloadPath } from '@/lib/agentRuntime/sessionPaths' + +describe('sessionPaths offload path sanitization', () => { + const homeDir = path.join('C:', 'Users', 'tester') + + afterEach(() => { + vi.restoreAllMocks() + }) + + it('sanitizes colon-based tool call ids into a normal .offload file name', () => { + vi.spyOn(app, 'getPath').mockReturnValue(homeDir) + + const filePath = resolveToolOffloadPath('session-a', 'function.cdp_send:11') + + expect(filePath).toBe( + path.join(homeDir, '.deepchat', 'sessions', 'session-a', 'tool_function.cdp_send_11.offload') + ) + expect(path.basename(filePath!)).not.toContain(':') + expect(path.basename(filePath!)).toMatch(/\.offload$/) + }) + + it('sanitizes other windows-invalid characters and trailing dots or spaces', () => { + vi.spyOn(app, 'getPath').mockReturnValue(homeDir) + + const filePath = resolveToolOffloadPath('session-a', 'bad<>:"/\\\\|?*\u0001name. ') + const fileName = path.basename(filePath!) + + expect(fileName).toMatch(/^tool_[^<>:"/\\|?*\u0000-\u001f]+\.offload$/) + expect(fileName).not.toContain(':') + expect(fileName).not.toMatch(/[. ]\.offload$/) + }) +}) diff --git a/test/main/presenter/agentPresenter/loop/toolCallProcessor.test.ts b/test/main/presenter/agentPresenter/loop/toolCallProcessor.test.ts index 15668d05a..b6efea844 100644 --- a/test/main/presenter/agentPresenter/loop/toolCallProcessor.test.ts +++ b/test/main/presenter/agentPresenter/loop/toolCallProcessor.test.ts @@ -65,7 +65,7 @@ describe('ToolCallProcessor tool output offload', () => { const events: any[] = [] for await (const event of processor.process({ eventId: 'event-1', - toolCalls: [{ id: 'tool-1', name: 'exec', arguments: '{}' }], + toolCalls: [{ id: 'tool:1', name: 'exec', arguments: '{}' }], enabledMcpTools: [], conversationMessages, modelConfig, @@ -88,7 +88,7 @@ describe('ToolCallProcessor tool output offload', () => { '.deepchat', 'sessions', conversationId, - 'tool_tool-1.offload' + 'tool_tool_1.offload' ) expect(stub).toContain('[Tool output offloaded]') expect(stub).toContain(`Total characters: ${longOutput.length}`) diff --git a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts index eba6f92d7..f97f91353 100644 --- a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts @@ -553,7 +553,7 @@ describe('dispatch', () => { status: 'pending', timestamp: Date.now(), tool_call: { - id: 'tc1', + id: 'function.cdp_send:11', name: 'cdp_send', params: '{"method":"Page.captureScreenshot"}', response: '' @@ -561,7 +561,7 @@ describe('dispatch', () => { }) state.completedToolCalls = [ { - id: 'tc1', + id: 'function.cdp_send:11', name: 'cdp_send', arguments: '{"method":"Page.captureScreenshot"}' } @@ -584,7 +584,8 @@ describe('dispatch', () => { expect(executed.terminalError).toBeUndefined() const toolMessage = conversation.find((message: any) => message.role === 'tool') expect(toolMessage.content).toContain('[Tool output offloaded]') - expect(toolMessage.content).toContain('tool_tc1.offload') + expect(toolMessage.content).toContain('tool_function.cdp_send_11.offload') + expect(toolMessage.content).not.toContain(':11.offload') expect(toolMessage.content).not.toContain(tempHome!) expect(state.blocks[0].tool_call?.response).toContain('[Tool output offloaded]') expect(state.blocks[0].status).toBe('success') diff --git a/test/main/presenter/deepchatAgentPresenter/process.test.ts b/test/main/presenter/deepchatAgentPresenter/process.test.ts index e69aea7f7..ed4753515 100644 --- a/test/main/presenter/deepchatAgentPresenter/process.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/process.test.ts @@ -217,12 +217,12 @@ describe('processStream', () => { return (async function* () { yield { type: 'tool_call_start', - tool_call_id: 'tc1', + tool_call_id: 'function.cdp_send:11', tool_call_name: 'cdp_send' } as LLMCoreStreamEvent yield { type: 'tool_call_end', - tool_call_id: 'tc1', + tool_call_id: 'function.cdp_send:11', tool_call_arguments_complete: '{"method":"Page.captureScreenshot"}' } as LLMCoreStreamEvent yield { type: 'stop', stop_reason: 'tool_use' } as LLMCoreStreamEvent @@ -248,7 +248,8 @@ describe('processStream', () => { const secondCallMessages = (coreStream as ReturnType).mock.calls[1][0] const toolResultMsg = secondCallMessages.find((m: any) => m.role === 'tool') expect(toolResultMsg.content).toContain('[Tool output offloaded]') - expect(toolResultMsg.content).toContain('tool_tc1.offload') + expect(toolResultMsg.content).toContain('tool_function.cdp_send_11.offload') + expect(toolResultMsg.content).not.toContain(':11.offload') expect(toolResultMsg.content).not.toContain(tempHome!) }) From 4afc8747c7e2343c59d10fdfa20f9f4d5625b737 Mon Sep 17 00:00:00 2001 From: zerob13 Date: Tue, 17 Mar 2026 22:51:34 +0800 Subject: [PATCH 3/4] fix(floating-button): stabilize drag sizing --- .../floatingButtonPresenter/index.ts | 77 +++-- .../floatingButtonPresenter/index.test.ts | 274 ++++++++++++++++++ 2 files changed, 332 insertions(+), 19 deletions(-) create mode 100644 test/main/presenter/floatingButtonPresenter/index.test.ts diff --git a/src/main/presenter/floatingButtonPresenter/index.ts b/src/main/presenter/floatingButtonPresenter/index.ts index 5ad81bc0c..7132f9ed2 100644 --- a/src/main/presenter/floatingButtonPresenter/index.ts +++ b/src/main/presenter/floatingButtonPresenter/index.ts @@ -29,6 +29,8 @@ type DragRuntimeState = { startY: number windowX: number windowY: number + windowWidth: number + windowHeight: number } export class FloatingButtonPresenter { @@ -37,6 +39,8 @@ export class FloatingButtonPresenter { private configPresenter: IConfigPresenter private snapshot: FloatingWidgetSnapshot = { ...EMPTY_SNAPSHOT } private layoutAnimationTimer: ReturnType | null = null + private isDragging = false + private pendingLayoutSync = false constructor(configPresenter: IConfigPresenter) { this.configPresenter = configPresenter @@ -75,6 +79,8 @@ export class FloatingButtonPresenter { public destroy(): void { this.config.enabled = false this.snapshot = { ...EMPTY_SNAPSHOT } + this.isDragging = false + this.pendingLayoutSync = false this.stopLayoutAnimation() ipcMain.removeHandler(FLOATING_BUTTON_EVENTS.SNAPSHOT_REQUEST) @@ -250,11 +256,18 @@ export class FloatingButtonPresenter { return } + this.stopLayoutAnimation() + const stableBounds = this.getSnapshotBounds(bounds) + this.floatingWindow.setBounds(stableBounds) + this.isDragging = true + dragState = { startX: x, startY: y, - windowX: bounds.x, - windowY: bounds.y + windowX: stableBounds.x, + windowY: stableBounds.y, + windowWidth: stableBounds.width, + windowHeight: stableBounds.height } }) @@ -263,39 +276,44 @@ export class FloatingButtonPresenter { return } - const bounds = this.floatingWindow.getBounds() - if (!bounds) { - return - } - const deltaX = x - dragState.startX const deltaY = y - dragState.startY this.floatingWindow.setBounds({ x: dragState.windowX + deltaX, y: dragState.windowY + deltaY, - width: bounds.width, - height: bounds.height + width: dragState.windowWidth, + height: dragState.windowHeight }) }) ipcMain.on(FLOATING_BUTTON_EVENTS.DRAG_END, () => { if (!dragState || !this.floatingWindow?.exists()) { + this.isDragging = false dragState = null return } const bounds = this.floatingWindow.getBounds() if (!bounds) { + this.isDragging = false dragState = null return } - const currentDisplay = screen.getDisplayMatching(bounds) - const snapped = snapWidgetBoundsToEdge(bounds, currentDisplay.workArea) + const stableBounds = { + x: bounds.x, + y: bounds.y, + width: dragState.windowWidth, + height: dragState.windowHeight + } + const currentDisplay = screen.getDisplayMatching(stableBounds) + const snapped = snapWidgetBoundsToEdge(stableBounds, currentDisplay.workArea) this.floatingWindow.setDockSide(snapped.dockSide) this.floatingWindow.setBounds(snapped) + this.isDragging = false dragState = null + this.flushPendingLayoutSync() }) } @@ -321,19 +339,17 @@ export class FloatingButtonPresenter { return } + if (this.isDragging) { + this.pendingLayoutSync = true + return + } + const bounds = this.floatingWindow.getBounds() if (!bounds) { return } - const currentDisplay = screen.getDisplayMatching(bounds) - const dockSide = this.floatingWindow.getDockSide() - const nextBounds = repositionWidgetForResize( - bounds, - getWidgetSizeForSnapshot(this.snapshot), - currentDisplay.workArea, - dockSide - ) + const nextBounds = this.getSnapshotBounds(bounds) if (!animate || this.areBoundsEqual(bounds, nextBounds)) { this.stopLayoutAnimation() @@ -395,6 +411,29 @@ export class FloatingButtonPresenter { } } + private flushPendingLayoutSync(): void { + if (!this.pendingLayoutSync) { + return + } + + this.pendingLayoutSync = false + this.applyWindowLayout() + } + + private getSnapshotBounds(bounds: WidgetRect): WidgetRect { + if (!this.floatingWindow) { + return bounds + } + + const currentDisplay = screen.getDisplayMatching(bounds) + return repositionWidgetForResize( + bounds, + getWidgetSizeForSnapshot(this.snapshot), + currentDisplay.workArea, + this.floatingWindow.getDockSide() + ) + } + private easeInOutCubic(progress: number): number { return progress < 0.5 ? 4 * progress * progress * progress diff --git a/test/main/presenter/floatingButtonPresenter/index.test.ts b/test/main/presenter/floatingButtonPresenter/index.test.ts new file mode 100644 index 000000000..d3821c44b --- /dev/null +++ b/test/main/presenter/floatingButtonPresenter/index.test.ts @@ -0,0 +1,274 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { FLOATING_BUTTON_EVENTS } from '../../../../src/main/events' +import { + getCollapsedWidgetSize, + getExpandedWidgetSize +} from '../../../../src/main/presenter/floatingButtonPresenter/layout' +import type { SessionWithState } from '../../../../src/shared/types/agent-interface' + +const { electronState, floatingWindowState, presenterState, sendToRendererMock, menuPopupMock } = + vi.hoisted(() => { + const eventHandlers = new Map unknown>() + const invokeHandlers = new Map unknown>() + const workArea = { + x: 0, + y: 0, + width: 1200, + height: 900 + } + + const floatingWindowState = { + bounds: { x: 1136, y: 180, width: 64, height: 64 }, + dockSide: 'right' as 'left' | 'right', + exists: true, + instance: null as null | { + create: ReturnType + show: ReturnType + destroy: ReturnType + exists: ReturnType + getState: ReturnType + getBounds: ReturnType + setBounds: ReturnType + getDockSide: ReturnType + setDockSide: ReturnType + getWindow: ReturnType + }, + reset() { + this.bounds = { x: 1136, y: 180, width: 64, height: 64 } + this.dockSide = 'right' + this.exists = true + this.instance = null + } + } + + const presenterState = { + sessions: [] as SessionWithState[], + reset() { + this.sessions = [] + } + } + + const sendToRendererMock = vi.fn() + const menuPopupMock = vi.fn() + + return { + electronState: { + workArea, + eventHandlers, + invokeHandlers, + reset() { + eventHandlers.clear() + invokeHandlers.clear() + } + }, + floatingWindowState, + presenterState, + sendToRendererMock, + menuPopupMock + } + }) + +vi.mock('electron', () => ({ + BrowserWindow: class BrowserWindow {}, + ipcMain: { + on: vi.fn((channel: string, handler: (...args: unknown[]) => unknown) => { + electronState.eventHandlers.set(channel, handler) + }), + handle: vi.fn((channel: string, handler: (...args: unknown[]) => unknown) => { + electronState.invokeHandlers.set(channel, handler) + }), + removeHandler: vi.fn((channel: string) => { + electronState.invokeHandlers.delete(channel) + }), + removeAllListeners: vi.fn((channel: string) => { + electronState.eventHandlers.delete(channel) + }) + }, + screen: { + getDisplayMatching: vi.fn(() => ({ + workArea: electronState.workArea + })) + }, + Menu: { + buildFromTemplate: vi.fn(() => ({ + popup: menuPopupMock + })) + }, + app: { + quit: vi.fn() + } +})) + +vi.mock('../../../../src/main/presenter/floatingButtonPresenter/FloatingButtonWindow', () => ({ + FloatingButtonWindow: class MockFloatingButtonWindow { + public create = vi.fn().mockResolvedValue(undefined) + public show = vi.fn() + public destroy = vi.fn() + public exists = vi.fn(() => floatingWindowState.exists) + public getState = vi.fn(() => null) + public getBounds = vi.fn(() => ({ ...floatingWindowState.bounds })) + public setBounds = vi.fn((bounds) => { + floatingWindowState.bounds = { ...bounds } + }) + public getDockSide = vi.fn(() => floatingWindowState.dockSide) + public setDockSide = vi.fn((dockSide: 'left' | 'right') => { + floatingWindowState.dockSide = dockSide + }) + public getWindow = vi.fn(() => ({ + isDestroyed: () => false, + webContents: { + id: 1, + send: sendToRendererMock + } + })) + + constructor() { + floatingWindowState.instance = this as unknown as typeof floatingWindowState.instance + } + } +})) + +vi.mock('../../../../src/main/presenter/index', () => ({ + presenter: { + newAgentPresenter: { + getSessionList: vi.fn(async () => presenterState.sessions), + activateSession: vi.fn() + }, + windowPresenter: { + mainWindow: null, + getAllWindows: vi.fn(() => []), + getFocusedWindow: vi.fn(() => null), + createAppWindow: vi.fn(async () => null), + show: vi.fn() + }, + tabPresenter: { + getWindowType: vi.fn(() => 'chat') + } + } +})) + +import { FloatingButtonPresenter } from '../../../../src/main/presenter/floatingButtonPresenter' + +describe('FloatingButtonPresenter drag layout sync', () => { + let floatingPresenter: FloatingButtonPresenter | null = null + + const createConfigPresenter = () => + ({ + getFloatingButtonEnabled: vi.fn(() => true), + getLanguage: vi.fn(() => 'zh-CN'), + getCurrentThemeIsDark: vi.fn(async () => false) + }) as any + + const emitEvent = async (channel: string, payload?: unknown) => { + const handler = electronState.eventHandlers.get(channel) + if (!handler) { + throw new Error(`Missing IPC handler for ${channel}`) + } + + return await handler({}, payload) + } + + beforeEach(() => { + vi.useFakeTimers() + electronState.reset() + floatingWindowState.reset() + presenterState.reset() + sendToRendererMock.mockReset() + menuPopupMock.mockReset() + }) + + afterEach(async () => { + floatingPresenter?.destroy() + floatingPresenter = null + await vi.runOnlyPendingTimersAsync() + vi.useRealTimers() + }) + + it('keeps the collapsed size stable when dragging interrupts collapse animation', async () => { + floatingPresenter = new FloatingButtonPresenter(createConfigPresenter()) + await floatingPresenter.initialize() + + await emitEvent(FLOATING_BUTTON_EVENTS.SET_EXPANDED, true) + await vi.advanceTimersByTimeAsync(400) + + await emitEvent(FLOATING_BUTTON_EVENTS.SET_EXPANDED, false) + await vi.advanceTimersByTimeAsync(160) + + expect(floatingWindowState.bounds.width).toBeGreaterThan(getCollapsedWidgetSize(0).width) + + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_START, { x: 100, y: 100 }) + expect(floatingWindowState.bounds).toMatchObject({ + width: getCollapsedWidgetSize(0).width, + height: getCollapsedWidgetSize(0).height + }) + + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_MOVE, { x: 220, y: 150 }) + expect(floatingWindowState.bounds).toMatchObject({ + x: 1256, + y: 230, + width: getCollapsedWidgetSize(0).width, + height: getCollapsedWidgetSize(0).height + }) + + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_END) + expect(floatingWindowState.bounds).toMatchObject({ + x: electronState.workArea.x + electronState.workArea.width - getCollapsedWidgetSize(0).width, + y: 230, + width: getCollapsedWidgetSize(0).width, + height: getCollapsedWidgetSize(0).height + }) + }) + + it('defers layout changes during drag and applies the latest snapshot after drop', async () => { + floatingPresenter = new FloatingButtonPresenter(createConfigPresenter()) + await floatingPresenter.initialize() + + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_START, { x: 80, y: 90 }) + + await emitEvent(FLOATING_BUTTON_EVENTS.SET_EXPANDED, true) + presenterState.sessions = [ + { + id: 'session-1', + agentId: 'deepchat', + title: 'Session 1', + projectDir: null, + isPinned: false, + isDraft: false, + createdAt: 1, + updatedAt: 10, + status: 'idle', + providerId: 'openai', + modelId: 'gpt-5.4' + }, + { + id: 'session-2', + agentId: 'deepchat', + title: 'Session 2', + projectDir: null, + isPinned: false, + isDraft: false, + createdAt: 2, + updatedAt: 11, + status: 'generating', + providerId: 'openai', + modelId: 'gpt-5.4' + } + ] + + await floatingPresenter.refreshWidgetState() + expect(floatingWindowState.bounds).toMatchObject({ + width: getCollapsedWidgetSize(0).width, + height: getCollapsedWidgetSize(0).height + }) + + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_MOVE, { x: 140, y: 170 }) + await emitEvent(FLOATING_BUTTON_EVENTS.DRAG_END) + + expect(floatingWindowState.bounds).toMatchObject({ + x: electronState.workArea.x + electronState.workArea.width - getExpandedWidgetSize(2).width, + y: 260, + width: getExpandedWidgetSize(2).width, + height: getExpandedWidgetSize(2).height + }) + }) +}) From c1e10f6a215f7ace21f4461d501ac3d80f2c6ab6 Mon Sep 17 00:00:00 2001 From: zerob13 Date: Tue, 17 Mar 2026 23:30:04 +0800 Subject: [PATCH 4/4] fix(agent-runtime): avoid offload id collisions --- src/main/lib/agentRuntime/sessionPaths.ts | 6 ++++- .../lib/agentRuntime/sessionPaths.test.ts | 24 +++++++++++++++++-- .../floatingButtonPresenter/index.test.ts | 4 +++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/main/lib/agentRuntime/sessionPaths.ts b/src/main/lib/agentRuntime/sessionPaths.ts index 92dcf6be7..fcafdda53 100644 --- a/src/main/lib/agentRuntime/sessionPaths.ts +++ b/src/main/lib/agentRuntime/sessionPaths.ts @@ -1,3 +1,4 @@ +import { createHash } from 'crypto' import { app } from 'electron' import path from 'path' @@ -46,7 +47,7 @@ export function resolveToolOffloadTemplatePath(conversationId: string): string | } function sanitizeToolCallIdForOffload(toolCallId: string): string { - const sanitized = Array.from(toolCallId.trim(), (char) => { + const sanitizedBase = Array.from(toolCallId.trim(), (char) => { const charCode = char.charCodeAt(0) if (charCode <= 0x1f || INVALID_WINDOWS_SEGMENT_CHARS.has(char)) { return '_' @@ -57,5 +58,8 @@ function sanitizeToolCallIdForOffload(toolCallId: string): string { .join('') .replace(TRAILING_WINDOWS_SEGMENT_CHARS, '') + const fingerprint = createHash('sha1').update(toolCallId).digest('hex').slice(0, 8) + const sanitized = [sanitizedBase || 'tool_call', fingerprint].filter(Boolean).join('_') + return sanitized || 'tool_call' } diff --git a/test/main/lib/agentRuntime/sessionPaths.test.ts b/test/main/lib/agentRuntime/sessionPaths.test.ts index 69f8a0d22..09cf5197b 100644 --- a/test/main/lib/agentRuntime/sessionPaths.test.ts +++ b/test/main/lib/agentRuntime/sessionPaths.test.ts @@ -1,3 +1,4 @@ +import { createHash } from 'crypto' import path from 'path' import { app } from 'electron' import { afterEach, describe, expect, it, vi } from 'vitest' @@ -13,10 +14,18 @@ describe('sessionPaths offload path sanitization', () => { it('sanitizes colon-based tool call ids into a normal .offload file name', () => { vi.spyOn(app, 'getPath').mockReturnValue(homeDir) - const filePath = resolveToolOffloadPath('session-a', 'function.cdp_send:11') + const toolCallId = 'function.cdp_send:11' + const fingerprint = createHash('sha1').update(toolCallId).digest('hex').slice(0, 8) + const filePath = resolveToolOffloadPath('session-a', toolCallId) expect(filePath).toBe( - path.join(homeDir, '.deepchat', 'sessions', 'session-a', 'tool_function.cdp_send_11.offload') + path.join( + homeDir, + '.deepchat', + 'sessions', + 'session-a', + `tool_function.cdp_send_11_${fingerprint}.offload` + ) ) expect(path.basename(filePath!)).not.toContain(':') expect(path.basename(filePath!)).toMatch(/\.offload$/) @@ -32,4 +41,15 @@ describe('sessionPaths offload path sanitization', () => { expect(fileName).not.toContain(':') expect(fileName).not.toMatch(/[. ]\.offload$/) }) + + it('adds a fingerprint so colliding sanitized tool ids still map to different files', () => { + vi.spyOn(app, 'getPath').mockReturnValue(homeDir) + + const colonFilePath = resolveToolOffloadPath('session-a', 'tool:1') + const slashFilePath = resolveToolOffloadPath('session-a', 'tool/1') + + expect(path.basename(colonFilePath!)).toMatch(/^tool_tool_1_[0-9a-f]{8}\.offload$/) + expect(path.basename(slashFilePath!)).toMatch(/^tool_tool_1_[0-9a-f]{8}\.offload$/) + expect(colonFilePath).not.toBe(slashFilePath) + }) }) diff --git a/test/main/presenter/floatingButtonPresenter/index.test.ts b/test/main/presenter/floatingButtonPresenter/index.test.ts index d3821c44b..30e22f8d2 100644 --- a/test/main/presenter/floatingButtonPresenter/index.test.ts +++ b/test/main/presenter/floatingButtonPresenter/index.test.ts @@ -68,8 +68,10 @@ const { electronState, floatingWindowState, presenterState, sendToRendererMock, } }) +const BrowserWindow = vi.hoisted(() => class BrowserWindow {}) + vi.mock('electron', () => ({ - BrowserWindow: class BrowserWindow {}, + BrowserWindow, ipcMain: { on: vi.fn((channel: string, handler: (...args: unknown[]) => unknown) => { electronState.eventHandlers.set(channel, handler)