diff --git a/.vscode/extensions.json b/.vscode/extensions.json index fe1ca403b..347bc4222 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,3 +1,3 @@ { - "recommendations": ["dbaeumer.vscode-eslint", "lokalise.i18n-ally", "esbenp.prettier-vscode"] + "recommendations": ["dbaeumer.vscode-eslint", "lokalise.i18n-ally", "esbenp.prettier-vscode", "TypeScriptTeam.native-preview"] } diff --git a/.vscode/settings.json b/.vscode/settings.json index 77acfbec6..a476674e2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,5 +5,7 @@ "i18n-ally.keystyle": "nested", "i18n-ally.sourceLanguage": "zh-CN", "i18n-ally.namespace": true, - "i18n-ally.pathMatcher": "{locale}/{namespaces}.json" + "i18n-ally.pathMatcher": "{locale}/{namespaces}.json", + "unocss.disable": true, + "typescript.experimental.useTsgo": true } diff --git a/docs/architecture/tool-system.md b/docs/architecture/tool-system.md index 9403012da..f73c17213 100644 --- a/docs/architecture/tool-system.md +++ b/docs/architecture/tool-system.md @@ -29,7 +29,7 @@ graph TB AgentToolMgr[AgentToolManager] FsHandler[AgentFileSystemHandler] - Browser[Yo Browser Tools] + YoBrowser[Yo Browser CDP] end subgraph "外部服务" @@ -48,10 +48,10 @@ graph TB McpClient --> MCPServers AgentToolMgr --> FsHandler - AgentToolMgr --> Browser + AgentToolMgr --> YoBrowser FsHandler --> Files - Browser --> Web + YoBrowser --> Web classDef router fill:#e3f2fd classDef mcp fill:#fff3e0 @@ -60,7 +60,7 @@ graph TB class ToolP,Mapper router class McpP,ServerMgr,ToolMgr,McpClient mcp - class AgentToolMgr,FsHandler,Browser agent + class AgentToolMgr,FsHandler,YoBrowser agent class MCPServers,Files,Web external ``` @@ -622,23 +622,20 @@ class AgentFileSystemHandler { 3. **边界检查**:防止 `../` 越界访问 4. 
**正则验证**:`grep_search` 和 `text_replace` 使用 `validateRegexPattern` 防 ReDoS -### Browser 工具 +### YoBrowser CDP 工具 -```typescript -// 通过 Yo Browser Presenter 调用 -async callBrowserTool(toolName: string, args: any): Promise { - switch (toolName) { - case 'browser_navigate': - return await this.yoBrowserPresenter.navigate(args.url) - case 'browser_scrape': - return await this.yoBrowserPresenter.scrape(args.url) - case 'browser_screenshot': - return await this.yoBrowserPresenter.screenshot(args.url) - default: - throw new Error(`未知的 Browser 工具: ${toolName}`) - } -} -``` +YoBrowser 提供基于 Chrome DevTools Protocol (CDP) 的最小工具集,在 agent 模式下直接可用。 + +**可用工具**: +- `yo_browser_tab_list` - 列出所有浏览器 tabs +- `yo_browser_tab_new` - 创建新 tab +- `yo_browser_tab_activate` - 激活指定 tab +- `yo_browser_tab_close` - 关闭 tab +- `yo_browser_cdp_send` - 发送 CDP 命令 + +**安全约束**: +- `local://` URL 禁止 CDP attach(在 `BrowserTab.ensureSession()` 中检查) +- 所有 CDP 命令通过 `webContents.debugger.sendCommand()` 执行 ## 🔐 权限系统 diff --git a/docs/archives/workspace-agent-refactoring-summary.md b/docs/archives/workspace-agent-refactoring-summary.md index 0634495da..bc9d193e0 100644 --- a/docs/archives/workspace-agent-refactoring-summary.md +++ b/docs/archives/workspace-agent-refactoring-summary.md @@ -196,7 +196,7 @@ graph TB - MCP 工具:保持原始命名 - Agent FileSystem 工具:不加前缀(`read_file` 等) -- Yo Browser:保留 `browser_` 前缀 +- Yo Browser:使用 `yo_browser_` 前缀 ### 工具路由机制 diff --git a/docs/specs/agent-provider-simplification/plan.md b/docs/specs/agent-provider-simplification/plan.md new file mode 100644 index 000000000..1820f65a5 --- /dev/null +++ b/docs/specs/agent-provider-simplification/plan.md @@ -0,0 +1,68 @@ +# Plan: Agent Provider Simplification (ACP-only) + +## Summary + +Replace the “agent provider” abstraction and detection logic with a single explicit rule: **ACP is the only agent provider and is identified by `providerId === 'acp'`.** + +## Current Call Flow (relevant parts) + +- Main: + - 
`ProviderInstanceManager.createProviderInstance()` already special-cases `provider.id === 'acp'`. + - `ProviderInstanceManager.isAgentProvider()` uses `instanceof BaseAgentProvider` and (if instance not created) a constructor prototype check (`isAgentConstructor`). + - `LLMProviderPresenter.isAgentProvider()` exposes this to the renderer via `ILlmProviderPresenter`. +- Renderer: + - `src/renderer/src/stores/modelStore.ts` calls `llmproviderPresenter.isAgentProvider(providerId)` over IPC to choose between: + - `agentModelStore.refreshAgentModels(providerId)` (ACP path) + - `refreshStandardModels + refreshCustomModels` (standard path) + - Other renderer logic already treats ACP as special via `provider.id === 'acp'`. + +## Proposed Changes + +### 1) Remove agent-provider classification API + +- Remove `isAgentProvider(providerId: string)` from: + - `src/shared/types/presenters/llmprovider.presenter.d.ts` + - `src/shared/types/presenters/legacy.presenters.d.ts` + - `src/main/presenter/llmProviderPresenter/index.ts` + - `src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts` + +Rationale: It is only used by the renderer for ACP gating, and ACP can be identified locally by ID. + +### 2) Replace renderer gating with an explicit ACP check + +- In `src/renderer/src/stores/modelStore.ts`: + - Remove the async IPC call `llmP.isAgentProvider(providerId)`. + - Replace with a local predicate: `providerId === 'acp'`. + - Keep the existing ACP refresh path using `agentModelStore.refreshAgentModels('acp')` (no behavioral change). + +### 3) Remove `BaseAgentProvider` (optional but preferred) + +Because `BaseAgentProvider` is only used by `AcpProvider`, delete the base class and: + +- Make `AcpProvider` extend `BaseLLMProvider` directly. +- Move `cleanup()` logic into `AcpProvider` (or delegate to `AcpSessionManager` / `AcpProcessManager`). +- Ensure `cleanup()` is safe to call multiple times and during shutdown. 
+ +Notes: +- `acpCleanupHook` currently awaits `cleanup()` even though `BaseAgentProvider.cleanup()` is `void`. Consider standardizing ACP cleanup to `Promise<void>` to match usage. + +## Compatibility / Migration + +- No user data migration. +- Provider ID `acp` remains unchanged and is treated as a stable internal contract. +- Any internal IPC typing generation must be updated to reflect removal of `isAgentProvider`. + +## Test Strategy + +Add minimal tests focusing on the only behavioral dependency (renderer model refresh selection): + +- Renderer unit test for `modelStore.refreshProviderModels()`: + - When `providerId === 'acp'`, it uses `agentModelStore.refreshAgentModels`. + - When `providerId !== 'acp'`, it uses standard refresh path. + +Main-process unit tests are optional; the change is mostly removal and ACP-id checks. + +## Rollout + +Single PR is acceptable if changes stay localized (types + modelStore + ACP provider base class cleanup). + diff --git a/docs/specs/agent-provider-simplification/spec.md b/docs/specs/agent-provider-simplification/spec.md new file mode 100644 index 000000000..2c6aa6579 --- /dev/null +++ b/docs/specs/agent-provider-simplification/spec.md @@ -0,0 +1,43 @@ +# Agent Provider Simplification (ACP-only) + +## Background + +DeepChat currently distinguishes between: + +- **LLM providers**: network-backed providers that implement `BaseLLMProvider` (OpenAI/Anthropic/etc). +- **Agent providers**: providers that manage local agent sessions/processes (currently only `acp` via `AcpProvider`). + +The codebase implements this distinction via a dedicated base class (`BaseAgentProvider`) and a runtime/type-detection API (`isAgentProvider`), which is then consumed from the renderer via IPC. + +## Problem + +- `BaseAgentProvider` is only used by `AcpProvider`, so the abstraction adds indirection without real reuse. +- Provider type detection is over-engineered (`isAgentConstructor` + prototype checks) and duplicates existing ACP-specific branching. 
+- The renderer calls `llmproviderPresenter.isAgentProvider(providerId)` over IPC, but the only “agent provider” is `providerId === 'acp'`. This creates unnecessary main↔renderer coupling and call complexity. + +## Goals + +- Treat **ACP as the only agent provider** and identify it **only by `providerId === 'acp'`**. +- Remove the generic “agent provider type detection” path and the renderer IPC dependency for this decision. +- Keep user-visible behavior unchanged: + - ACP agents still appear as selectable models when ACP is enabled. + - Non-ACP providers keep the standard model/custom-model refresh behavior. + - Shutdown and provider disable still clean up ACP resources. + +## Non-goals + +- Supporting multiple agent providers beyond ACP. +- Redesigning ACP model derivation (agents-as-models) or session/workspace semantics. +- Changing persisted provider IDs or stored settings schemas. + +## Acceptance Criteria + +- Renderer no longer calls `llmproviderPresenter.isAgentProvider(...)`; ACP decision is local (`providerId === 'acp'`). +- Main process no longer needs `isAgentConstructor` / prototype-based provider classification. +- No remaining runtime dependency on `BaseAgentProvider` for correctness (ACP cleanup remains correct). +- `pnpm run typecheck`, `pnpm test`, `pnpm run lint` pass. + +## Open Questions + +- None. + diff --git a/docs/specs/agent-provider-simplification/tasks.md b/docs/specs/agent-provider-simplification/tasks.md new file mode 100644 index 000000000..9b6918842 --- /dev/null +++ b/docs/specs/agent-provider-simplification/tasks.md @@ -0,0 +1,26 @@ +# Tasks: Agent Provider Simplification (ACP-only) + +1. Update renderer to stop using IPC for agent-provider detection + - Remove `llmproviderPresenter.isAgentProvider` usage from `src/renderer/src/stores/modelStore.ts`. + - Gate ACP behavior by `providerId === 'acp'`. + +2. Remove `isAgentProvider` from the presenter contract + - Remove from `src/shared/types/presenters/llmprovider.presenter.d.ts`. 
+ - Remove from `src/shared/types/presenters/legacy.presenters.d.ts`. + - Remove implementation from `src/main/presenter/llmProviderPresenter/index.ts`. + +3. Remove main-side agent-provider classification implementation + - Delete `ProviderInstanceManager.isAgentProvider()` and `isAgentConstructor()` in `src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts`. + - Ensure no other code path depends on `BaseAgentProvider` type checks. + +4. Remove `BaseAgentProvider` abstraction (preferred) + - Delete `src/main/presenter/llmProviderPresenter/baseAgentProvider.ts`. + - Update `src/main/presenter/llmProviderPresenter/providers/acpProvider.ts` to extend `BaseLLMProvider` directly. + - Keep/adjust ACP cleanup semantics (safe shutdown, provider disable, app quit). + +5. Add/adjust tests + - Add a Vitest suite under `test/renderer/**` validating model refresh selection for ACP vs non-ACP. + +6. Quality gates + - Run `pnpm run format`, `pnpm run lint`, `pnpm run typecheck`, and `pnpm test`. + diff --git a/docs/specs/chat-settings-control/plan.md b/docs/specs/chat-settings-control/plan.md new file mode 100644 index 000000000..d76665b0c --- /dev/null +++ b/docs/specs/chat-settings-control/plan.md @@ -0,0 +1,122 @@ +# Plan: Control Settings via Chat + +## Key Decision: Skill-Based Context Control + +This feature MUST be described and delivered as a DeepChat skill so that additional instructions/context are only injected when the user actually requests to change DeepChat settings. + +- Skill Name (suggested): `deepchat-settings` +- Activation: Activated via `skill_control` **ONLY** when the user request involves DeepChat settings/preferences. +- Deactivation: Call `skill_control` after completing the setting change to keep context lean. + +## Tool Injection Control (No Skill, No Tools) + +Configuration-related tools MUST NOT appear in the LLM tool list (and MUST NOT be mentioned in the system prompt) unless the `deepchat-settings` skill is active. 
+ +Implementation intent: + +- Define dedicated tools (MCP-format function definitions): + - `deepchat_settings_toggle` + - `deepchat_settings_set_language` + - `deepchat_settings_set_theme` + - `deepchat_settings_set_font_size` + - `deepchat_settings_open` +- **DO NOT** expose them through MCP server/tool list UI (avoid being auto-enabled into `enabledMcpTools`). +- Only inject these tool definitions when: + - `deepchat-settings` is enabled for the current conversation, AND + - The skill's pre-metadata `allowedTools` includes the tool name. + +This requires conversation-scoped tool definition construction: + +- Extend tool definition construction context to include `conversationId`. +- Retrieve `skillsAllowedTools` for that conversation (via `SkillPresenter.getActiveSkillsAllowedTools`). +- Only conditionally append `deepchat_settings_*` tool definitions when allowed. + +## Step 1: Safe Settings Application API (Main Process) + +### Entry Point + +Implement a narrow, validated application surface in the main process (presenter method or agent tool handler) for: + +- Accepting `unknown` input and validating it (Zod-style, similar to `AgentFileSystemHandler`). +- Using an allowlist of setting IDs. +- Applying changes by calling existing `ConfigPresenter` methods so existing event broadcasts remain correct. +- Returning structured results to render confirmation/error messages. 
+ +### Allowlisted Settings and Mapping + +Toggle settings: + +- `soundEnabled` -> `ConfigPresenter.setSoundEnabled(boolean)` (broadcasts: `CONFIG_EVENTS.SOUND_ENABLED_CHANGED`) +- `copyWithCotEnabled` -> `ConfigPresenter.setCopyWithCotEnabled(boolean)` (broadcasts: `CONFIG_EVENTS.COPY_WITH_COT_CHANGED`) + +Enum settings: + +- `language` -> `ConfigPresenter.setLanguage(locale)` (broadcasts: `CONFIG_EVENTS.LANGUAGE_CHANGED`) +- `theme` -> `ConfigPresenter.setTheme('dark' | 'light' | 'system')` (broadcasts: `CONFIG_EVENTS.THEME_CHANGED`) +- `fontSizeLevel` -> `ConfigPresenter.setSetting('fontSizeLevel', level)` (broadcasts `CONFIG_EVENTS.FONT_SIZE_CHANGED` via special case) + +### Validation Rules + +- Strict allowlist; reject unknown IDs. +- No implicit type conversion in Step 1. +- Validation per setting: + - Booleans: must be boolean type + - Enum values: must match allowed set + - `fontSizeLevel`: must be integer within supported range (source of truth TBD; may align with `uiSettingsStore` constants) + - `language`: must be one of supported locales (reuse support list from config) + +### Defense in Depth: Require Skill Activity + +Even with controlled tool injection, maintain runtime checks: + +- If `deepchat-settings` is **NOT** enabled for the conversation, reject application and return error telling the model/user to activate it. +- This ensures settings don't accidentally change due to unrelated agent behavior. + +## Step 2: Skill Definition (Natural Language Behavior) + +### Built-in Skill Artifact + +Add `resources/skills/deepchat-settings/SKILL.md`: + +- Pre-metadata `description` MUST explicitly state: + - This is ONLY for changing DeepChat application settings. + - Activate ONLY when user requests setting changes (settings/preferences/theme/language/font/sound/copy COT). + - Do NOT activate for OS settings or programming/code settings. +- Body MUST define: + - Supported settings (allowlist) and canonical values. 
+ - How to ask clarifying questions when ambiguous. + - When to refuse and instead open settings. + - Always deactivate after completing setting tasks. + +### Disallowed Settings -> Open Settings + +For requests involving MCP configuration, prompts, providers, API keys, etc.: + +- Do NOT apply via tools. +- Provide precise instructions telling user where to change them. +- Open settings window and navigate to relevant section if possible. + +Implementation options for opening/navigating settings: + +- Use `presenter.windowPresenter.createSettingsWindow()`. +- Optionally `executeJavaScript` to set localStorage navigation hint that UI can read. +- Or add dedicated IPC channel from main process -> settings renderer to navigate to tab/section. + +## Data Model + +Introduce shared request/response types (for Step 1 entry point + tools): + +- `ChatSettingId` (union of allowlisted IDs) +- `ApplyChatSettingRequest` (discriminated union `{ id, value }`) +- `ApplyChatSettingResult` + - `{ ok: true; id; value; previousValue?; appliedAt }` + - `{ ok: false; errorCode; message; details? }` + +## Testing Strategy + +- Main process (Vitest): + - Allowlist + validation (reject invalid values, no writes) + - Each supported setting maps to correct `ConfigPresenter` method + - Skill requirement enforcement works (tool rejects when skill inactive) +- Renderer/UI (if any navigation hints added): + - Settings page navigation handler tests (optional) diff --git a/docs/specs/chat-settings-control/spec.md b/docs/specs/chat-settings-control/spec.md new file mode 100644 index 000000000..4e181d54c --- /dev/null +++ b/docs/specs/chat-settings-control/spec.md @@ -0,0 +1,85 @@ +# Control Settings via Chat + +## Overview + +Allow users to update a small, safe subset of DeepChat settings via natural language within conversations. Changes MUST be validated, persisted, and take effect immediately. 
For complex/high-risk settings (e.g., MCP configuration, prompts), the assistant MUST NOT apply changes directly; instead, it should explain where to edit them and automatically open settings (ideally deep-linked to the relevant section). + +This specification is intentionally split into two increments: + +- **Step 1**: Provide a safe, validated settings application API (main process) that can be called from controlled entry points (renderer UI and/or agent tools) to change settings and trigger live updates. +- **Step 2**: Deliver natural language behavior as a **DeepChat skill** so that additional context is injected only when relevant. + +## Goals + +- Allow in-conversation updates to: + - Toggle settings: Sound, Copy COT details. + - Enum settings: Language, Theme, Font size. +- Apply changes immediately (current window + relevant other windows). +- Persist changes to existing configuration store. +- Keep surface area safe: do not expose arbitrary configuration keys. +- Use skills to control context: + - Settings modification guidance MUST be injected ONLY when user actually requests to change DeepChat settings. + +## Non-Goals + +- Do NOT allow users to set arbitrary `ConfigPresenter.setSetting(key, value)` keys via chat. +- Do NOT allow setting sensitive values via chat (API keys, tokens, environment variables, file paths, command arguments). +- Do NOT implement editing of MCP servers, prompts, providers, or other complex nested config via natural language. +- Do NOT change how settings are stored on disk (no migrations in this feature). + +## User Stories + +- As a user, I can say "turn on sound" and it enables sound immediately. +- As a user, I can say "copy COT details when copying" and it enables/disables the toggle. +- As a user, I can say "set language to English" and UI language switches immediately. +- As a user, I can say "use dark theme" or "follow system theme" and theme updates immediately. 
+- As a user, I can say "make text larger" and font size changes immediately. +- As a user, if I ask "add MCP server" or "edit prompts", the assistant tells me where in settings and opens settings there. + +## Acceptance Criteria + +### Step 1: Safe Settings Application API (No NLP) + +- A main process API exists that accepts restricted, validated requests to change one supported setting. +- Only allowlisted settings from this specification can be changed via this API. +- Setting tools are NOT injected into LLM tool list when `deepchat-settings` skill is **NOT** active. +- On success: + - Setting value is persisted (existing underlying storage). + - Changes take effect immediately in current renderer. + - Cross-window/tab updates happen where existing event flow supports (e.g., theme/language/font size/sound). +- On failure: + - Invalid inputs are rejected with structured, user-presentable errors (no partial writes). +- API is safe to call with untrusted input (strict validation + allowlist). + +### Step 2: Natural Language via Skill (Context Control) + +- A built-in skill exists (suggested: `deepchat-settings`) describing this functionality. +- This skill is NOT intended to remain active by default: + - It should activate only when user requests to change DeepChat's own settings. + - It should deactivate after setting change is complete. +- When active, assistant: + - Explains user intent, normalizes to canonical values, and calls Step 1 API. + - For disallowed/complex settings (MCP, prompts, etc.), provides guidance and opens settings to best-match section. + +## Open Questions [NEEDS CLARIFICATION] + +1. Skill Mode Availability + - Skill prompt injection currently seems tied to `chatMode === 'agent'`. Do we want this feature to work in: + - Agent mode only (suggested first increment), OR + - Also in chat/ACP agent modes (requires additional work)? +2. 
Font Size Representation + - Should chat use semantic labels (`small/medium/large`) mapping to `fontSizeLevel`, or accept explicit numeric levels? +3. Settings Deep Link Targets + - What are the canonical settings tab/section IDs we want to support deep-linking to (e.g., `mcp`, `prompts`, `appearance`, `language`)? +4. UX: Confirm vs Silent Apply + - Should assistant always confirm before applying changes, or apply immediately with "undo" capability? + +## Security & Privacy Notes + +- Step 1 API MUST: + - Use an allowlist of setting IDs. + - Validate input types and enum ranges. + - Avoid any generic "set arbitrary key" functionality. +- Defense in depth (recommended): Setting tools/entry points should verify the relevant skill is active for the conversation before applying. +- Step 2 MUST NOT allow indirect privilege escalation: + - MUST NOT change file system paths, command arguments, environment variables, or settings that hold secrets via natural language. diff --git a/docs/specs/chat-settings-control/tasks.md b/docs/specs/chat-settings-control/tasks.md new file mode 100644 index 000000000..cb7c35b74 --- /dev/null +++ b/docs/specs/chat-settings-control/tasks.md @@ -0,0 +1,31 @@ +# Tasks: Control Settings via Chat + +## Step 0 - Skill-First Design (Context Control) + +1. Draft built-in skill: `resources/skills/deepchat-settings/SKILL.md`. +2. Ensure pre-metadata `description` explicitly restricts activation to only DeepChat setting changes. +3. Ensure skill body lists allowlisted settings + safe handling + self-deactivation guidance. + +## Step 1 - Safe Settings Application API (Main Process) + +1. Add shared types for settings application request/result. +2. Implement validated application entry point (Zod-style `unknown` parsing). +3. Implement allowlist mapping to existing `ConfigPresenter` methods: + - `soundEnabled` + - `copyWithCotEnabled` + - `language` + - `theme` + - `fontSizeLevel` +4. 
Implement tool injection control: only include `deepchat_settings_toggle`/`deepchat_settings_set_language`/`deepchat_settings_set_theme`/`deepchat_settings_set_font_size`/`deepchat_settings_open` in tool definitions when `deepchat-settings` is active AND allowed. +5. Add defense-in-depth control: reject application if `deepchat-settings` skill is not active for conversation. +6. Add "open settings" helper/tool for unsupported settings (MCP/prompts, etc.), including best-effort navigation. +7. Add main process tests: + - Validation and mapping + - Tool definitions only exist when skill active + - Skill control enforces rejection when inactive + +## Step 2 - UX Behavior (LLM + Skill) + +1. Verify the skill metadata prompt lists `deepchat-settings` clearly enough for the model to select it. +2. Ensure skill instructs: activate only when user asks; deactivate after completion. +3. Add examples of Chinese/English user phrasing in SKILL.md. diff --git a/docs/specs/yobrowser-optimization/plan.md b/docs/specs/yobrowser-optimization/plan.md new file mode 100644 index 000000000..37bfac326 --- /dev/null +++ b/docs/specs/yobrowser-optimization/plan.md @@ -0,0 +1,83 @@ +# YoBrowser Optimization:实施方案(Plan) + +## 现状盘点(基于代码) + +- Renderer:`src/renderer/src/components/workspace/WorkspaceView.vue` 在 `agent` 模式下渲染 `WorkspaceBrowserTabs`,但不关心是否存在 tabs。 +- Renderer:`src/renderer/src/stores/yoBrowser.ts` 已维护 tabs 与 `tabCount`(由 IPC 事件更新)。 +- Main:YoBrowser 通过 `YoBrowserToolHandler` + `YoBrowserToolDefinitions` 暴露 `yo_browser_*` 工具,当前有 skill gating 逻辑(需要激活 `yo-browser-cdp` skill)。 +- Agent loop:`src/main/presenter/agentPresenter/loop/toolCallProcessor.ts` 中 `TOOLS_REQUIRING_OFFLOAD` 包含 `yo_browser_cdp_send`。 + +## 总体设计 + +1) UI:Browser Tabs 分区只在 `tabCount > 0` 时出现。 +2) YoBrowser 工具直接注入:agent 模式下直接提供 `yo_browser_*` 工具,不依赖 skills 体系。 +3) 工具实现保持 CDP 方式:`yo_browser_cdp_send` + tab 管理,参数 schema 按 CDP 定义。 + +> 约束:不做任何 system prompt / browser context 缩减。 + +--- + +## 1) UI:Browser Tabs 分区仅在 
`tabCount > 0` 时渲染 + +- 修改 `WorkspaceView.vue`: + - 引入 `useYoBrowserStore()`。 + - 将 `showBrowserTabs` 改为:`chatMode.currentMode.value === 'agent' && yoBrowserStore.tabCount > 0`。 + +说明: +- `yoBrowserStore.tabCount` 已存在且由 tabs 数组计算。 +- tabs 更新依赖现有 `YO_BROWSER_EVENTS.*`(TAB_CREATED/TAB_CLOSED/TAB_COUNT_CHANGED 等),无需新增事件。 + +--- + +## 2) YoBrowser 工具直接注入(agent 模式,不依赖 skills) + +### 2.1 移除 tool definitions 的 skill gating + +- `src/main/presenter/browser/YoBrowserToolHandler.ts` + - 删除 `getActiveSkills()` 方法或不再使用。 + - `getToolDefinitions()` 直接返回 `getYoBrowserToolDefinitions()`(不再受 `activeSkills` 控制)。 + +### 2.2 同步更新 AgentToolManager 注入逻辑 + +- `src/main/presenter/agentPresenter/acp/agentToolManager.ts` + - `getAllToolDefinitions()` 中,在 agent 模式下直接追加 `yoBrowserPresenter.toolHandler.getToolDefinitions()`(不再传递/依赖 conversationId 做 gating)。 + - `callTool()` 中,`toolName.startsWith('yo_browser_')` 分支保持不变(继续路由到 YoBrowser handler)。 + +### 2.3 移除 skill 文档与残留引用 + +- 删除 `resources/skills/yo-browser-cdp/` 整个目录。 +- `docs/architecture/tool-system.md`: + - 删除或改写“YoBrowser CDP 工具仅在 `yo-browser-cdp` skill 激活时可用”的描述。 + - 改为:“YoBrowser CDP 工具在 agent 模式下直接可用”。 +- 全局搜索 `yo-browser-cdp` / `allowedTools` / `skill gated`,确保没有残留引用(代码、文档、测试)。 + +--- + +## 3) 工具实现:CDP 方式 + 参数定义(保持现状) + +### 3.1 工具集合(无需改动) + +- `yo_browser_tab_list` +- `yo_browser_tab_new` +- `yo_browser_tab_activate` +- `yo_browser_tab_close` +- `yo_browser_cdp_send` + +### 3.2 参数 schema(保持现状,无需改动) + +- `src/main/presenter/browser/YoBrowserToolDefinitions.ts`: + - `cdp_send` 参数:`{ tabId?: string, method: string, params?: object }`。 + - 其他 tab 管理工具参数保持不变。 + +### 3.3 安全边界(保持现状) + +- `src/main/presenter/browser/BrowserTab.ensureSession()`: + - 检查 `currentUrl.startsWith('local://')`,若为真则抛出错误(禁止 CDP attach)。 + +--- + +## 不在本计划内 + +- system prompt / browser context 的缩减或重写。 +- 任何对 YoBrowser UI 行为(窗口位置/大小等)的调整。 +- skills 体系(YoBrowser 不再使用 skills 来控制工具可见性)。 diff --git a/docs/specs/yobrowser-optimization/spec.md 
b/docs/specs/yobrowser-optimization/spec.md new file mode 100644 index 000000000..1180753e0 --- /dev/null +++ b/docs/specs/yobrowser-optimization/spec.md @@ -0,0 +1,65 @@ +# YoBrowser Optimization(UI + CDP 工具) + +## 背景 + +当前 YoBrowser 在 Workspace 侧边栏存在 UI 问题: +- `src/renderer/src/components/workspace/WorkspaceView.vue` 在 `agent` 模式下总会渲染 `WorkspaceBrowserTabs` 分区,即便没有任何 tab,也会出现一块空区域。 + +## 目标(Goals) + +1. **UI**:只有存在 YoBrowser tabs 时,Workspace 侧边栏才显示 Browser Tabs 分区。 +2. **Agent 工具直接注入**:YoBrowser 工具(`yo_browser_*`)在 agent 模式下直接可用,无需激活任何 skill。 + +## 非目标(Non-Goals) + +- 不调整 YoBrowser window 的 UI、尺寸、布局、位置策略。 +- 不修改 `BrowserContextBuilder.buildSystemPrompt` 的注入策略(不做减少/压缩/裁剪)。 +- 不改造其他 agent 工具(filesystem/bash/mcp 等)。 +- 不使用 skills 系统来控制 YoBrowser 工具的可见性。 + +## 用户故事(User Stories) + +- 作为用户,我不希望在没有任何浏览器 tab 的情况下,Workspace 侧边栏仍出现空的 Browser Tabs 分区。 +- 作为 agent 用户,我希望 YoBrowser 自动化能力以 CDP 为核心,工具在 agent 模式下直接可用。 + +## 约束与假设(Constraints & Assumptions) + +- YoBrowser 现有实现已经基于 Electron Debugger/CDP(`CDPManager`, `BrowserTab.ensureSession()`)。 +- 安全边界:`local://` URL 禁止绑定 CDP(`BrowserTab` 现有逻辑已做限制)。 + +## 验收标准(Acceptance Criteria) + +### A. UI:Workspace Browser Tabs 展示逻辑 + +- [ ] `src/renderer/src/components/workspace/WorkspaceView.vue` 仅在 `chatMode === 'agent' && yoBrowserStore.tabCount > 0` 时渲染 `WorkspaceBrowserTabs`。 +- [ ] 当 `tabCount === 0` 时,不显示 Browser Tabs 分区(不保留空白区域)。 + +### B. 工具:YoBrowser CDP 工具直接注入(agent 模式) + +- [ ] agent tool definitions 中包含 `yo_browser_*` 工具(agent 模式下直接可用)。 +- [ ] agent 的 tool call 路由正确处理 `yo_browser_*` 工具(`toolName.startsWith('yo_browser_')`)。 +- [ ] 不依赖 skills 系统(不检查 `activeSkills`)。 + +### C. 
工具实现:CDP 方式 + 合适的参数定义 + +- [ ] 工具集合: + - `yo_browser_tab_list`:列出 tabs 与 active tab。 + - `yo_browser_tab_new`:创建新 tab(可选 url)。 + - `yo_browser_tab_activate`:激活 tab。 + - `yo_browser_tab_close`:关闭 tab。 + - `yo_browser_cdp_send`:向指定/当前 tab 的 CDP session 发送 `{ method, params }`。 +- [ ] 参数 schema 符合 CDP 使用方式(method、params 等)。 +- [ ] 保留安全边界:`local://` 禁止 CDP attach。 + +### D. Prompt/Context + +- [ ] `BrowserContextBuilder.buildSystemPrompt` 的注入保持现状(不做减少/压缩/裁剪)。 + +### E. 兼容性 + +- [ ] 不涉及数据迁移。 +- [ ] 现有 YoBrowser UI/窗口/Tab 生命周期保持可用。 + +## Open Questions + +无。 diff --git a/docs/specs/yobrowser-optimization/tasks.md b/docs/specs/yobrowser-optimization/tasks.md new file mode 100644 index 000000000..25b972712 --- /dev/null +++ b/docs/specs/yobrowser-optimization/tasks.md @@ -0,0 +1,61 @@ +# YoBrowser Optimization:任务拆分(Tasks) + +## Phase 1:UI(Workspace 侧边栏) + +1. 调整 Browser Tabs 分区显示条件 +- 文件:`src/renderer/src/components/workspace/WorkspaceView.vue` +- 改动:`WorkspaceBrowserTabs` 仅在 `chatMode === 'agent' && yoBrowserStore.tabCount > 0` 时渲染。 +- 验收:无 tabs 时不出现分区;有 tabs 时出现并能点击切换。 + +2.(可选)补 renderer 单测 +- 文件:`test/renderer/**`(按现有测试组织落位) +- 用例:tabCount=0/1 下的条件渲染。 + +--- + +## Phase 2:移除 YoBrowser skill gating + +3. 移除 YoBrowser tool definitions 的 skill gating +- 文件:`src/main/presenter/browser/YoBrowserToolHandler.ts` +- 改动:删除 `getActiveSkills()` 方法或不再使用;`getToolDefinitions()` 直接返回 `getYoBrowserToolDefinitions()`。 +- 验收:不再依赖 `activeSkills`。 + +4. 调整 AgentToolManager 注入逻辑(不再依赖 conversationId 做 gating) +- 文件:`src/main/presenter/agentPresenter/acp/agentToolManager.ts` +- 改动:`getAllToolDefinitions()` 中,agent 模式下直接追加 `yoBrowserPresenter.toolHandler.getToolDefinitions()`(可不传 conversationId)。 +- 验收:tool definitions 包含 `yo_browser_*`。 + +5. 
删除 skill 文档与残留引用 +- 删除 `resources/skills/yo-browser-cdp/` 整个目录。 +- 文件:`docs/architecture/tool-system.md`(以及搜索到的其他文档) +- 改动:删除或改写“仅在 `yo-browser-cdp` skill 激活时可用”的描述;改为“agent 模式下直接可用”。 +- 全局搜索:确认没有残留的 `yo-browser-cdp` / `skill gated` 引用。 + +--- + +## Phase 3:验证工具实现(保持 CDP 方式) + +6. 验证工具参数定义 +- 文件:`src/main/presenter/browser/YoBrowserToolDefinitions.ts` +- 验收:`yo_browser_cdp_send` 参数为 `{ tabId?: string, method: string, params?: object }`。 + +7. 验证安全边界 +- 文件:`src/main/presenter/browser/BrowserTab.ts` +- 验收:`ensureSession()` 中有 `local://` URL 检查。 + +8.(可选)补 main 单测 +- 验证: + - agent 模式下 tool definitions 包含 `yo_browser_*`。 + - `callTool()` 正确路由到 YoBrowser handler。 + +--- + +## Phase 4:验收与质量门禁 + +9. 手工验收 +- Agent 模式下:无 tabs 时 Workspace 不显示 Browser Tabs;创建 tab 后显示。 +- Agent 模式下:不激活任何 skill,`yo_browser_*` 工具直接可用。 + +10. 质量门禁 +- `pnpm run format && pnpm run lint && pnpm run typecheck` +- `pnpm test` diff --git a/electron.vite.config.ts b/electron.vite.config.ts index b32eae5f7..effef1451 100644 --- a/electron.vite.config.ts +++ b/electron.vite.config.ts @@ -7,6 +7,8 @@ import monacoEditorPlugin from 'vite-plugin-monaco-editor-esm' import path from 'node:path' import tailwindcss from '@tailwindcss/vite' +const isCustomElement = (tag: string) => + tag === 'voice-agent-widget' || tag.startsWith('ui-resource-renderer') export default defineConfig({ main: { @@ -82,8 +84,7 @@ export default defineConfig({ vue({ template: { compilerOptions: { - // 将所有带短横线的标签名都视为自定义元素 - isCustomElement: (tag) => tag.startsWith('ui-resource-renderer') + isCustomElement } } }), diff --git a/package.json b/package.json index 7304a162e..8a34cc198 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "DeepChat", - "version": "0.5.6-beta.5", + "version": "0.5.7", "description": "DeepChat,一个简单易用的 Agent 客户端", "main": "./out/main/index.js", "author": "ThinkInAIXYZ", @@ -22,8 +22,8 @@ "format:check": "prettier --check .", "format": "prettier --cache --write .", "lint": "oxlint .", - 
"typecheck:node": "tsc --noEmit -p tsconfig.node.json --composite false", - "typecheck:web": "vue-tsc --noEmit -p tsconfig.app.json --composite false", + "typecheck:node": "tsgo --noEmit -p tsconfig.node.json --composite false", + "typecheck:web": "vue-tsgo --project tsconfig.app.tsgo.json", "typecheck": "pnpm run typecheck:node && pnpm run typecheck:web", "start": "electron-vite preview", "dev": "cross-env VITE_ENABLE_PLAYGROUND=true electron-vite dev --watch", @@ -76,7 +76,7 @@ "chokidar": "^5.0.0", "compare-versions": "^6.1.1", "cross-spawn": "^7.0.6", - "diff": "^7.0.0", + "diff": "^8.0.3", "electron-log": "^5.4.3", "electron-store": "^8.2.0", "electron-updater": "^6.6.2", @@ -132,6 +132,7 @@ "@types/mime-types": "^3.0.1", "@types/node": "^22.19.3", "@types/xlsx": "^0.0.35", + "@typescript/native-preview": "7.0.0-dev.20260115.1", "@vee-validate/zod": "^4.15.1", "@vitejs/plugin-vue": "^6.0.3", "@vitest/ui": "^3.2.4", @@ -179,7 +180,7 @@ "vue-i18n": "^11.2.7", "vue-router": "4", "vue-sonner": "^2.0.9", - "vue-tsc": "^2.2.12", + "vue-tsgo": "0.0.1-yggdrasill.11", "vue-virtual-scroller": "^2.0.0-beta.8", "vuedraggable": "^4.1.0", "yaml": "^2.8.2", diff --git a/resources/skills/deepchat-settings/SKILL.md b/resources/skills/deepchat-settings/SKILL.md new file mode 100644 index 000000000..236eb522a --- /dev/null +++ b/resources/skills/deepchat-settings/SKILL.md @@ -0,0 +1,69 @@ +--- +name: deepchat-settings +description: DeepChat app settings modification (DeepChat 设置/偏好) skill. Activate ONLY when the user explicitly asks to change DeepChat's own settings/preferences (e.g., theme, language, font size...). Do NOT activate for OS/system settings, editor settings, or other apps. 
+allowedTools: + - deepchat_settings_toggle + - deepchat_settings_set_language + - deepchat_settings_set_theme + - deepchat_settings_set_font_size + - deepchat_settings_open +--- + +# DeepChat Settings Modification Skill + +Use this skill to safely change DeepChat *application* settings during a conversation. + +## Core rules + +- Only change settings when the user is asking to change **DeepChat** settings. +- Use the dedicated settings tools; never attempt arbitrary key/value writes. +- These tools are intended to be available only when this skill is active; if they are missing, activate this skill via `skill_control`. +- If the request is ambiguous, ask a clarifying question before applying. +- For unsupported or high-risk settings (MCP, prompts, providers, API keys, paths): do **not** apply changes; instead explain where to change it and open Settings. +- After completing the settings task, deactivate this skill via `skill_control` to keep context small. + +## Supported settings (initial allowlist) + +Toggles: + +- `soundEnabled`: enable/disable sound effects. +- `copyWithCotEnabled`: enable/disable copying COT details. + +Enums: + +- `language`: DeepChat locale, including `system`, `zh-CN`, `en-US`, `zh-TW`, `zh-HK`, `ko-KR`, `ru-RU`, `ja-JP`, `fr-FR`, `fa-IR`, `pt-BR`, `da-DK`, `he-IL`. +- `theme`: `dark | light | system`. +- `fontSizeLevel`: integer level within supported range. + +Settings navigation (open-only): + +- Use `deepchat_settings_open` only when the request cannot be fulfilled by the settings tools, and avoid calling it if the change is already applied. +- `section` hints: `common`, `display`, `provider`, `mcp`, `prompt`, `acp`, `skills`, `knowledge-base`, `database`, `shortcut`, `about`. + +## Workflow + +1. Confirm the user is requesting a DeepChat settings change. +2. Determine the target setting and the intended value. +3. 
If the setting is supported, call the matching tool: + - toggles: `deepchat_settings_toggle` + - language: `deepchat_settings_set_language` + - theme: `deepchat_settings_set_theme` + - font size: `deepchat_settings_set_font_size` +4. Confirm back to the user what changed (include the final value). +5. If the setting is unsupported, call `deepchat_settings_open` (with `section`) and provide a short pointer to the correct Settings section. Do not call it if the requested change has already been applied. +6. Deactivate this skill via `skill_control`. + +## Examples (activate this skill) + +- "把主题改成深色" +- "Turn off sound effects" +- "语言改成英文" +- "复制时不要带 COT" +- "Open the MCP settings page" +- "Edit my prompts" + +## Examples (do NOT activate this skill) + +- "把 Windows 的系统代理改成..." +- "帮我改 VS Code 的字体" +- "把电脑的声音关掉" diff --git a/src/main/events.ts b/src/main/events.ts index 759756e2d..f54f14ed9 100644 --- a/src/main/events.ts +++ b/src/main/events.ts @@ -21,6 +21,7 @@ export const CONFIG_EVENTS = { SYNC_SETTINGS_CHANGED: 'config:sync-settings-changed', SEARCH_ENGINES_UPDATED: 'config:search-engines-updated', SEARCH_PREVIEW_CHANGED: 'config:search-preview-changed', + AUTO_SCROLL_CHANGED: 'config:auto-scroll-changed', NOTIFICATIONS_CHANGED: 'config:notifications-changed', CONTENT_PROTECTION_CHANGED: 'config:content-protection-changed', SOUND_ENABLED_CHANGED: 'config:sound-enabled-changed', // 新增:声音开关变更事件 @@ -106,6 +107,11 @@ export const WINDOW_EVENTS = { WINDOW_RESTORED: 'window:restored' } +// Settings related events +export const SETTINGS_EVENTS = { + NAVIGATE: 'settings:navigate' +} + // ollama 相关事件 export const OLLAMA_EVENTS = { PULL_MODEL_PROGRESS: 'ollama:pull-model-progress' diff --git a/src/main/presenter/agentPresenter/acp/agentToolManager.ts b/src/main/presenter/agentPresenter/acp/agentToolManager.ts index 86d932758..c1de4c026 100644 --- a/src/main/presenter/agentPresenter/acp/agentToolManager.ts +++ b/src/main/presenter/agentPresenter/acp/agentToolManager.ts @@ 
-1,4 +1,4 @@ -import type { IConfigPresenter, IYoBrowserPresenter, MCPToolDefinition } from '@shared/presenter' +import type { IConfigPresenter, MCPToolDefinition } from '@shared/presenter' import { zodToJsonSchema } from 'zod-to-json-schema' import { z } from 'zod' import fs from 'fs' @@ -9,6 +9,13 @@ import { presenter } from '@/presenter' import { AgentFileSystemHandler } from './agentFileSystemHandler' import { AgentBashHandler } from './agentBashHandler' import { SkillTools } from '../../skillPresenter/skillTools' +import { questionToolSchema, QUESTION_TOOL_NAME } from '../tools/questionTool' +import { + ChatSettingsToolHandler, + buildChatSettingsToolDefinitions, + CHAT_SETTINGS_SKILL_NAME, + CHAT_SETTINGS_TOOL_NAMES +} from './chatSettingsTools' // Consider moving to a shared handlers location in future refactoring import { @@ -40,25 +47,25 @@ export interface AgentToolCallResult { baseCommand?: string } conversationId?: string + rememberable?: boolean } } } interface AgentToolManagerOptions { - yoBrowserPresenter: IYoBrowserPresenter agentWorkspacePath: string | null configPresenter: IConfigPresenter commandPermissionHandler?: CommandPermissionService } export class AgentToolManager { - private readonly yoBrowserPresenter: IYoBrowserPresenter private agentWorkspacePath: string | null private fileSystemHandler: AgentFileSystemHandler | null = null private bashHandler: AgentBashHandler | null = null private readonly commandPermissionHandler?: CommandPermissionService private readonly configPresenter: IConfigPresenter private skillTools: SkillTools | null = null + private chatSettingsHandler: ChatSettingsToolHandler | null = null private readonly fileSystemSchemas = { read_file: z.object({ paths: z.array(z.string()).min(1), @@ -224,7 +231,6 @@ export class AgentToolManager { } constructor(options: AgentToolManagerOptions) { - this.yoBrowserPresenter = options.yoBrowserPresenter this.agentWorkspacePath = options.agentWorkspacePath this.configPresenter = 
options.configPresenter this.commandPermissionHandler = options.commandPermissionHandler @@ -244,6 +250,7 @@ export class AgentToolManager { chatMode: 'chat' | 'agent' | 'acp agent' supportsVision: boolean agentWorkspacePath: string | null + conversationId?: string }): Promise { const defs: MCPToolDefinition[] = [] const isAgentMode = context.chatMode === 'agent' @@ -266,28 +273,57 @@ export class AgentToolManager { this.agentWorkspacePath = effectiveWorkspacePath } - // 1. Yo Browser tools (agent mode only) - if (isAgentMode) { - try { - const yoDefs = await this.yoBrowserPresenter.getToolDefinitions(context.supportsVision) - defs.push(...yoDefs) - } catch (error) { - logger.warn('[AgentToolManager] Failed to load Yo Browser tool definitions', { error }) - } - } - - // 2. FileSystem tools (agent mode only) + // 1. FileSystem tools (agent mode only) if (isAgentMode && this.fileSystemHandler) { const fsDefs = this.getFileSystemToolDefinitions() defs.push(...fsDefs) } + // 2. Built-in question tool (all modes) + defs.push(...this.getQuestionToolDefinitions()) + // 3. Skill tools (agent mode only) if (isAgentMode && this.isSkillsEnabled()) { const skillDefs = this.getSkillToolDefinitions() defs.push(...skillDefs) } + // 4. DeepChat settings tools (agent mode only, skill gated) + if (isAgentMode && this.isSkillsEnabled() && context.conversationId) { + try { + const activeSkills = await presenter.skillPresenter.getActiveSkills(context.conversationId) + if (activeSkills.includes(CHAT_SETTINGS_SKILL_NAME)) { + const allowedTools = await presenter.skillPresenter.getActiveSkillsAllowedTools( + context.conversationId + ) + const requiredSettingsTools = Object.values(CHAT_SETTINGS_TOOL_NAMES) + const nonOpenSettingsTools = requiredSettingsTools.filter( + (tool) => tool !== CHAT_SETTINGS_TOOL_NAMES.open + ) + const hasNonOpenSettingsTool = nonOpenSettingsTools.some((tool) => + allowedTools.includes(tool) + ) + const effectiveAllowedTools = hasNonOpenSettingsTool + ? 
allowedTools + : Array.from(new Set([...allowedTools, ...requiredSettingsTools])) + + const settingsDefs = buildChatSettingsToolDefinitions(effectiveAllowedTools) + defs.push(...settingsDefs) + } + } catch (error) { + logger.warn('[AgentToolManager] Failed to load DeepChat settings tools', { error }) + } + } + + // 5. YoBrowser CDP tools (agent mode only) + if (isAgentMode) { + try { + defs.push(...presenter.yoBrowserPresenter.toolHandler.getToolDefinitions()) + } catch (error) { + logger.warn('[AgentToolManager] Failed to load YoBrowser tools', { error }) + } + } + return defs } @@ -299,14 +335,18 @@ export class AgentToolManager { args: Record, conversationId?: string ): Promise { - // Route to Yo Browser tools - if (toolName.startsWith('browser_')) { - const response = await this.yoBrowserPresenter.callTool( - toolName, - args as Record - ) + if (toolName === QUESTION_TOOL_NAME) { + const validationResult = questionToolSchema.safeParse(args) + if (!validationResult.success) { + throw new Error(`Invalid arguments for question: ${validationResult.error.message}`) + } return { - content: typeof response === 'string' ? 
response : JSON.stringify(response) + content: 'question_requested', + rawData: { + content: 'question_requested', + isError: false, + toolResult: validationResult.data + } } } @@ -323,6 +363,19 @@ export class AgentToolManager { return await this.callSkillTool(toolName, args, conversationId) } + // Route to DeepChat settings tools + if (this.isChatSettingsTool(toolName)) { + return await this.callChatSettingsTool(toolName, args, conversationId) + } + + // Route to YoBrowser CDP tools + if (toolName.startsWith('yo_browser_')) { + const response = await presenter.yoBrowserPresenter.toolHandler.callTool(toolName, args) + return { + content: response + } + } + throw new Error(`Unknown Agent tool: ${toolName}`) } @@ -576,6 +629,29 @@ export class AgentToolManager { ] } + private getQuestionToolDefinitions(): MCPToolDefinition[] { + return [ + { + type: 'function', + function: { + name: QUESTION_TOOL_NAME, + description: + 'Ask the user a structured question and pause the agent loop until the user responds.', + parameters: zodToJsonSchema(questionToolSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'agent-core', + icons: '❓', + description: 'Agent core tools' + } + } + ] + } + private isFileSystemTool(toolName: string): boolean { const filesystemTools = [ 'read_file', @@ -843,6 +919,14 @@ export class AgentToolManager { return this.configPresenter.getSkillsEnabled() } + private async isChatSettingsSkillActive(conversationId?: string): Promise { + if (!conversationId || !this.isSkillsEnabled()) { + return false + } + const activeSkills = await presenter.skillPresenter.getActiveSkills(conversationId) + return activeSkills.includes(CHAT_SETTINGS_SKILL_NAME) + } + private getSkillTools(): SkillTools { if (!this.skillTools) { this.skillTools = new SkillTools(presenter.skillPresenter) @@ -850,6 +934,18 @@ export class AgentToolManager { return this.skillTools } + private getChatSettingsHandler(): ChatSettingsToolHandler { + 
if (!this.chatSettingsHandler) { + this.chatSettingsHandler = new ChatSettingsToolHandler({ + configPresenter: this.configPresenter, + skillPresenter: presenter.skillPresenter, + sessionPresenter: presenter.sessionPresenter, + windowPresenter: presenter.windowPresenter + }) + } + return this.chatSettingsHandler + } + private getSkillToolDefinitions(): MCPToolDefinition[] { const schemas = this.skillSchemas return [ @@ -896,6 +992,16 @@ export class AgentToolManager { return toolName === 'skill_list' || toolName === 'skill_control' } + private isChatSettingsTool(toolName: string): boolean { + return ( + toolName === CHAT_SETTINGS_TOOL_NAMES.toggle || + toolName === CHAT_SETTINGS_TOOL_NAMES.setLanguage || + toolName === CHAT_SETTINGS_TOOL_NAMES.setTheme || + toolName === CHAT_SETTINGS_TOOL_NAMES.setFontSize || + toolName === CHAT_SETTINGS_TOOL_NAMES.open + ) + } + private async callSkillTool( toolName: string, args: Record, @@ -932,4 +1038,57 @@ export class AgentToolManager { throw new Error(`Unknown skill tool: ${toolName}`) } + + private async callChatSettingsTool( + toolName: string, + args: Record, + conversationId?: string + ): Promise { + const handler = this.getChatSettingsHandler() + if (toolName === CHAT_SETTINGS_TOOL_NAMES.toggle) { + const result = await handler.toggle(args, conversationId) + return { content: JSON.stringify(result) } + } + if (toolName === CHAT_SETTINGS_TOOL_NAMES.setLanguage) { + const result = await handler.setLanguage(args, conversationId) + return { content: JSON.stringify(result) } + } + if (toolName === CHAT_SETTINGS_TOOL_NAMES.setTheme) { + const result = await handler.setTheme(args, conversationId) + return { content: JSON.stringify(result) } + } + if (toolName === CHAT_SETTINGS_TOOL_NAMES.setFontSize) { + const result = await handler.setFontSize(args, conversationId) + return { content: JSON.stringify(result) } + } + if (toolName === CHAT_SETTINGS_TOOL_NAMES.open) { + const shouldCheckPermission = await 
this.isChatSettingsSkillActive(conversationId) + if (shouldCheckPermission && conversationId) { + const approved = + presenter.settingsPermissionService?.consumeApproval(conversationId, toolName) ?? false + if (!approved) { + const responseContent = 'components.messageBlockPermissionRequest.description.write' + return { + content: responseContent, + rawData: { + content: responseContent, + isError: false, + requiresPermission: true, + permissionRequest: { + toolName, + serverName: CHAT_SETTINGS_SKILL_NAME, + permissionType: 'write', + description: 'Opening DeepChat settings requires approval.', + conversationId, + rememberable: false + } + } + } + } + } + const result = await handler.open(args, conversationId) + return { content: JSON.stringify(result) } + } + throw new Error(`Unknown DeepChat settings tool: ${toolName}`) + } } diff --git a/src/main/presenter/agentPresenter/acp/chatSettingsTools.ts b/src/main/presenter/agentPresenter/acp/chatSettingsTools.ts new file mode 100644 index 000000000..ec81a3fa8 --- /dev/null +++ b/src/main/presenter/agentPresenter/acp/chatSettingsTools.ts @@ -0,0 +1,509 @@ +import { z } from 'zod' +import { zodToJsonSchema } from 'zod-to-json-schema' +import type { + ApplyChatSettingResult, + ChatSettingValue, + ChatLanguage, + OpenChatSettingsResult, + OpenChatSettingsSection, + MCPToolDefinition, + IConfigPresenter, + ISkillPresenter, + ISessionPresenter, + IWindowPresenter +} from '@shared/presenter' +import { SETTINGS_EVENTS } from '@/events' + +export const CHAT_SETTINGS_SKILL_NAME = 'deepchat-settings' +export const CHAT_SETTINGS_TOOL_NAMES = { + toggle: 'deepchat_settings_toggle', + setLanguage: 'deepchat_settings_set_language', + setTheme: 'deepchat_settings_set_theme', + setFontSize: 'deepchat_settings_set_font_size', + open: 'deepchat_settings_open' +} as const + +const SUPPORTED_LANGUAGES = [ + 'system', + 'zh-CN', + 'en-US', + 'zh-TW', + 'zh-HK', + 'ko-KR', + 'ru-RU', + 'ja-JP', + 'fr-FR', + 'fa-IR', + 'pt-BR', + 'da-DK', + 
'he-IL' +] as const satisfies readonly ChatLanguage[] + +const SUPPORTED_THEMES = ['dark', 'light', 'system'] as const + +const FONT_SIZE_LEVELS = [0, 1, 2, 3, 4] as const + +const toggleSchema = z + .object({ + setting: z.enum(['soundEnabled', 'copyWithCotEnabled']).describe('Toggle setting id.'), + enabled: z.boolean().describe('Enable or disable the setting.') + }) + .strict() + +const languageSchema = z + .object({ + language: z.enum(SUPPORTED_LANGUAGES).describe('DeepChat language/locale.') + }) + .strict() + +const themeSchema = z + .object({ + theme: z.enum(SUPPORTED_THEMES).describe('Theme mode for DeepChat.') + }) + .strict() + +const fontSizeSchema = z + .object({ + level: z + .union( + FONT_SIZE_LEVELS.map((value) => z.literal(value)) as [ + z.ZodLiteral<0>, + z.ZodLiteral<1>, + z.ZodLiteral<2>, + z.ZodLiteral<3>, + z.ZodLiteral<4> + ] + ) + .describe('Font size level (0-4).') + }) + .strict() + +const SECTION_ALIASES: Record = { + appearance: 'display', + theme: 'display', + language: 'display', + font: 'display', + 'font-size': 'display', + sound: 'common', + copy: 'common', + 'copy-cot': 'common', + proxy: 'common', + prompts: 'prompt', + providers: 'provider' +} + +const OPEN_SECTIONS = [ + 'common', + 'display', + 'provider', + 'mcp', + 'prompt', + 'acp', + 'skills', + 'knowledge-base', + 'database', + 'shortcut', + 'about' +] as const satisfies readonly OpenChatSettingsSection[] + +const OPEN_SECTION_ALIASES = [ + 'appearance', + 'theme', + 'language', + 'font', + 'font-size', + 'sound', + 'copy', + 'copy-cot', + 'proxy', + 'prompts', + 'providers' +] as const + +const OPEN_SECTION_VALUES = [...OPEN_SECTIONS, ...OPEN_SECTION_ALIASES] as const + +const openSchema = z + .object({ + section: z.enum([...OPEN_SECTION_VALUES] as [string, ...string[]]).optional() + }) + .strict() + +const SETTINGS_ROUTE_NAMES: Record = { + common: 'settings-common', + display: 'settings-display', + provider: 'settings-provider', + mcp: 'settings-mcp', + prompt: 
'settings-prompt', + acp: 'settings-acp', + skills: 'settings-skills', + 'knowledge-base': 'settings-knowledge-base', + database: 'settings-database', + shortcut: 'settings-shortcut', + about: 'settings-about' +} + +const normalizeSection = (section?: string): OpenChatSettingsSection | undefined => { + if (!section) return undefined + const normalized = section.trim().toLowerCase() + if (!normalized) return undefined + if (OPEN_SECTIONS.includes(normalized as OpenChatSettingsSection)) { + return normalized as OpenChatSettingsSection + } + return SECTION_ALIASES[normalized] +} + +type ApplyError = Extract + +const buildError = ( + errorCode: ApplyError['errorCode'], + message: string, + details?: unknown +): ApplyError => ({ + ok: false, + errorCode, + message, + ...(details ? { details } : {}) +}) + +export class ChatSettingsToolHandler { + constructor( + private readonly options: { + configPresenter: IConfigPresenter + skillPresenter: ISkillPresenter + sessionPresenter: ISessionPresenter + windowPresenter: IWindowPresenter + } + ) {} + + private async ensureSkillActive(conversationId?: string): Promise { + if (!conversationId) { + return buildError('skill_inactive', 'No conversation context to apply settings.') + } + if (!this.options.configPresenter.getSkillsEnabled()) { + return buildError('skill_inactive', 'Skills are disabled.') + } + const activeSkills = await this.options.skillPresenter.getActiveSkills(conversationId) + if (!activeSkills.includes(CHAT_SETTINGS_SKILL_NAME)) { + return buildError('skill_inactive', 'deepchat-settings skill is not active.') + } + return null + } + + private getCurrentValue(key: string): ChatSettingValue | undefined { + const configPresenter = this.options.configPresenter + switch (key) { + case 'soundEnabled': + return configPresenter.getSoundEnabled() + case 'copyWithCotEnabled': + return configPresenter.getCopyWithCotEnabled() + case 'language': + return configPresenter.getSetting('language') + case 'theme': + return 
configPresenter.getSetting('appTheme') + case 'fontSizeLevel': + return configPresenter.getSetting('fontSizeLevel') + default: + return undefined + } + } + + async toggle(raw: unknown, conversationId?: string): Promise { + const guard = await this.ensureSkillActive(conversationId) + if (guard) { + return guard + } + + const parsed = toggleSchema.safeParse(raw) + if (!parsed.success) { + return buildError('invalid_request', 'Invalid toggle request.', parsed.error.flatten()) + } + + const { setting, enabled } = parsed.data + const previousValue = this.getCurrentValue(setting) + const configPresenter = this.options.configPresenter + + try { + switch (setting) { + case 'soundEnabled': + configPresenter.setSoundEnabled(enabled) + break + case 'copyWithCotEnabled': + configPresenter.setCopyWithCotEnabled(enabled) + break + default: + return buildError('unknown_setting', `Unsupported toggle: ${setting}`) + } + } catch (error) { + return buildError( + 'apply_failed', + 'Failed to apply DeepChat toggle.', + error instanceof Error ? error.message : String(error) + ) + } + + return { + ok: true, + id: setting, + value: enabled, + previousValue, + appliedAt: Date.now() + } + } + + async setLanguage(raw: unknown, conversationId?: string): Promise { + const guard = await this.ensureSkillActive(conversationId) + if (guard) { + return guard + } + + const parsed = languageSchema.safeParse(raw) + if (!parsed.success) { + return buildError('invalid_request', 'Invalid language request.', parsed.error.flatten()) + } + + const { language } = parsed.data + const previousValue = this.getCurrentValue('language') + try { + this.options.configPresenter.setLanguage(language) + } catch (error) { + return buildError( + 'apply_failed', + 'Failed to apply DeepChat language.', + error instanceof Error ? 
error.message : String(error) + ) + } + + return { + ok: true, + id: 'language', + value: language, + previousValue, + appliedAt: Date.now() + } + } + + async setTheme(raw: unknown, conversationId?: string): Promise { + const guard = await this.ensureSkillActive(conversationId) + if (guard) { + return guard + } + + const parsed = themeSchema.safeParse(raw) + if (!parsed.success) { + return buildError('invalid_request', 'Invalid theme request.', parsed.error.flatten()) + } + + const { theme } = parsed.data + const previousValue = this.getCurrentValue('theme') + try { + await this.options.configPresenter.setTheme(theme) + } catch (error) { + return buildError( + 'apply_failed', + 'Failed to apply DeepChat theme.', + error instanceof Error ? error.message : String(error) + ) + } + + return { + ok: true, + id: 'theme', + value: theme, + previousValue, + appliedAt: Date.now() + } + } + + async setFontSize(raw: unknown, conversationId?: string): Promise { + const guard = await this.ensureSkillActive(conversationId) + if (guard) { + return guard + } + + const parsed = fontSizeSchema.safeParse(raw) + if (!parsed.success) { + return buildError('invalid_request', 'Invalid font size request.', parsed.error.flatten()) + } + + const { level } = parsed.data + const previousValue = this.getCurrentValue('fontSizeLevel') + try { + this.options.configPresenter.setSetting('fontSizeLevel', level) + } catch (error) { + return buildError( + 'apply_failed', + 'Failed to apply DeepChat font size.', + error instanceof Error ? 
error.message : String(error) + ) + } + + return { + ok: true, + id: 'fontSizeLevel', + value: level, + previousValue, + appliedAt: Date.now() + } + } + + async open(raw: unknown, conversationId?: string): Promise { + const guard = await this.ensureSkillActive(conversationId) + if (guard && !guard.ok) { + return { + ok: false, + errorCode: 'skill_inactive', + message: guard.message, + details: guard.details + } + } + + const parsed = openSchema.safeParse(raw) + if (!parsed.success) { + return { + ok: false, + errorCode: 'invalid_request', + message: 'Invalid settings navigation request.', + details: parsed.error.flatten() + } + } + + const { section } = parsed.data + const normalizedSection = normalizeSection(section) + const routeName = normalizedSection ? SETTINGS_ROUTE_NAMES[normalizedSection] : undefined + + const windowId = await this.options.windowPresenter.createSettingsWindow() + if (!windowId) { + return { + ok: false, + errorCode: 'open_failed', + message: 'Failed to open settings window.' 
+ } + } + + if (routeName) { + this.options.windowPresenter.sendToWindow(windowId, SETTINGS_EVENTS.NAVIGATE, { + routeName, + section: normalizedSection + }) + } + + return { + ok: true, + section: normalizedSection, + routeName, + appliedAt: Date.now() + } + } +} + +export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToolDefinition[] => { + const definitions: MCPToolDefinition[] = [] + const allowToggle = allowedTools.includes(CHAT_SETTINGS_TOOL_NAMES.toggle) + const allowLanguage = allowedTools.includes(CHAT_SETTINGS_TOOL_NAMES.setLanguage) + const allowTheme = allowedTools.includes(CHAT_SETTINGS_TOOL_NAMES.setTheme) + const allowFontSize = allowedTools.includes(CHAT_SETTINGS_TOOL_NAMES.setFontSize) + const allowOpen = allowedTools.includes(CHAT_SETTINGS_TOOL_NAMES.open) + + if (allowToggle) { + definitions.push({ + type: 'function', + function: { + name: CHAT_SETTINGS_TOOL_NAMES.toggle, + description: 'Toggle a DeepChat setting (sound or copy COT).', + parameters: zodToJsonSchema(toggleSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'deepchat-settings', + icons: 'settings', + description: 'DeepChat settings control' + } + }) + } + + if (allowLanguage) { + definitions.push({ + type: 'function', + function: { + name: CHAT_SETTINGS_TOOL_NAMES.setLanguage, + description: 'Set DeepChat language/locale.', + parameters: zodToJsonSchema(languageSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'deepchat-settings', + icons: 'settings', + description: 'DeepChat settings control' + } + }) + } + + if (allowTheme) { + definitions.push({ + type: 'function', + function: { + name: CHAT_SETTINGS_TOOL_NAMES.setTheme, + description: 'Set DeepChat theme mode.', + parameters: zodToJsonSchema(themeSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'deepchat-settings', + icons: 'settings', + description: 
'DeepChat settings control' + } + }) + } + + if (allowFontSize) { + definitions.push({ + type: 'function', + function: { + name: CHAT_SETTINGS_TOOL_NAMES.setFontSize, + description: 'Set DeepChat font size level.', + parameters: zodToJsonSchema(fontSizeSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'deepchat-settings', + icons: 'settings', + description: 'DeepChat settings control' + } + }) + } + + if (allowOpen) { + definitions.push({ + type: 'function', + function: { + name: CHAT_SETTINGS_TOOL_NAMES.open, + description: + 'Open DeepChat settings only when the request cannot be fulfilled via other settings tools; do not call after the change is already applied.', + parameters: zodToJsonSchema(openSchema) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'deepchat-settings', + icons: 'settings', + description: 'DeepChat settings control' + } + }) + } + + return definitions +} diff --git a/src/main/presenter/agentPresenter/index.ts b/src/main/presenter/agentPresenter/index.ts index 7642e8de6..ee403f94f 100644 --- a/src/main/presenter/agentPresenter/index.ts +++ b/src/main/presenter/agentPresenter/index.ts @@ -7,7 +7,7 @@ import type { ISQLitePresenter, MESSAGE_METADATA } from '@shared/presenter' -import type { AssistantMessage } from '@shared/chat' +import type { AssistantMessage, AssistantMessageBlock, UserMessageContent } from '@shared/chat' import { eventBus, SendTarget } from '@/eventbus' import { STREAM_EVENTS } from '@/events' import { presenter } from '@/presenter' @@ -161,6 +161,12 @@ export class AgentPresenter implements IAgentPresenter { this.buildMessageMetadata(conversation) ) + try { + await this.resolvePendingQuestionIfNeeded(agentId, userMessage.id, content) + } catch (error) { + console.warn('[AgentPresenter] Failed to auto-resolve pending question:', error) + } + const assistantMessage = await this.streamGenerationHandler.generateAIResponse( agentId, 
userMessage.id @@ -301,6 +307,25 @@ export class AgentPresenter implements IAgentPresenter { ) } + async resolveQuestion( + messageId: string, + toolCallId: string, + answerText: string, + answerMessageId?: string + ): Promise { + await this.handleQuestionResolution(messageId, toolCallId, { + resolution: 'replied', + answerText, + answerMessageId + }) + } + + async rejectQuestion(messageId: string, toolCallId: string): Promise { + await this.handleQuestionResolution(messageId, toolCallId, { + resolution: 'rejected' + }) + } + async getMessageRequestPreview(agentId: string, messageId?: string): Promise { if (!messageId) { return null @@ -309,6 +334,115 @@ export class AgentPresenter implements IAgentPresenter { return this.utilityHandler.getMessageRequestPreview(messageId) } + private async handleQuestionResolution( + messageId: string, + toolCallId: string, + payload: { + resolution: 'replied' | 'rejected' + answerText?: string + answerMessageId?: string + } + ): Promise { + if (!messageId || !toolCallId) { + return + } + + const message = await this.messageManager.getMessage(messageId) + if (!message || message.role !== 'assistant') { + throw new Error(`Message not found or not assistant (${messageId})`) + } + + const content = message.content as AssistantMessageBlock[] + const questionBlock = content.find( + (block) => + block.type === 'action' && + block.action_type === 'question_request' && + block.tool_call?.id === toolCallId + ) + + if (!questionBlock) { + throw new Error( + `Question block not found (messageId: ${messageId}, toolCallId: ${toolCallId})` + ) + } + + if (questionBlock.status !== 'pending') { + return + } + + const isReplied = payload.resolution === 'replied' + questionBlock.status = isReplied ? 'success' : 'denied' + questionBlock.extra = { + ...questionBlock.extra, + needsUserAction: false, + questionResolution: payload.resolution, + ...(isReplied && payload.answerText ? 
{ answerText: payload.answerText } : {}), + ...(isReplied && payload.answerMessageId ? { answerMessageId: payload.answerMessageId } : {}) + } + + const generatingState = this.generatingMessages.get(messageId) + if (generatingState) { + const questionIndex = generatingState.message.content.findIndex( + (block) => + block.type === 'action' && + block.action_type === 'question_request' && + block.tool_call?.id === toolCallId + ) + if (questionIndex !== -1) { + const stateBlock = generatingState.message.content[questionIndex] + generatingState.message.content[questionIndex] = { + ...stateBlock, + ...questionBlock, + extra: questionBlock.extra ? { ...questionBlock.extra } : undefined, + tool_call: questionBlock.tool_call ? { ...questionBlock.tool_call } : undefined + } + } + } + + await this.messageManager.editMessage(messageId, JSON.stringify(content)) + presenter.sessionManager.clearPendingQuestion(message.conversationId) + presenter.sessionManager.setStatus(message.conversationId, 'idle') + } + + private async resolvePendingQuestionIfNeeded( + conversationId: string, + userMessageId: string, + rawContent: string + ): Promise { + const session = await this.sessionManager.getSession(conversationId) + const pendingQuestion = session.runtime?.pendingQuestion + if (!pendingQuestion?.messageId || !pendingQuestion.toolCallId) { + return + } + + const answerText = this.extractUserMessageText(rawContent) + if (!answerText.trim()) { + return + } + + await this.handleQuestionResolution(pendingQuestion.messageId, pendingQuestion.toolCallId, { + resolution: 'replied', + answerText, + answerMessageId: userMessageId + }) + } + + private extractUserMessageText(rawContent: string): string { + if (!rawContent) return '' + try { + const parsed = JSON.parse(rawContent) as UserMessageContent + if (typeof parsed.text === 'string') { + return parsed.text + } + if (Array.isArray(parsed.content)) { + return parsed.content.map((block) => block.content || '').join('') + } + } catch (error) { + 
console.warn('[AgentPresenter] Failed to parse user message content:', error) + } + return rawContent + } + private buildMessageMetadata(conversation: CONVERSATION): MESSAGE_METADATA { const { providerId, modelId } = conversation.settings return { @@ -416,6 +550,7 @@ export class AgentPresenter implements IAgentPresenter { this.sessionManager.updateRuntime(state.conversationId, { userStopRequested: true }) this.sessionManager.setStatus(state.conversationId, 'paused') this.sessionManager.clearPendingPermission(state.conversationId) + this.sessionManager.clearPendingQuestion(state.conversationId) state.isCancelled = true if (state.adaptiveBuffer) { diff --git a/src/main/presenter/agentPresenter/loop/agentLoopHandler.ts b/src/main/presenter/agentPresenter/loop/agentLoopHandler.ts index c4b6cc3da..a9d55b413 100644 --- a/src/main/presenter/agentPresenter/loop/agentLoopHandler.ts +++ b/src/main/presenter/agentPresenter/loop/agentLoopHandler.ts @@ -45,7 +45,8 @@ export class AgentLoopHandler { enabledMcpTools: context.enabledMcpTools, chatMode, supportsVision: this.currentSupportsVision, - agentWorkspacePath + agentWorkspacePath, + conversationId: context.conversationId }) return await this.filterToolsForChatMode(toolDefs, chatMode, modelId) @@ -271,7 +272,8 @@ export class AgentLoopHandler { enabledMcpTools, chatMode, supportsVision: this.currentSupportsVision, - agentWorkspacePath + agentWorkspacePath, + conversationId }) const filteredToolDefs = await this.filterToolsForChatMode(toolDefs, chatMode, modelId) diff --git a/src/main/presenter/agentPresenter/loop/toolCallHandler.ts b/src/main/presenter/agentPresenter/loop/toolCallHandler.ts index 538960b41..f3dd3dd82 100644 --- a/src/main/presenter/agentPresenter/loop/toolCallHandler.ts +++ b/src/main/presenter/agentPresenter/loop/toolCallHandler.ts @@ -1,4 +1,5 @@ import type { AssistantMessageBlock } from '@shared/chat' +import type { QuestionInfo } from '@shared/types/core/question' import { 
finalizeAssistantMessageBlocks } from '@shared/chat/messageBlocks' import type { LLMAgentEventData, @@ -215,6 +216,44 @@ export class ToolCallHandler { } } + async processQuestionRequest( + state: GeneratingMessageState, + event: LLMAgentEventData, + currentTime: number + ): Promise { + const payload = event.question_request as QuestionInfo | undefined + if (!payload) return + + this.finalizeLastBlock(state) + + state.message.content.push({ + type: 'action', + content: '', + status: 'pending', + timestamp: currentTime, + action_type: 'question_request', + tool_call: { + id: event.tool_call_id, + name: event.tool_call_name, + params: event.tool_call_params || '', + server_name: event.tool_call_server_name, + server_icons: event.tool_call_server_icons, + server_description: event.tool_call_server_description + }, + extra: { + needsUserAction: true, + questionHeader: payload.header ?? '', + questionText: payload.question, + questionOptions: payload.options, + questionMultiple: Boolean(payload.multiple), + questionCustom: payload.custom !== false, + questionResolution: 'asked' + } + }) + + state.pendingToolCall = undefined + } + async processMcpUiResourcesFromToolCall( state: GeneratingMessageState, event: LLMAgentEventData, diff --git a/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts b/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts index b36850a47..1d703e224 100644 --- a/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts +++ b/src/main/presenter/agentPresenter/loop/toolCallProcessor.ts @@ -10,6 +10,7 @@ import fs from 'fs/promises' import path from 'path' import { isNonRetryableError } from './errorClassification' import { resolveToolOffloadPath } from '../../sessionPresenter/sessionPaths' +import { parseQuestionToolArgs, QUESTION_TOOL_NAME } from '../tools/questionTool' interface ToolCallProcessorOptions { getAllToolDefinitions: (context: ToolCallExecutionContext) => Promise @@ -42,6 +43,7 @@ interface ToolCallProcessResult { const 
TOOL_OUTPUT_OFFLOAD_THRESHOLD = 5000 const TOOL_OUTPUT_PREVIEW_LENGTH = 1024 +const QUESTION_ERROR_KEY = 'common.error.invalidQuestionRequest' // Tools that require offload when output exceeds threshold // Tools not in this list will never trigger offload (e.g., read_file has its own pagination) @@ -52,8 +54,7 @@ const TOOLS_REQUIRING_OFFLOAD = new Set([ 'glob_search', 'grep_search', 'text_replace', - 'browser_read_links', - 'browser_get_clickable_elements' + 'yo_browser_cdp_send' ]) export class ToolCallProcessor { @@ -76,7 +77,7 @@ export class ToolCallProcessor { return toolDefinitions.find((tool) => tool.function.name === toolName) } - for (const toolCall of context.toolCalls) { + for (const [index, toolCall] of context.toolCalls.entries()) { if (context.abortSignal.aborted) break if (toolCallCount >= context.maxToolCalls) { @@ -146,6 +147,70 @@ export class ToolCallProcessor { conversationId: context.conversationId } + if (toolCall.name === QUESTION_TOOL_NAME) { + const isStandalone = context.toolCalls.length === 1 + const isLast = index === context.toolCalls.length - 1 + if (!isStandalone || !isLast) { + notifyToolCallFinished('error') + this.appendToolError( + context.conversationMessages, + context.modelConfig, + toolCall, + 'Question tool must be the only tool call in a turn.' 
+ ) + yield { + type: 'response', + data: { + eventId: context.eventId, + question_error: QUESTION_ERROR_KEY, + tool_call_id: toolCall.id, + tool_call_name: toolCall.name + } + } + continue + } + + const parsedQuestion = parseQuestionToolArgs(toolCall.arguments || '') + if (!parsedQuestion.success) { + notifyToolCallFinished('error') + this.appendToolError( + context.conversationMessages, + context.modelConfig, + toolCall, + `Invalid question tool arguments: ${parsedQuestion.error}` + ) + yield { + type: 'response', + data: { + eventId: context.eventId, + question_error: QUESTION_ERROR_KEY, + tool_call_id: toolCall.id, + tool_call_name: toolCall.name + } + } + continue + } + + notifyToolCallFinished('success') + yield { + type: 'response', + data: { + eventId: context.eventId, + tool_call: 'question-required', + tool_call_id: toolCall.id, + tool_call_name: toolCall.name, + tool_call_params: toolCall.arguments, + tool_call_server_name: toolDef.server.name, + tool_call_server_icons: toolDef.server.icons, + tool_call_server_description: toolDef.server.description, + question_request: parsedQuestion.data + } + } + + needContinueConversation = false + break + } + yield { type: 'response', data: { diff --git a/src/main/presenter/agentPresenter/message/messageBuilder.ts b/src/main/presenter/agentPresenter/message/messageBuilder.ts index ce39d457a..9cf6d5899 100644 --- a/src/main/presenter/agentPresenter/message/messageBuilder.ts +++ b/src/main/presenter/agentPresenter/message/messageBuilder.ts @@ -133,7 +133,8 @@ export async function preparePromptContent({ enabledMcpTools: effectiveEnabledMcpTools, chatMode, supportsVision, - agentWorkspacePath: conversation.settings.agentWorkspacePath?.trim() || null + agentWorkspacePath: conversation.settings.agentWorkspacePath?.trim() || null, + conversationId: conversation.id }) } catch (error) { console.warn('AgentPresenter: Failed to load tool definitions', error) diff --git 
a/src/main/presenter/agentPresenter/permission/permissionHandler.ts b/src/main/presenter/agentPresenter/permission/permissionHandler.ts index 927470c8b..6e1db94e0 100644 --- a/src/main/presenter/agentPresenter/permission/permissionHandler.ts +++ b/src/main/presenter/agentPresenter/permission/permissionHandler.ts @@ -182,6 +182,20 @@ export class PermissionHandler extends BaseHandler { return } + if (serverName === 'deepchat-settings') { + const toolName = + this.getStringFromObject(parsedPermissionRequest, 'toolName') || + this.getStringFromObject(permissionBlock.extra as Record, 'toolName') + if (!toolName) { + console.warn('[PermissionHandler] Missing tool name in settings permission request') + await this.continueAfterPermissionDenied(messageId, permissionBlock) + return + } + presenter.settingsPermissionService?.approve(message.conversationId, toolName, remember) + await this.restartAgentLoopAfterPermission(messageId, toolCallId) + return + } + try { await this.getMcpPresenter().grantPermission(serverName, permissionType, remember) await this.waitForMcpServiceReady(serverName) @@ -562,7 +576,8 @@ export class PermissionHandler extends BaseHandler { enabledMcpTools, chatMode, supportsVision: false, - agentWorkspacePath + agentWorkspacePath, + conversationId }) toolDef = toolDefinitions.find((definition) => { if (definition.function.name !== pendingToolCall.name) { diff --git a/src/main/presenter/agentPresenter/session/sessionContext.ts b/src/main/presenter/agentPresenter/session/sessionContext.ts index a29b7b7c0..b8f479321 100644 --- a/src/main/presenter/agentPresenter/session/sessionContext.ts +++ b/src/main/presenter/agentPresenter/session/sessionContext.ts @@ -1,4 +1,10 @@ -export type SessionStatus = 'idle' | 'generating' | 'paused' | 'waiting_permission' | 'error' +export type SessionStatus = + | 'idle' + | 'generating' + | 'paused' + | 'waiting_permission' + | 'waiting_question' + | 'error' export type SessionContextResolved = { chatMode: 'chat' | 'agent' 
| 'acp agent' @@ -28,5 +34,10 @@ export type SessionContext = { permissionType: 'read' | 'write' | 'all' | 'command' payload: unknown } + pendingQuestion?: { + messageId: string + toolCallId: string + } + pendingQuestionInitialized?: boolean } } diff --git a/src/main/presenter/agentPresenter/session/sessionManager.ts b/src/main/presenter/agentPresenter/session/sessionManager.ts index df6fe8205..7740e1d3a 100644 --- a/src/main/presenter/agentPresenter/session/sessionManager.ts +++ b/src/main/presenter/agentPresenter/session/sessionManager.ts @@ -2,6 +2,7 @@ import fs from 'fs' import path from 'path' import { app } from 'electron' import type { IConfigPresenter, ISessionPresenter } from '@shared/presenter' +import type { AssistantMessageBlock } from '@shared/chat' import type { SessionContext, SessionContextResolved, SessionStatus } from './sessionContext' import { resolveSessionContext } from './sessionResolver' @@ -41,6 +42,7 @@ export class SessionManager { existing.resolved = resolved existing.updatedAt = now this.ensureRuntime(existing) + await this.hydratePendingQuestion(existing) return existing } @@ -57,6 +59,7 @@ export class SessionManager { } } this.sessions.set(agentId, session) + await this.hydratePendingQuestion(session) return session } @@ -147,6 +150,7 @@ export class SessionManager { runtime.toolCallCount = 0 runtime.userStopRequested = false runtime.pendingPermission = undefined + runtime.pendingQuestion = undefined } setStatus(agentId: string, status: SessionStatus): void { @@ -176,6 +180,10 @@ export class SessionManager { this.updateRuntime(agentId, { pendingPermission: undefined }) } + clearPendingQuestion(agentId: string): void { + this.updateRuntime(agentId, { pendingQuestion: undefined }) + } + private ensureRuntime(session: SessionContext): NonNullable { if (!session.runtime) { session.runtime = { @@ -193,6 +201,53 @@ export class SessionManager { return session.runtime } + private async hydratePendingQuestion(session: SessionContext): 
Promise { + const runtime = this.ensureRuntime(session) + if (runtime.pendingQuestionInitialized) return + runtime.pendingQuestionInitialized = true + if (runtime.pendingQuestion) return + + try { + const lastAssistant = await this.options.sessionPresenter.getLastAssistantMessage( + session.agentId + ) + if (!lastAssistant || lastAssistant.role !== 'assistant') { + return + } + + const blocks = lastAssistant.content as AssistantMessageBlock[] + if (!Array.isArray(blocks) || blocks.length === 0) { + return + } + + const pendingQuestionBlock = [...blocks].reverse().find((block) => { + if ( + block.type !== 'action' || + block.action_type !== 'question_request' || + block.status !== 'pending' + ) { + return false + } + if (block.extra && block.extra.needsUserAction === false) { + return false + } + return Boolean(block.tool_call?.id) + }) + + const toolCallId = pendingQuestionBlock?.tool_call?.id + if (!toolCallId) return + + runtime.pendingQuestion = { + messageId: lastAssistant.id, + toolCallId + } + session.status = 'waiting_question' + session.updatedAt = Date.now() + } catch (error) { + console.warn('[SessionManager] Failed to hydrate pending question:', error) + } + } + private async resolveAgentWorkspacePath( conversationId: string | null, currentPath: string | null diff --git a/src/main/presenter/agentPresenter/streaming/llmEventHandler.ts b/src/main/presenter/agentPresenter/streaming/llmEventHandler.ts index 2f23dc0fc..9709ca895 100644 --- a/src/main/presenter/agentPresenter/streaming/llmEventHandler.ts +++ b/src/main/presenter/agentPresenter/streaming/llmEventHandler.ts @@ -55,6 +55,8 @@ export class LLMEventHandler { tool_call_server_description, tool_call_response_raw, tool_call, + question_request, + question_error, totalUsage, image_data } = msg @@ -106,6 +108,16 @@ export class LLMEventHandler { return } + if (question_error) { + this.finalizeLastBlock(state) + state.message.content.push({ + type: 'error', + content: question_error, + status: 'error', + 
timestamp: currentTime + }) + } + if (reasoning_content) { if (state.reasoningStartTime === null) { state.reasoningStartTime = currentTime @@ -127,7 +139,10 @@ export class LLMEventHandler { await this.toolCallHandler.processMcpUiResourcesFromToolCall(state, msg, currentTime) } - if (tool_call) { + const shouldSkipToolCall = + tool_call && tool_call_name === 'question' && tool_call !== 'question-required' + + if (tool_call && !shouldSkipToolCall) { switch (tool_call) { case 'start': presenter.sessionManager.incrementToolCallCount(state.conversationId) @@ -150,6 +165,16 @@ export class LLMEventHandler { presenter.sessionManager.setStatus(state.conversationId, 'waiting_permission') await this.toolCallHandler.processToolCallPermission(state, msg, currentTime) break + case 'question-required': + presenter.sessionManager.updateRuntime(state.conversationId, { + pendingQuestion: { + messageId: eventId, + toolCallId: tool_call_id || '' + } + }) + presenter.sessionManager.setStatus(state.conversationId, 'waiting_question') + await this.toolCallHandler.processQuestionRequest(state, msg, currentTime) + break case 'permission-granted': case 'permission-denied': case 'continue': @@ -258,7 +283,7 @@ export class LLMEventHandler { if (image_data) delta.image_data = image_data if (totalUsage) delta.totalUsage = totalUsage - if (tool_call) { + if (tool_call && !shouldSkipToolCall) { delta.tool_call = tool_call delta.tool_call_id = tool_call_id delta.tool_call_name = tool_call_name @@ -271,6 +296,13 @@ export class LLMEventHandler { if (msg.permission_request !== undefined) { delta.permission_request = msg.permission_request } + if (question_request !== undefined) { + delta.question_request = question_request + } + } + + if (question_error) { + delta.question_error = question_error } this.streamUpdateScheduler.enqueueDelta( @@ -305,11 +337,13 @@ export class LLMEventHandler { this.generatingMessages.delete(eventId) presenter.sessionManager.setStatus(state.conversationId, 'error') 
presenter.sessionManager.clearPendingPermission(state.conversationId) + presenter.sessionManager.clearPendingQuestion(state.conversationId) } else { const message = await this.messageManager.getMessage(eventId) if (message) { presenter.sessionManager.setStatus(message.conversationId, 'error') presenter.sessionManager.clearPendingPermission(message.conversationId) + presenter.sessionManager.clearPendingQuestion(message.conversationId) } } @@ -334,11 +368,21 @@ export class LLMEventHandler { block.action_type === 'tool_call_permission' && block.status === 'pending' ) + const hasPendingQuestions = state.message.content.some( + (block) => + block.type === 'action' && + block.action_type === 'question_request' && + block.status === 'pending' + ) - if (hasPendingPermissions) { + if (hasPendingPermissions || hasPendingQuestions) { state.message.content.forEach((block) => { if ( - !(block.type === 'action' && block.action_type === 'tool_call_permission') && + !( + block.type === 'action' && + (block.action_type === 'tool_call_permission' || + block.action_type === 'question_request') + ) && block.status === 'loading' ) { if (block.type !== 'tool_call') { @@ -357,12 +401,19 @@ export class LLMEventHandler { ) this.searchingMessages.delete(eventId) presenter.sessionManager.setStatus(state.conversationId, 'waiting_permission') + if (!hasPendingPermissions) { + presenter.sessionManager.setStatus(state.conversationId, 'waiting_question') + } + await this.streamUpdateScheduler.flushAll(eventId, 'final') + this.generatingMessages.delete(eventId) + eventBus.sendToRenderer(STREAM_EVENTS.END, SendTarget.ALL_WINDOWS, msg) return } await this.finalizeMessage(state, eventId, Boolean(userStop)) presenter.sessionManager.setStatus(state.conversationId, 'idle') presenter.sessionManager.clearPendingPermission(state.conversationId) + presenter.sessionManager.clearPendingQuestion(state.conversationId) } await this.streamUpdateScheduler.flushAll(eventId, 'final') @@ -376,7 +427,10 @@ export 
class LLMEventHandler { userStop: boolean ): Promise { state.message.content.forEach((block) => { - if (block.type === 'action' && block.action_type === 'tool_call_permission') { + if ( + block.type === 'action' && + (block.action_type === 'tool_call_permission' || block.action_type === 'question_request') + ) { return } block.status = 'success' diff --git a/src/main/presenter/agentPresenter/streaming/streamUpdateScheduler.ts b/src/main/presenter/agentPresenter/streaming/streamUpdateScheduler.ts index 9a91cb4b2..0b9963345 100644 --- a/src/main/presenter/agentPresenter/streaming/streamUpdateScheduler.ts +++ b/src/main/presenter/agentPresenter/streaming/streamUpdateScheduler.ts @@ -19,6 +19,8 @@ interface PendingDelta { tool_call_server_description?: string tool_call_response_raw?: unknown permission_request?: LLMAgentEventData['permission_request'] + question_request?: LLMAgentEventData['question_request'] + question_error?: LLMAgentEventData['question_error'] maximum_tool_calls_reached?: boolean image_data?: { data: string; mimeType: string } rate_limit?: LLMAgentEventData['rate_limit'] @@ -176,6 +178,12 @@ export class StreamUpdateScheduler { if (delta.permission_request !== undefined) { state.pendingDelta.permission_request = delta.permission_request } + if (delta.question_request !== undefined) { + state.pendingDelta.question_request = delta.question_request + } + if (delta.question_error !== undefined) { + state.pendingDelta.question_error = delta.question_error + } if (delta.maximum_tool_calls_reached !== undefined) { state.pendingDelta.maximum_tool_calls_reached = delta.maximum_tool_calls_reached } @@ -261,6 +269,8 @@ export class StreamUpdateScheduler { tool_call_server_description: delta.tool_call_server_description, tool_call_response_raw: delta.tool_call_response_raw, permission_request: delta.permission_request, + question_request: delta.question_request, + question_error: delta.question_error, maximum_tool_calls_reached: 
delta.maximum_tool_calls_reached, image_data: delta.image_data, rate_limit: delta.rate_limit, @@ -389,6 +399,8 @@ export class StreamUpdateScheduler { tool_call_server_description: delta.tool_call_server_description, tool_call_response_raw: delta.tool_call_response_raw, permission_request: delta.permission_request, + question_request: delta.question_request, + question_error: delta.question_error, maximum_tool_calls_reached: delta.maximum_tool_calls_reached, image_data: delta.image_data, rate_limit: delta.rate_limit, diff --git a/src/main/presenter/agentPresenter/tool/toolCallCenter.ts b/src/main/presenter/agentPresenter/tool/toolCallCenter.ts index 2f365d0dd..1e1b6c63f 100644 --- a/src/main/presenter/agentPresenter/tool/toolCallCenter.ts +++ b/src/main/presenter/agentPresenter/tool/toolCallCenter.ts @@ -10,6 +10,7 @@ export type ToolCallContext = { chatMode?: 'chat' | 'agent' | 'acp agent' supportsVision?: boolean agentWorkspacePath?: string | null + conversationId?: string } export class ToolCallCenter { diff --git a/src/main/presenter/agentPresenter/tools/questionTool.ts b/src/main/presenter/agentPresenter/tools/questionTool.ts new file mode 100644 index 000000000..1d71f8060 --- /dev/null +++ b/src/main/presenter/agentPresenter/tools/questionTool.ts @@ -0,0 +1,64 @@ +import { z } from 'zod' +import { jsonrepair } from 'jsonrepair' +import type { QuestionInfo } from '@shared/types/core/question' + +export const QUESTION_TOOL_NAME = 'deepchat_question' + +const questionOptionSchema = z.object({ + label: z.string().trim().min(1).max(30), + description: z.string().trim().max(200).optional() +}) + +export const questionToolSchema = z.object({ + header: z.string().trim().max(30).optional(), + question: z.string().trim().min(1).max(500), + options: z.array(questionOptionSchema).min(1).max(10), + multiple: z.boolean().optional().default(false), + custom: z.boolean().optional().default(true) +}) + +export type QuestionToolInput = z.infer + +const normalizeQuestionInfo = 
(input: QuestionToolInput): QuestionInfo => { + const header = input.header?.trim() + const question = input.question.trim() + const options = input.options.map((option) => { + const description = option.description?.trim() + return { + label: option.label.trim(), + ...(description ? { description } : {}) + } + }) + + return { + ...(header ? { header } : {}), + question, + options, + multiple: Boolean(input.multiple), + custom: input.custom !== false + } +} + +export const parseQuestionToolArgs = ( + rawArgs: string +): { success: true; data: QuestionInfo } | { success: false; error: string } => { + let parsed: unknown = {} + if (rawArgs && rawArgs.trim()) { + try { + parsed = JSON.parse(rawArgs) as Record + } catch { + try { + parsed = JSON.parse(jsonrepair(rawArgs)) as Record + } catch { + return { success: false, error: 'Invalid JSON for question tool arguments.' } + } + } + } + + const result = questionToolSchema.safeParse(parsed) + if (!result.success) { + return { success: false, error: result.error.message } + } + + return { success: true, data: normalizeQuestionInfo(result.data) } +} diff --git a/src/main/presenter/browser/BrowserTab.ts b/src/main/presenter/browser/BrowserTab.ts index 11bcfcaea..348b7703e 100644 --- a/src/main/presenter/browser/BrowserTab.ts +++ b/src/main/presenter/browser/BrowserTab.ts @@ -64,6 +64,11 @@ export class BrowserTab { return await this.cdpManager.evaluateScript(session, script) } + async sendCdpCommand(method: string, params?: Record): Promise { + const session = await this.ensureSession() + return await session.sendCommand(method, params ?? 
{}) + } + async takeScreenshot(options?: ScreenshotOptions): Promise { await this.ensureSession() this.ensureAvailable() diff --git a/src/main/presenter/browser/BrowserToolManager.ts b/src/main/presenter/browser/BrowserToolManager.ts deleted file mode 100644 index 38f441e4d..000000000 --- a/src/main/presenter/browser/BrowserToolManager.ts +++ /dev/null @@ -1,103 +0,0 @@ -import type { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js' -import type { Notification, Request } from '@modelcontextprotocol/sdk/types.js' -import type { BrowserToolContext, BrowserToolDefinition } from './tools/types' -import { createNavigateTools } from './tools/navigate' -import { createActionTools } from './tools/action' -import { createContentTools } from './tools/content' -// import { createScreenshotTools } from './tools/screenshot' -import { createTabTools } from './tools/tabs' -import { createDownloadTools } from './tools/download' -import type { YoBrowserPresenter } from './YoBrowserPresenter' - -export class BrowserToolManager { - private readonly presenter: YoBrowserPresenter - private readonly tools: BrowserToolDefinition[] - - constructor(presenter: YoBrowserPresenter) { - this.presenter = presenter - this.tools = [ - ...createNavigateTools(), - ...createActionTools(), - ...createContentTools(), - // ...createScreenshotTools(), - ...createTabTools(), - ...createDownloadTools() - ] - } - - getToolDefinitions() { - return this.tools - } - - async executeTool( - toolName: string, - args: any, - extra?: RequestHandlerExtra - ) { - const tool = this.tools.find((t) => t.name === toolName) - if (!tool) { - return { - content: [{ type: 'text', text: `Unknown tool: ${toolName}` }], - isError: true - } - } - - const context = this.createContext() - return await tool.handler(args, context, extra || ({} as any)) - } - - private createContext(): BrowserToolContext { - return { - getTab: async (tabId?: string) => { - return await this.presenter.getBrowserTab(tabId) - 
}, - getActiveTab: async () => { - return await this.presenter.getBrowserTab() - }, - resolveTabId: async (args?: { tabId?: string }) => { - if (args?.tabId) { - return args.tabId - } - const active = await this.presenter.getActiveTab() - if (active) return active.id - const tabs = await this.presenter.listTabs() - if (tabs.length > 0) return tabs[0].id - const newTab = await this.presenter.createTab('about:blank') - if (!newTab) { - throw new Error('No available tab to operate on') - } - return newTab.id - }, - createTab: async (url?: string) => { - const tab = await this.presenter.createTab(url) - if (!tab) return null - return { id: tab.id, url: tab.url, title: tab.title || '' } - }, - listTabs: async () => { - const tabs = await this.presenter.listTabs() - const active = await this.presenter.getActiveTab() - return tabs.map((tab) => ({ - id: tab.id, - url: tab.url, - title: tab.title || '', - isActive: tab.id === active?.id - })) - }, - activateTab: async (tabId: string) => { - await this.presenter.activateTab(tabId) - }, - closeTab: async (tabId: string) => { - await this.presenter.closeTab(tabId) - }, - downloadFile: async (url: string, savePath?: string) => { - const download = await this.presenter.startDownload(url, savePath) - return { - id: download.id, - url: download.url, - filePath: download.filePath, - status: download.status - } - } - } - } -} diff --git a/src/main/presenter/browser/YoBrowserPresenter.ts b/src/main/presenter/browser/YoBrowserPresenter.ts index d87029b3f..5ffb46c4c 100644 --- a/src/main/presenter/browser/YoBrowserPresenter.ts +++ b/src/main/presenter/browser/YoBrowserPresenter.ts @@ -5,7 +5,6 @@ import { TAB_EVENTS, YO_BROWSER_EVENTS } from '@/events' import { BrowserTabInfo, BrowserContextSnapshot, ScreenshotOptions } from '@shared/types/browser' import { IYoBrowserPresenter, - MCPToolDefinition, DownloadInfo, IWindowPresenter, ITabPresenter @@ -14,9 +13,8 @@ import { BrowserTab } from './BrowserTab' import { CDPManager } from 
'./CDPManager' import { ScreenshotManager } from './ScreenshotManager' import { DownloadManager } from './DownloadManager' -import { BrowserToolManager } from './BrowserToolManager' -import { zodToJsonSchema } from 'zod-to-json-schema' import { clearYoBrowserSessionData } from './yoBrowserSession' +import { YoBrowserToolHandler } from './YoBrowserToolHandler' export class YoBrowserPresenter implements IYoBrowserPresenter { private windowId: number | null = null @@ -28,14 +26,14 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { private readonly cdpManager = new CDPManager() private readonly screenshotManager = new ScreenshotManager(this.cdpManager) private readonly downloadManager = new DownloadManager() - private readonly browserToolManager: BrowserToolManager private readonly windowPresenter: IWindowPresenter private readonly tabPresenter: ITabPresenter + readonly toolHandler: YoBrowserToolHandler constructor(windowPresenter: IWindowPresenter, tabPresenter: ITabPresenter) { this.windowPresenter = windowPresenter this.tabPresenter = tabPresenter - this.browserToolManager = new BrowserToolManager(this) + this.toolHandler = new YoBrowserToolHandler(this) eventBus.on(TAB_EVENTS.CLOSED, (tabId: number) => this.handleTabClosed(tabId)) } @@ -170,10 +168,6 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { return this.toTabInfo(tab) } - async getBrowserTab(tabId?: string): Promise { - return await this.resolveTab(tabId) - } - async goBack(tabId?: string): Promise { const tab = await this.resolveTab(tabId) if (tab?.contents.canGoBack()) { @@ -316,49 +310,6 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { return this.viewIdToTabId.get(viewId) ?? 
null } - async getToolDefinitions(_supportsVision: boolean): Promise { - // Only return browser_* tools from BrowserToolManager - const browserTools = this.browserToolManager.getToolDefinitions() - const browserMcpTools: MCPToolDefinition[] = browserTools.map((tool) => { - const jsonSchema = zodToJsonSchema(tool.schema) as { - type?: string - properties?: Record - required?: string[] - [key: string]: unknown - } - return { - type: 'function' as const, - function: { - name: tool.name, - description: tool.description, - parameters: { - type: 'object' as const, - properties: (jsonSchema.properties || {}) as Record, - required: (jsonSchema.required || []) as string[] - } - }, - server: { - name: 'yo-browser', - icons: '🌐', - description: 'DeepChat built-in Yo Browser' - } - } - }) - return browserMcpTools - } - - async callTool(toolName: string, params: Record): Promise { - const result = await this.browserToolManager.executeTool(toolName, params) - const textParts = result.content - .filter((c): c is { type: 'text'; text: string } => c.type === 'text') - .map((c) => c.text) - const textContent = textParts.join('\n\n') - if (result.isError) { - throw new Error(textContent || 'Tool execution failed') - } - return textContent - } - async captureScreenshot(tabId: string, options?: ScreenshotOptions): Promise { const tab = await this.resolveTab(tabId) if (!tab) { @@ -641,6 +592,10 @@ export class YoBrowserPresenter implements IYoBrowserPresenter { eventBus.sendToRenderer(YO_BROWSER_EVENTS.TAB_CLOSED, SendTarget.ALL_WINDOWS, tabId) } + async getBrowserTab(tabId?: string): Promise { + return await this.resolveTab(tabId) + } + private emitTabActivated(tabId: string) { eventBus.sendToRenderer(YO_BROWSER_EVENTS.TAB_ACTIVATED, SendTarget.ALL_WINDOWS, tabId) } diff --git a/src/main/presenter/browser/YoBrowserToolDefinitions.ts b/src/main/presenter/browser/YoBrowserToolDefinitions.ts new file mode 100644 index 000000000..d8fbceff2 --- /dev/null +++ 
b/src/main/presenter/browser/YoBrowserToolDefinitions.ts @@ -0,0 +1,216 @@ +import { z } from 'zod' +import { zodToJsonSchema } from 'zod-to-json-schema' +import type { MCPToolDefinition } from '@shared/presenter' + +const yoBrowserSchemas = { + tab_list: z.object({}), + tab_new: z.object({ + url: z.string().url().optional().describe('Optional URL to navigate to when creating the tab') + }), + tab_activate: z.object({ + tabId: z.string().min(1).describe('ID of the tab to activate') + }), + tab_close: z.object({ + tabId: z.string().min(1).describe('ID of the tab to close') + }), + cdp_send: z.object({ + tabId: z.string().optional().describe('Optional tab ID. If omitted, uses the active tab'), + method: z + .enum([ + 'Page.navigate', + 'Page.reload', + 'Page.captureScreenshot', + 'Runtime.evaluate', + 'DOM.getDocument', + 'DOM.querySelector', + 'DOM.querySelectorAll', + 'DOM.getOuterHTML', + 'Input.dispatchMouseEvent', + 'Input.dispatchKeyEvent' + ]) + .describe('Common CDP method name'), + params: z + .union([ + z + .object({ + url: z.string().url().describe('Example: "https://example.com"') + }) + .describe('For Page.navigate. Example: {"url":"https://example.com"}'), + z + .object({ + ignoreCache: z.boolean().optional().describe('Example: true'), + scriptToEvaluateOnLoad: z + .string() + .optional() + .describe('Example: "console.log(document.title)"') + }) + .describe('For Page.reload. 
Example: {"ignoreCache":true}'), + z + .object({ + format: z.enum(['png', 'jpeg']).optional().describe('Example: "png"'), + quality: z.number().int().min(0).max(100).optional().describe('Example: 80'), + clip: z + .object({ + x: z.number().describe('Example: 0'), + y: z.number().describe('Example: 0'), + width: z.number().positive().describe('Example: 800'), + height: z.number().positive().describe('Example: 600'), + scale: z.number().positive().optional().describe('Example: 1') + }) + .optional() + .describe('Example: {"x":0,"y":0,"width":800,"height":600,"scale":1}') + }) + .describe('For Page.captureScreenshot. Example: {"format":"png"}'), + z + .object({ + expression: z.string().min(1).describe('Example: "document.title"'), + returnByValue: z.boolean().optional().describe('Example: true'), + awaitPromise: z.boolean().optional().describe('Example: true') + }) + .describe( + 'For Runtime.evaluate. Example: {"expression":"document.title","returnByValue":true}' + ), + z + .object({ + depth: z.number().int().min(0).optional().describe('Example: 1'), + pierce: z.boolean().optional().describe('Example: true') + }) + .describe('For DOM.getDocument. Example: {"depth":1,"pierce":true}'), + z + .object({ + nodeId: z.number().int().positive().describe('Example: 1'), + selector: z.string().min(1).describe('Example: "body"') + }) + .describe('For DOM.querySelector. Example: {"nodeId":1,"selector":"body"}'), + z + .object({ + nodeId: z.number().int().positive().describe('Example: 1'), + selector: z.string().min(1).describe('Example: "a"') + }) + .describe('For DOM.querySelectorAll. Example: {"nodeId":1,"selector":"a"}'), + z + .object({ + nodeId: z.number().int().positive().describe('Example: 1') + }) + .describe('For DOM.getOuterHTML. 
Example: {"nodeId":1}'), + z + .object({ + type: z + .enum(['mousePressed', 'mouseReleased', 'mouseMoved']) + .describe('Example: "mousePressed"'), + x: z.number().describe('Example: 120'), + y: z.number().describe('Example: 240'), + button: z + .enum(['none', 'left', 'middle', 'right']) + .optional() + .describe('Example: "left"'), + clickCount: z.number().int().min(1).optional().describe('Example: 1') + }) + .describe( + 'For Input.dispatchMouseEvent. Example: {"type":"mousePressed","x":120,"y":240,"button":"left","clickCount":1}' + ), + z + .object({ + type: z.enum(['keyDown', 'keyUp', 'rawKeyDown', 'char']).describe('Example: "keyDown"'), + key: z.string().optional().describe('Example: "a"'), + code: z.string().optional().describe('Example: "KeyA"'), + text: z.string().optional().describe('Example: "a"') + }) + .describe( + 'For Input.dispatchKeyEvent. Example: {"type":"keyDown","key":"a","code":"KeyA","text":"a"}' + ) + ]) + .describe('Parameters for the selected CDP method. Must be an object, not a JSON string') + }) +} + +export function getYoBrowserToolDefinitions(): MCPToolDefinition[] { + return [ + { + type: 'function', + function: { + name: 'yo_browser_tab_list', + description: 'List all browser tabs and identify the active tab', + parameters: zodToJsonSchema(yoBrowserSchemas.tab_list) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'yobrowser', + icons: '🌐', + description: 'YoBrowser CDP automation' + } + }, + { + type: 'function', + function: { + name: 'yo_browser_tab_new', + description: 'Create a new browser tab with an optional URL', + parameters: zodToJsonSchema(yoBrowserSchemas.tab_new) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'yobrowser', + icons: '🌐', + description: 'YoBrowser CDP automation' + } + }, + { + type: 'function', + function: { + name: 'yo_browser_tab_activate', + description: 'Make a specific tab the active tab', + parameters: 
zodToJsonSchema(yoBrowserSchemas.tab_activate) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'yobrowser', + icons: '🌐', + description: 'YoBrowser CDP automation' + } + }, + { + type: 'function', + function: { + name: 'yo_browser_tab_close', + description: 'Close a specific browser tab', + parameters: zodToJsonSchema(yoBrowserSchemas.tab_close) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'yobrowser', + icons: '🌐', + description: 'YoBrowser CDP automation' + } + }, + { + type: 'function', + function: { + name: 'yo_browser_cdp_send', + description: + 'Send a Chrome DevTools Protocol (CDP) command to a browser tab. Use this for navigation, content extraction, and DOM interaction', + parameters: zodToJsonSchema(yoBrowserSchemas.cdp_send) as { + type: string + properties: Record + required?: string[] + } + }, + server: { + name: 'yobrowser', + icons: '🌐', + description: 'YoBrowser CDP automation' + } + } + ] +} diff --git a/src/main/presenter/browser/YoBrowserToolHandler.ts b/src/main/presenter/browser/YoBrowserToolHandler.ts new file mode 100644 index 000000000..ffc1f07a9 --- /dev/null +++ b/src/main/presenter/browser/YoBrowserToolHandler.ts @@ -0,0 +1,131 @@ +import logger from '@shared/logger' +import { getYoBrowserToolDefinitions } from './YoBrowserToolDefinitions' +import type { YoBrowserPresenter } from './YoBrowserPresenter' + +export class YoBrowserToolHandler { + private readonly presenter: YoBrowserPresenter + + constructor(presenter: YoBrowserPresenter) { + this.presenter = presenter + } + + getToolDefinitions(): any[] { + return getYoBrowserToolDefinitions() + } + + async callTool(toolName: string, args: Record): Promise { + try { + switch (toolName) { + case 'yo_browser_tab_list': { + return await this.handleTabList() + } + case 'yo_browser_tab_new': { + const url = typeof args.url === 'string' ? 
args.url : undefined + return await this.handleTabNew(url) + } + case 'yo_browser_tab_activate': { + const tabId = typeof args.tabId === 'string' ? args.tabId : '' + if (!tabId) { + throw new Error('tabId is required') + } + return await this.handleTabActivate(tabId) + } + case 'yo_browser_tab_close': { + const tabId = typeof args.tabId === 'string' ? args.tabId : '' + if (!tabId) { + throw new Error('tabId is required') + } + return await this.handleTabClose(tabId) + } + case 'yo_browser_cdp_send': { + const tabId = typeof args.tabId === 'string' ? args.tabId : undefined + const method = typeof args.method === 'string' ? args.method : '' + const params = this.normalizeCdpParams(args.params) + return await this.handleCdpSend(tabId, method, params) + } + default: + throw new Error(`Unknown YoBrowser tool: ${toolName}`) + } + } catch (error) { + logger.error('[YoBrowserToolHandler] Tool execution failed', { toolName, error }) + throw error + } + } + + private async handleTabList(): Promise { + const tabs = await this.presenter.listTabs() + const activeTab = await this.presenter.getActiveTab() + return JSON.stringify({ + activeTabId: activeTab?.id ?? 
null, + tabs: tabs.map((tab: any) => ({ + id: tab.id, + url: tab.url, + title: tab.title, + isActive: tab.id === activeTab?.id + })) + }) + } + + private async handleTabNew(url?: string): Promise { + const tab = await this.presenter.createTab(url) + if (!tab) { + throw new Error('Failed to create new tab') + } + return JSON.stringify({ + id: tab.id, + url: tab.url, + title: tab.title + }) + } + + private async handleTabActivate(tabId: string): Promise { + await this.presenter.activateTab(tabId) + return JSON.stringify({ success: true, tabId }) + } + + private async handleTabClose(tabId: string): Promise { + await this.presenter.closeTab(tabId) + return JSON.stringify({ success: true, tabId }) + } + + private async handleCdpSend( + tabId: string | undefined, + method: string, + params: Record + ): Promise { + if (!method) { + throw new Error('CDP method is required') + } + const browserTab = await this.presenter.getBrowserTab(tabId) + if (!browserTab) { + throw new Error(tabId ? `Tab ${tabId} not found` : 'No active tab available') + } + if (tabId) { + const resolvedTabId = + (browserTab as { tabId?: string; id?: string }).tabId ?? (browserTab as { id?: string }).id + if (resolvedTabId !== tabId) { + throw new Error(`Tab ${tabId} not found`) + } + } + + const response = await browserTab.sendCdpCommand(method, params) + return JSON.stringify(response ?? 
{}) + } + + private normalizeCdpParams(value: unknown): Record { + if (typeof value === 'object' && value !== null && !Array.isArray(value)) { + return value as Record + } + if (typeof value === 'string' && value.trim()) { + try { + const parsed = JSON.parse(value) + if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) { + return parsed as Record + } + } catch { + return {} + } + } + return {} + } +} diff --git a/src/main/presenter/browser/tools/action.ts b/src/main/presenter/browser/tools/action.ts deleted file mode 100644 index 698f0d1bf..000000000 --- a/src/main/presenter/browser/tools/action.ts +++ /dev/null @@ -1,200 +0,0 @@ -import { z } from 'zod' -import type { BrowserToolDefinition } from './types' - -const BaseArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)') -}) - -const SelectorSchema = BaseArgsSchema.extend({ - selector: z.string().min(1).describe('CSS selector of the target element') -}) - -const ClickArgsSchema = SelectorSchema -const HoverArgsSchema = SelectorSchema - -const FormInputArgsSchema = SelectorSchema.extend({ - value: z.string().describe('Value to fill into the element'), - append: z - .boolean() - .optional() - .default(false) - .describe('Append to existing value instead of replacing') -}) - -const SelectArgsSchema = SelectorSchema.extend({ - value: z.union([z.string(), z.array(z.string())]).describe('Value or values to select') -}) - -const ScrollArgsSchema = BaseArgsSchema.extend({ - x: z.number().optional().default(0).describe('Horizontal scroll distance'), - y: z.number().optional().default(500).describe('Vertical scroll distance'), - behavior: z.enum(['auto', 'smooth']).optional().default('auto').describe('Scroll behavior') -}) - -const PressKeyArgsSchema = BaseArgsSchema.extend({ - key: z.string().min(1).describe('Key to press'), - count: z.number().int().min(1).optional().default(1).describe('Number of times to press the key') -}) - -export function 
createActionTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_click', - description: 'Click an element on the page using a CSS selector.', - schema: ClickArgsSchema, - handler: async (args, context) => { - const parsed = ClickArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.waitForSelector(parsed.selector, { timeout: 5000 }) - await tab.click(parsed.selector) - return { - content: [ - { - type: 'text', - text: `Clicked element ${parsed.selector}` - } - ] - } - } - }, - { - name: 'browser_hover', - description: 'Hover over an element on the page.', - schema: HoverArgsSchema, - handler: async (args, context) => { - const parsed = HoverArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.waitForSelector(parsed.selector, { timeout: 5000 }) - await tab.hover(parsed.selector) - return { - content: [ - { - type: 'text', - text: `Hovered over ${parsed.selector}` - } - ] - } - } - }, - { - name: 'browser_form_input_fill', - description: 'Fill text into an input or textarea element.', - schema: FormInputArgsSchema, - handler: async (args, context) => { - const parsed = FormInputArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.waitForSelector(parsed.selector, { timeout: 5000 }) - await tab.fill(parsed.selector, parsed.value, parsed.append) - return { - content: [ - { - type: 'text', - text: `Filled ${parsed.selector} with value` - } - ] - } - } - }, - { - name: 'browser_select', - description: 'Select 
value(s) within a select element.', - schema: SelectArgsSchema, - handler: async (args, context) => { - const parsed = SelectArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.waitForSelector(parsed.selector, { timeout: 5000 }) - await tab.select(parsed.selector, parsed.value) - return { - content: [ - { - type: 'text', - text: `Updated selection for ${parsed.selector}` - } - ] - } - } - }, - { - name: 'browser_scroll', - description: 'Scroll the page by specified offsets.', - schema: ScrollArgsSchema, - handler: async (args, context) => { - const parsed = ScrollArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.scroll({ - x: parsed.x, - y: parsed.y, - behavior: parsed.behavior - }) - return { - content: [ - { - type: 'text', - text: `Scrolled by x=${parsed.x}, y=${parsed.y}` - } - ] - } - } - }, - { - name: 'browser_press_key', - description: 'Send keyboard input to the active page.', - schema: PressKeyArgsSchema, - handler: async (args, context) => { - const parsed = PressKeyArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - await tab.pressKey(parsed.key, parsed.count) - return { - content: [ - { - type: 'text', - text: `Pressed key "${parsed.key}" ${parsed.count} time(s)` - } - ] - } - } - } - ] -} diff --git a/src/main/presenter/browser/tools/content.ts b/src/main/presenter/browser/tools/content.ts deleted file mode 100644 index cd10a692b..000000000 --- a/src/main/presenter/browser/tools/content.ts +++ /dev/null @@ 
-1,256 +0,0 @@ -import { z } from 'zod' -import TurndownService from 'turndown' -import type { BrowserToolDefinition } from './types' - -const BaseArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)') -}) - -const SelectorArgsSchema = BaseArgsSchema.extend({ - selector: z.string().optional().describe('Optional CSS selector to scope extraction') -}) - -const ContentArgsSchema = SelectorArgsSchema.extend({ - offset: z - .number() - .int() - .min(0) - .optional() - .default(0) - .describe('Character offset from which to start reading (0 = beginning)'), - limit: z - .number() - .int() - .min(1) - .max(16000) - .optional() - .default(4000) - .describe('Maximum number of characters to return from the offset') -}) - -const LinksArgsSchema = BaseArgsSchema.extend({ - limit: z.number().int().min(1).max(200).optional().default(50).describe('Maximum links to return') -}) - -const ClickableArgsSchema = BaseArgsSchema.extend({ - limit: z - .number() - .int() - .min(1) - .max(200) - .optional() - .default(50) - .describe('Maximum clickable elements to return') -}) - -const turndown = new TurndownService({ - headingStyle: 'atx' -}) - -const DEFAULT_TEXT_LIMIT = 4000 -const MAX_TEXT_LIMIT = 16000 - -const paginateText = ( - fullText: string | undefined, - offset?: number, - limit?: number -): { slice: string; meta?: string } => { - const text = fullText || '' - const length = text.length - const safeOffset = Math.max(0, offset ?? 0) - const safeLimit = Math.min(MAX_TEXT_LIMIT, Math.max(1, limit ?? DEFAULT_TEXT_LIMIT)) - - if (!text) { - return { slice: '', meta: undefined } - } - - if (safeOffset >= length) { - return { - slice: '', - meta: `Offset ${safeOffset} is beyond content length ${length}. 
No content returned.` - } - } - - const end = Math.min(length, safeOffset + safeLimit) - const slice = text.slice(safeOffset, end) - const remaining = length - end - - if (remaining <= 0) { - return { slice, meta: undefined } - } - - const nextOffset = end - const meta = `Content length: ${length} characters. Returned range: [${safeOffset}, ${end}) (${slice.length} characters). Remaining: ${remaining} characters. To continue reading, call this tool again with offset=${nextOffset}.` - - return { slice, meta } -} - -export function createContentTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_get_text', - description: - 'Extract visible text from the page or a specific element. Supports offset/limit pagination to avoid overly long outputs.', - schema: ContentArgsSchema, - handler: async (args, context) => { - const parsed = ContentArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - - const text = await tab.getInnerText(parsed.selector) - const { slice, meta } = paginateText(text, parsed.offset, parsed.limit) - - const content: { type: 'text'; text: string }[] = [] - - if (!slice && !meta) { - content.push({ type: 'text', text: '(no text found)' }) - } else { - if (meta) { - content.push({ - type: 'text', - text: `[pagination] ${meta}` - }) - } - if (slice) { - content.push({ - type: 'text', - text: slice - }) - } - } - - return { content } - }, - annotations: { - readOnlyHint: true - } - }, - { - name: 'browser_get_markdown', - description: - 'Extract the page content as Markdown. 
Supports offset/limit pagination to avoid overly long outputs.', - schema: ContentArgsSchema, - handler: async (args, context) => { - const parsed = ContentArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - - await tab.waitForNetworkIdle() - const html = await tab.getHtml(parsed.selector) - const markdown = html ? turndown.turndown(html) : '' - const { slice, meta } = paginateText(markdown, parsed.offset, parsed.limit) - - const content: { type: 'text'; text: string }[] = [] - - if (!slice && !meta) { - content.push({ type: 'text', text: '(no content found)' }) - } else { - if (meta) { - content.push({ - type: 'text', - text: `[pagination] ${meta}` - }) - } - if (slice) { - content.push({ - type: 'text', - text: slice - }) - } - } - - return { content } - }, - annotations: { - readOnlyHint: true - } - }, - { - name: 'browser_read_links', - description: 'List hyperlinks on the current page.', - schema: LinksArgsSchema, - handler: async (args, context) => { - const parsed = LinksArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - - const links = await tab.getLinks(parsed.limit) - const formatted = - links.length === 0 - ? 'No links found.' - : links - .map((link, index) => `${index + 1}. 
${link.text || '(no text)'} -> ${link.href}`) - .join('\n') - - return { - content: [ - { - type: 'text', - text: formatted - } - ] - } - }, - annotations: { - readOnlyHint: true - } - }, - { - name: 'browser_get_clickable_elements', - description: 'List clickable elements with simple selectors.', - schema: ClickableArgsSchema, - handler: async (args, context) => { - const parsed = ClickableArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - - const elements = await tab.getClickableElements(parsed.limit) - const formatted = - elements.length === 0 - ? 'No clickable elements found.' - : elements - .map( - (element, index) => - `${index + 1}. [${element.tag}] ${element.text || element.ariaLabel || '(no text)'} -> ${element.selector}` - ) - .join('\n') - - return { - content: [ - { - type: 'text', - text: formatted - } - ] - } - }, - annotations: { - readOnlyHint: true - } - } - ] -} diff --git a/src/main/presenter/browser/tools/download.ts b/src/main/presenter/browser/tools/download.ts deleted file mode 100644 index 0a0fa0fa5..000000000 --- a/src/main/presenter/browser/tools/download.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { z } from 'zod' -import type { BrowserToolDefinition } from './types' - -const DownloadListArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)') -}) - -const DownloadFileArgsSchema = z.object({ - url: z.string().url().describe('File URL to download'), - savePath: z.string().optional().describe('Optional file path to save as'), - tabId: z - .string() - .optional() - .describe('Tab identifier to use for download context (defaults to active tab)') -}) - -export function createDownloadTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_get_download_list', - description: 'Get download items for the current 
browser session.', - schema: DownloadListArgsSchema, - handler: async () => { - // Note: Download list functionality needs to be implemented in YoBrowserPresenter - // For now, return empty list - const downloads: Array<{ - filename: string - state: string - receivedBytes: number - totalBytes: number - url: string - }> = [] - const formatted = - downloads.length === 0 - ? 'No downloads yet.' - : downloads - .map( - (item) => - `- ${item.filename} [${item.state}] ${item.receivedBytes}/${item.totalBytes} bytes (${item.url})` - ) - .join('\n') - - return { - content: [ - { - type: 'text', - text: formatted - } - ] - } - } - }, - { - name: 'browser_download_file', - description: - 'Download a file using the browser session, preserving cookies of the active tab.', - schema: DownloadFileArgsSchema, - handler: async (args, context) => { - const parsed = DownloadFileArgsSchema.parse(args) - if (!context.downloadFile) { - return { - content: [{ type: 'text', text: 'Download functionality not available' }], - isError: true - } - } - - try { - const download = await context.downloadFile(parsed.url, parsed.savePath) - return { - content: [ - { - type: 'text', - text: `Download started: ${parsed.url}\nStatus: ${download.status}\nID: ${download.id}${ - download.filePath ? `\nSave path: ${download.filePath}` : '' - }` - } - ] - } - } catch (error) { - const errorMsg = error instanceof Error ? 
error.message : String(error) - return { - content: [{ type: 'text', text: `Download failed: ${errorMsg}` }], - isError: true - } - } - } - } - ] -} diff --git a/src/main/presenter/browser/tools/navigate.ts b/src/main/presenter/browser/tools/navigate.ts deleted file mode 100644 index 2d6e95938..000000000 --- a/src/main/presenter/browser/tools/navigate.ts +++ /dev/null @@ -1,283 +0,0 @@ -import { z } from 'zod' -import type { BrowserToolDefinition, ToolResult } from './types' - -const NavigateArgsSchema = z.object({ - url: z.string().url().describe('URL to navigate to'), - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)'), - newTab: z.boolean().optional().default(false).describe('Open navigation in a new tab'), - reuse: z - .boolean() - .optional() - .default(true) - .describe('Reuse an existing tab that matches the domain when true') -}) - -const NavigationOnlyArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)') -}) - -export function createNavigateTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_navigate', - description: 'Navigate the browser to the specified URL.', - schema: NavigateArgsSchema, - handler: async (args, context) => { - const parsed = NavigateArgsSchema.parse(args) - - // Handle new tab creation - if (parsed.newTab) { - if (!context.createTab) { - return { - content: [{ type: 'text', text: 'Tab creation not available' }], - isError: true - } - } - const newTab = await context.createTab(parsed.url) - if (!newTab) { - return { - content: [{ type: 'text', text: 'Failed to create new tab' }], - isError: true - } - } - return { - content: [ - { - type: 'text', - text: `Opened new tab ${newTab.id} -> ${parsed.url}\nTitle: ${newTab.title || 'unknown'}` - } - ] - } - } - - // Handle reuse logic - if (parsed.reuse && !parsed.tabId) { - // Try to find a reusable tab by domain - const tabs = await context.listTabs?.() - if (tabs && tabs.length > 0) { - try { 
- const targetHost = new URL(parsed.url).hostname - const reusableTab = tabs.find((t) => { - try { - return new URL(t.url).hostname === targetHost - } catch { - return false - } - }) - if (reusableTab) { - const tab = await context.getTab(reusableTab.id) - if (tab) { - await tab.navigate(parsed.url) - await context.activateTab?.(reusableTab.id) - return { - content: [ - { - type: 'text', - text: `Reused tab ${reusableTab.id} and navigated to ${parsed.url}\nTitle: ${tab.title || 'unknown'}` - } - ] - } - } - } - } catch { - // Ignore URL parse errors, fall through to normal navigation - } - } - } - - // Normal navigation - let tab = parsed.tabId ? await context.getTab(parsed.tabId) : await context.getActiveTab() - - if (!tab) { - // Create a new tab if none exists - if (context.createTab) { - const newTab = await context.createTab(parsed.url) - if (newTab) { - // Add a small delay to ensure BrowserTab is fully initialized - // This is especially important on first call when browser window is just created - await new Promise((resolve) => setTimeout(resolve, 100)) - // Get the BrowserTab object and wait for navigation to complete - // Note: createTab already started navigation via tabPresenter.createTab, - // so we just need to wait for it to complete - const browserTab = await context.getTab(newTab.id) - if (browserTab) { - try { - // createTab already started navigation via tabPresenter.createTab - // If tab is loading, wait for it to complete instead of calling navigate again - if (browserTab.contents.isLoading()) { - // Wait for current navigation to complete - await new Promise((resolve, reject) => { - let timeout: ReturnType - let onStopLoading: () => void - let onFailLoad: ( - _event: unknown, - errorCode: number, - errorDescription: string - ) => void - - const cleanup = () => { - clearTimeout(timeout) - browserTab.contents.removeListener('did-stop-loading', onStopLoading) - browserTab.contents.removeListener('did-fail-load', onFailLoad) - } - - onStopLoading 
= () => { - cleanup() - resolve() - } - - onFailLoad = (_event, errorCode, errorDescription) => { - cleanup() - reject(new Error(`Navigation failed ${errorCode}: ${errorDescription}`)) - } - - timeout = setTimeout(() => { - cleanup() - reject(new Error('Timeout waiting for page load')) - }, 15000) - - browserTab.contents.once('did-stop-loading', onStopLoading) - browserTab.contents.once('did-fail-load', onFailLoad) - }) - - // Check if URL matches after loading - const finalUrl = browserTab.contents.getURL() - if (finalUrl !== parsed.url) { - // URL doesn't match, need to navigate - await browserTab.navigate(parsed.url, 15000) // 15 second timeout - } - } else { - // Tab is not loading, check if URL matches - const currentUrl = browserTab.contents.getURL() - if (currentUrl !== parsed.url) { - // URL doesn't match, need to navigate - await browserTab.navigate(parsed.url, 15000) // 15 second timeout - } - } - - const result: ToolResult = { - content: [ - { - type: 'text' as const, - text: `Created new tab and navigated to ${parsed.url}\nTitle: ${browserTab.title || 'unknown'}` - } - ] - } - return result - } catch (error) { - console.error('[browser_navigate] Failed to navigate newly created tab:', error) - const errorMessage = error instanceof Error ? 
error.message : String(error) - const result: ToolResult = { - content: [ - { - type: 'text' as const, - text: `Failed to navigate new tab ${browserTab.tabId} to ${parsed.url}\nError: ${errorMessage}\nTitle: ${browserTab.title || 'unknown'}` - } - ], - isError: true - } - return result - } - } - // Fallback if getTab fails - const result: ToolResult = { - content: [ - { - type: 'text' as const, - text: `Created new tab and navigated to ${parsed.url}\nTitle: ${newTab.title || 'unknown'}` - } - ] - } - return result - } - } - const errorResult: ToolResult = { - content: [ - { - type: 'text' as const, - text: 'No active tab available' - } - ], - isError: true - } - return errorResult - } - - await tab.navigate(parsed.url) - const result: ToolResult = { - content: [ - { - type: 'text' as const, - text: `Navigated to ${parsed.url}\nTitle: ${tab.title || 'unknown'}` - } - ] - } - return result - } - }, - { - name: 'browser_go_back', - description: 'Go back to the previous page in the current tab.', - schema: NavigationOnlyArgsSchema, - handler: async (args, context) => { - const parsed = NavigationOnlyArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - - if (!tab) { - return { - content: [ - { - type: 'text', - text: `Tab ${tabId} not found` - } - ], - isError: true - } - } - - await tab.goBack() - return { - content: [ - { - type: 'text', - text: `Went back. 
Current URL: ${tab.url || 'about:blank'}` - } - ] - } - } - }, - { - name: 'browser_go_forward', - description: 'Go forward to the next page in the current tab.', - schema: NavigationOnlyArgsSchema, - handler: async (args, context) => { - const parsed = NavigationOnlyArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - - if (!tab) { - return { - content: [ - { - type: 'text', - text: `Tab ${tabId} not found` - } - ], - isError: true - } - } - - await tab.goForward() - return { - content: [ - { - type: 'text', - text: `Went forward. Current URL: ${tab.url || 'about:blank'}` - } - ] - } - } - } - ] -} diff --git a/src/main/presenter/browser/tools/screenshot.ts b/src/main/presenter/browser/tools/screenshot.ts deleted file mode 100644 index a4de7c1b6..000000000 --- a/src/main/presenter/browser/tools/screenshot.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { z } from 'zod' -import type { BrowserToolDefinition } from './types' - -const ScreenshotArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)'), - selector: z.string().optional().describe('Capture only the element matching this selector'), - fullPage: z.boolean().optional().default(false).describe('Capture the full page'), - highlightSelectors: z - .array(z.string()) - .optional() - .describe('Selectors to highlight before capture') -}) - -export function createScreenshotTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_screenshot', - description: 'Capture a screenshot of the current page or a specific element.', - schema: ScreenshotArgsSchema, - handler: async (args, context) => { - const parsed = ScreenshotArgsSchema.parse(args) - const tabId = await context.resolveTabId(parsed) - const tab = await context.getTab(tabId) - if (!tab) { - return { - content: [{ type: 'text', text: `Tab ${tabId} not found` }], - isError: true - } - } - - const base64 = await tab.takeScreenshot({ - selector: 
parsed.selector, - fullPage: parsed.fullPage, - highlightSelectors: parsed.highlightSelectors - }) - - return { - content: [ - { - type: 'text', - text: `data:image/png;base64,${base64}` - } - ] - } - } - } - ] -} diff --git a/src/main/presenter/browser/tools/tabs.ts b/src/main/presenter/browser/tools/tabs.ts deleted file mode 100644 index 5cdec59af..000000000 --- a/src/main/presenter/browser/tools/tabs.ts +++ /dev/null @@ -1,152 +0,0 @@ -import { z } from 'zod' -import type { BrowserToolDefinition } from './types' - -const BaseArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier (defaults to active tab)') -}) - -const NewTabArgsSchema = z.object({ - url: z.string().url().optional().describe('Optional URL to open in the new tab') -}) - -const SwitchTabArgsSchema = z.object({ - tabId: z.string().describe('Tab identifier to activate') -}) - -const CloseTabArgsSchema = z.object({ - tabId: z.string().optional().describe('Tab identifier to close (defaults to active tab)') -}) - -export function createTabTools(): BrowserToolDefinition[] { - return [ - { - name: 'browser_new_tab', - description: 'Open a new browser tab (window) for the session.', - schema: NewTabArgsSchema, - handler: async (args, context) => { - const parsed = NewTabArgsSchema.parse(args) - if (!context.createTab) { - return { - content: [{ type: 'text', text: 'Tab creation not available' }], - isError: true - } - } - const tab = await context.createTab(parsed.url) - if (!tab) { - return { - content: [{ type: 'text', text: 'Failed to create new tab' }], - isError: true - } - } - return { - content: [ - { - type: 'text', - text: `Opened new tab ${tab.id}${parsed.url ? 
` -> ${parsed.url}` : ''}` - } - ] - } - } - }, - { - name: 'browser_tab_list', - description: 'List all tabs (windows) for the current session.', - schema: BaseArgsSchema, - handler: async (_args, context) => { - if (!context.listTabs) { - return { - content: [{ type: 'text', text: 'Tab list not available' }], - isError: true - } - } - const tabs = await context.listTabs() - const formatted = - tabs.length === 0 - ? 'No tabs open.' - : tabs - .map( - (tab) => - `${tab.isActive ? '*' : ' '} Tab ${tab.id}: ${tab.title || 'Untitled'} (${tab.url || 'about:blank'})` - ) - .join('\n') - - return { - content: [ - { - type: 'text', - text: formatted - } - ] - } - } - }, - { - name: 'browser_switch_tab', - description: 'Activate a specific tab (window) by its id.', - schema: SwitchTabArgsSchema, - handler: async (args, context) => { - const parsed = SwitchTabArgsSchema.parse(args) - if (!context.activateTab) { - return { - content: [{ type: 'text', text: 'Tab activation not available' }], - isError: true - } - } - const tab = await context.getTab(parsed.tabId) - if (!tab) { - return { - content: [ - { - type: 'text', - text: `Tab ${parsed.tabId} not found` - } - ], - isError: true - } - } - - await context.activateTab(parsed.tabId) - return { - content: [ - { - type: 'text', - text: `Switched to tab ${parsed.tabId}: ${tab.title || 'Untitled'}` - } - ] - } - } - }, - { - name: 'browser_close_tab', - description: 'Close a tab (window). Defaults to the active tab.', - schema: CloseTabArgsSchema, - handler: async (args, context) => { - const parsed = CloseTabArgsSchema.parse(args) - if (!context.closeTab) { - return { - content: [{ type: 'text', text: 'Tab closing not available' }], - isError: true - } - } - const tabId = parsed.tabId - ? 
parsed.tabId - : (await context.getActiveTab())?.tabId || (await context.resolveTabId(undefined)) - if (!tabId) { - return { - content: [{ type: 'text', text: 'No active tab to close' }], - isError: true - } - } - await context.closeTab(tabId) - return { - content: [ - { - type: 'text', - text: `Closed tab ${tabId}` - } - ] - } - } - } - ] -} diff --git a/src/main/presenter/browser/tools/types.ts b/src/main/presenter/browser/tools/types.ts deleted file mode 100644 index eba58504d..000000000 --- a/src/main/presenter/browser/tools/types.ts +++ /dev/null @@ -1,48 +0,0 @@ -import type { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js' -import type { CallToolResult, Notification, Request } from '@modelcontextprotocol/sdk/types.js' -import type { ZodTypeAny } from 'zod' -import type { BrowserTab } from '../BrowserTab' - -export type ToolResult = CallToolResult - -export interface BrowserToolContext { - getTab: (tabId?: string) => Promise - getActiveTab: () => Promise - resolveTabId: ( - args: { tabId?: string } | undefined, - extra?: RequestHandlerExtra - ) => Promise - // Tab management methods - createTab?: (url?: string) => Promise<{ id: string; url: string; title: string } | null> - listTabs?: () => Promise> - activateTab?: (tabId: string) => Promise - closeTab?: (tabId: string) => Promise - // Download methods - downloadFile?: ( - url: string, - savePath?: string - ) => Promise<{ - id: string - url: string - filePath?: string - status: string - }> -} - -export interface BrowserToolDefinition { - name: string - description: string - schema: ZodTypeAny - handler: ( - args: any, - context: BrowserToolContext, - extra: RequestHandlerExtra - ) => Promise - annotations?: { - title?: string - readOnlyHint?: boolean - destructiveHint?: boolean - idempotentHint?: boolean - openWorldHint?: boolean - } -} diff --git a/src/main/presenter/configPresenter/index.ts b/src/main/presenter/configPresenter/index.ts index 02eb06af2..01ead3eef 100644 --- 
a/src/main/presenter/configPresenter/index.ts +++ b/src/main/presenter/configPresenter/index.ts @@ -893,6 +893,14 @@ export class ConfigPresenter implements IConfigPresenter { this.uiSettingsHelper.setSearchPreviewEnabled(enabled) } + getAutoScrollEnabled(): boolean { + return this.uiSettingsHelper.getAutoScrollEnabled() + } + + setAutoScrollEnabled(enabled: boolean): void { + this.uiSettingsHelper.setAutoScrollEnabled(enabled) + } + getContentProtectionEnabled(): boolean { return this.uiSettingsHelper.getContentProtectionEnabled() } diff --git a/src/main/presenter/configPresenter/providers.ts b/src/main/presenter/configPresenter/providers.ts index a6cf6ec77..db04a89d4 100644 --- a/src/main/presenter/configPresenter/providers.ts +++ b/src/main/presenter/configPresenter/providers.ts @@ -217,6 +217,21 @@ export const DEFAULT_PROVIDERS: LLM_PROVIDER_BASE[] = [ defaultBaseUrl: 'https://api.openai.com/v1' } }, + { + id: 'voiceai', + name: 'Voice.ai', + apiType: 'voiceai', + apiKey: '', + baseUrl: 'https://dev.voice.ai', + enable: false, + websites: { + official: 'https://voice.ai/', + apiKey: 'https://voice.ai/app/dashboard/developers', + docs: 'https://voice.ai/docs/introduction', + models: 'https://voice.ai/docs/api-reference/text-to-speech/list-voices', + defaultBaseUrl: 'https://dev.voice.ai' + } + }, { id: 'gemini', name: 'Gemini', diff --git a/src/main/presenter/configPresenter/uiSettingsHelper.ts b/src/main/presenter/configPresenter/uiSettingsHelper.ts index e64ccab6e..2c7f1893c 100644 --- a/src/main/presenter/configPresenter/uiSettingsHelper.ts +++ b/src/main/presenter/configPresenter/uiSettingsHelper.ts @@ -49,6 +49,18 @@ export class UiSettingsHelper { eventBus.send(CONFIG_EVENTS.SEARCH_PREVIEW_CHANGED, SendTarget.ALL_WINDOWS, boolValue) } + getAutoScrollEnabled(): boolean { + const value = this.getSetting('autoScrollEnabled') + if (value === undefined) return true + return Boolean(value) + } + + setAutoScrollEnabled(enabled: boolean): void { + const boolValue 
= Boolean(enabled) + this.setSetting('autoScrollEnabled', boolValue) + eventBus.send(CONFIG_EVENTS.AUTO_SCROLL_CHANGED, SendTarget.ALL_WINDOWS, boolValue) + } + getContentProtectionEnabled(): boolean { const value = this.getSetting('contentProtectionEnabled') return value === undefined || value === null ? false : value diff --git a/src/main/presenter/index.ts b/src/main/presenter/index.ts index c2f8402f3..a44432eca 100644 --- a/src/main/presenter/index.ts +++ b/src/main/presenter/index.ts @@ -50,7 +50,11 @@ import { CONFIG_EVENTS, WINDOW_EVENTS } from '@/events' import { KnowledgePresenter } from './knowledgePresenter' import { WorkspacePresenter } from './workspacePresenter' import { ToolPresenter } from './toolPresenter' -import { CommandPermissionService, FilePermissionService } from './permission' +import { + CommandPermissionService, + FilePermissionService, + SettingsPermissionService +} from './permission' import { AgentPresenter } from './agentPresenter' import { SessionManager } from './agentPresenter/session/sessionManager' import { SearchPresenter } from './searchPresenter' @@ -106,6 +110,7 @@ export class Presenter implements IPresenter { skillPresenter: ISkillPresenter skillSyncPresenter: ISkillSyncPresenter filePermissionService: FilePermissionService + settingsPermissionService: SettingsPermissionService private constructor(lifecycleManager: ILifecycleManager) { // Store lifecycle manager reference for component access @@ -121,6 +126,7 @@ export class Presenter implements IPresenter { this.llmproviderPresenter = new LLMProviderPresenter(this.configPresenter, this.sqlitePresenter) const commandPermissionHandler = new CommandPermissionService() this.filePermissionService = new FilePermissionService() + this.settingsPermissionService = new SettingsPermissionService() const messageManager = new MessageManager(this.sqlitePresenter) this.devicePresenter = new DevicePresenter() this.searchPresenter = new SearchPresenter({ diff --git 
a/src/main/presenter/llmProviderPresenter/baseAgentProvider.ts b/src/main/presenter/llmProviderPresenter/baseAgentProvider.ts deleted file mode 100644 index 0d48eab11..000000000 --- a/src/main/presenter/llmProviderPresenter/baseAgentProvider.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { BaseLLMProvider } from './baseProvider' -import type { - AgentPermissionRequest, - AgentPermissionResult, - AgentProcessManager, - AgentSessionManager -} from '../agentPresenter/acp' - -/** - * Base class for Agent-specific providers. - * Ensures that session/process lifecycle management is centralized - * while allowing subclasses to supply concrete managers. - */ -export abstract class BaseAgentProvider< - TSessionManager extends AgentSessionManager = AgentSessionManager, - TProcessManager extends AgentProcessManager = AgentProcessManager, - TPermissionRequest = AgentPermissionRequest, - TPermissionResult = AgentPermissionResult -> extends BaseLLMProvider { - protected abstract getSessionManager(): TSessionManager - protected abstract getProcessManager(): TProcessManager - protected abstract requestPermission(params: TPermissionRequest): Promise - - /** - * Default cleanup hook invoked when provider instances are torn down. - * Clears in-memory sessions and tears down any running agent processes. 
- */ - public cleanup(): void { - void this.getSessionManager() - .clearAllSessions() - .catch((error) => { - console.warn( - `[AgentProvider] Failed to clear sessions for provider "${this.provider.id}":`, - error - ) - }) - - void this.getProcessManager() - .shutdown() - .catch((error) => { - console.warn( - `[AgentProvider] Failed to shutdown process manager for provider "${this.provider.id}":`, - error - ) - }) - } -} diff --git a/src/main/presenter/llmProviderPresenter/index.ts b/src/main/presenter/llmProviderPresenter/index.ts index 61c9b86cc..3b8046593 100644 --- a/src/main/presenter/llmProviderPresenter/index.ts +++ b/src/main/presenter/llmProviderPresenter/index.ts @@ -118,10 +118,6 @@ export class LLMProviderPresenter implements ILlmProviderPresenter { return this.providerInstanceManager.getProviderById(id) } - isAgentProvider(providerId: string): boolean { - return this.providerInstanceManager.isAgentProvider(providerId) - } - async setCurrentProvider(providerId: string): Promise { // 如果有正在生成的流,先停止它们 await this.stopAllStreams() diff --git a/src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts b/src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts index f55260155..49900bb4a 100644 --- a/src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts +++ b/src/main/presenter/llmProviderPresenter/managers/providerInstanceManager.ts @@ -1,7 +1,6 @@ import { ProviderBatchUpdate, ProviderChange } from '@shared/provider-operations' import { IConfigPresenter, LLM_PROVIDER } from '@shared/presenter' import { BaseLLMProvider } from '../baseProvider' -import { BaseAgentProvider } from '../baseAgentProvider' import { OpenAIProvider } from '../providers/openAIProvider' import { DeepseekProvider } from '../providers/deepseekProvider' import { SiliconcloudProvider } from '../providers/siliconcloudProvider' @@ -35,6 +34,7 @@ import { PoeProvider } from '../providers/poeProvider' import { JiekouProvider } from 
'../providers/jiekouProvider' import { ZenmuxProvider } from '../providers/zenmuxProvider' import { O3fanProvider } from '../providers/o3fanProvider' +import { VoiceAIProvider } from '../providers/voiceAIProvider' import { RateLimitManager } from './rateLimitManager' import { StreamState } from '../types' import { AcpSessionPersistence } from '../../agentPresenter/acp' @@ -86,6 +86,7 @@ export class ProviderInstanceManager { ['anthropic', AnthropicProvider], ['doubao', DoubaoProvider], ['openai', OpenAIProvider], + ['voiceai', VoiceAIProvider], ['openai-responses', OpenAIResponsesProvider], ['cherryin', CherryInProvider], ['lmstudio', LMStudioProvider], @@ -118,6 +119,7 @@ export class ProviderInstanceManager { ['anthropic', AnthropicProvider], ['doubao', DoubaoProvider], ['openai', OpenAIProvider], + ['voiceai', VoiceAIProvider], ['openai-compatible', OpenAICompatibleProvider], ['openai-responses', OpenAIResponsesProvider], ['lmstudio', LMStudioProvider], @@ -134,11 +136,6 @@ export class ProviderInstanceManager { ]) } - private static isAgentConstructor(ctor?: ProviderConstructor): boolean { - if (!ctor) return false - return BaseAgentProvider.prototype.isPrototypeOf(ctor.prototype) - } - init(): void { const providers = this.options.configPresenter.getProviders() for (const provider of providers) { @@ -250,24 +247,6 @@ export class ProviderInstanceManager { return instance } - isAgentProvider(providerId: string): boolean { - const instance = this.providerInstances.get(providerId) - if (instance) { - return instance instanceof BaseAgentProvider - } - - const provider = this.providers.get(providerId) - if (!provider) { - return false - } - - const ProviderClass = - ProviderInstanceManager.PROVIDER_ID_MAP.get(provider.id) ?? 
- ProviderInstanceManager.PROVIDER_TYPE_MAP.get(provider.apiType) - - return ProviderInstanceManager.isAgentConstructor(ProviderClass) - } - private handleProviderAdd(change: ProviderChange): void { if (!change.provider) return diff --git a/src/main/presenter/llmProviderPresenter/providers/acpProvider.ts b/src/main/presenter/llmProviderPresenter/providers/acpProvider.ts index 8f5d9e153..84629f61a 100644 --- a/src/main/presenter/llmProviderPresenter/providers/acpProvider.ts +++ b/src/main/presenter/llmProviderPresenter/providers/acpProvider.ts @@ -1,6 +1,5 @@ import type * as schema from '@agentclientprotocol/sdk/dist/schema.js' -import { SUMMARY_TITLES_PROMPT } from '../baseProvider' -import { BaseAgentProvider } from '../baseAgentProvider' +import { BaseLLMProvider, SUMMARY_TITLES_PROMPT } from '../baseProvider' import type { ChatMessage, LLMResponse, @@ -58,12 +57,7 @@ type PendingPermissionState = { reject: (error: Error) => void } -export class AcpProvider extends BaseAgentProvider< - AcpSessionManager, - AcpProcessManager, - schema.RequestPermissionRequest, - schema.RequestPermissionResponse -> { +export class AcpProvider extends BaseLLMProvider { private readonly processManager: AcpProcessManager private readonly sessionManager: AcpSessionManager private readonly sessionPersistence: AcpSessionPersistence @@ -102,21 +96,6 @@ export class AcpProvider extends BaseAgentProvider< void this.initWhenEnabled() } - protected getSessionManager(): AcpSessionManager { - return this.sessionManager - } - - protected getProcessManager(): AcpProcessManager { - return this.processManager - } - - protected async requestPermission( - params: schema.RequestPermissionRequest - ): Promise { - void params - return { outcome: { outcome: 'cancelled' } } - } - protected async fetchProviderModels(): Promise { try { const acpEnabled = await this.configPresenter.getAcpEnabled() diff --git a/src/main/presenter/llmProviderPresenter/providers/voiceAIProvider.ts 
b/src/main/presenter/llmProviderPresenter/providers/voiceAIProvider.ts new file mode 100644 index 000000000..24971ed13 --- /dev/null +++ b/src/main/presenter/llmProviderPresenter/providers/voiceAIProvider.ts @@ -0,0 +1,469 @@ +import { + ChatMessage, + IConfigPresenter, + LLM_PROVIDER, + LLMResponse, + MODEL_META, + LLMCoreStreamEvent, + MCPToolDefinition, + ModelConfig +} from '@shared/presenter' +import { createStreamEvent } from '@shared/types/core/llm-events' +import { BaseLLMProvider } from '../baseProvider' +import { proxyConfig } from '../../proxyConfig' +import { ProxyAgent } from 'undici' + +const DEFAULT_BASE_URL = 'https://dev.voice.ai' +const DEFAULT_AUDIO_FORMAT = 'mp3' +const DEFAULT_TTS_MODEL = 'voiceai-tts-v1-latest' +const DEFAULT_LANGUAGE = 'en' +const DEFAULT_TEMPERATURE = 1 +const DEFAULT_TOP_P = 0.8 +const SUPPORTED_LANGUAGES = new Set([ + 'en', + 'ca', + 'sv', + 'es', + 'fr', + 'de', + 'it', + 'pt', + 'pl', + 'ru', + 'nl' +]) + +const AUDIO_MIME_TYPE: Record = { + mp3: 'audio/mpeg', + wav: 'audio/wav', + pcm: 'audio/pcm' +} + +type VoiceStatusResponse = { + voice_id: string + name?: string | null + status?: string + voice_visibility?: string | null +} + +type VoiceAITtsConfig = { + audioFormat: string + model: string + language: string + temperature: number + topP: number +} + +export class VoiceAIProvider extends BaseLLMProvider { + private proxyAgent?: ProxyAgent + private proxyUrl?: string + + constructor(provider: LLM_PROVIDER, configPresenter: IConfigPresenter) { + super(provider, configPresenter) + this.init() + } + + public onProxyResolved(): void { + this.proxyAgent = undefined + this.proxyUrl = undefined + } + + public async check(): Promise<{ isOk: boolean; errorMsg: string | null }> { + if (!this.provider.apiKey) { + return { isOk: false, errorMsg: 'API key is required' } + } + + try { + await this.listVoices() + return { isOk: true, errorMsg: null } + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : String(error) + return { isOk: false, errorMsg: message } + } + } + + public async summaryTitles(messages: ChatMessage[], _modelId: string): Promise { + const text = this.extractLatestUserText(messages) + if (!text) return 'Voice AI' + return this.buildShortTitle(text) + } + + public async completions( + messages: ChatMessage[], + modelId: string, + temperature?: number, + _maxTokens?: number + ): Promise { + const text = this.extractLatestUserText(messages) + if (!text) { + throw new Error('No user text provided for Voice.ai TTS') + } + + await this.generateSpeech(text, modelId, temperature) + + return { + content: text + } + } + + public async summaries( + text: string, + modelId: string, + temperature?: number, + _maxTokens?: number + ): Promise { + if (!text) { + throw new Error('No text provided for Voice.ai TTS') + } + + await this.generateSpeech(text, modelId, temperature) + + return { + content: this.buildShortTitle(text) + } + } + + public async generateText( + prompt: string, + modelId: string, + temperature?: number, + _maxTokens?: number + ): Promise { + if (!prompt) { + throw new Error('No prompt provided for Voice.ai TTS') + } + + await this.generateSpeech(prompt, modelId, temperature) + + return { + content: prompt + } + } + + public async *coreStream( + messages: ChatMessage[], + modelId: string, + _modelConfig: ModelConfig, + temperature: number, + _maxTokens: number, + _mcpTools: MCPToolDefinition[] + ): AsyncGenerator { + const text = this.extractLatestUserText(messages) + if (!text) { + yield createStreamEvent.error('No user text provided for Voice.ai TTS') + yield createStreamEvent.stop('error') + return + } + + try { + const { audioBase64, mimeType } = await this.generateSpeech(text, modelId, temperature) + + yield createStreamEvent.imageData({ + data: audioBase64, + mimeType + }) + + yield createStreamEvent.stop('complete') + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : String(error) + yield createStreamEvent.error(message) + yield createStreamEvent.stop('error') + } + } + + protected async fetchProviderModels(): Promise { + if (!this.provider.apiKey) return [] + + try { + const voices = await this.listVoices() + const models: MODEL_META[] = voices.map((voice) => ({ + id: voice.voice_id, + name: voice.name && voice.name.trim().length > 0 ? voice.name : voice.voice_id, + group: 'default', + providerId: this.provider.id, + isCustom: false, + contextLength: 4096, + maxTokens: 2048 + })) + + const defaultVoice: MODEL_META = { + id: 'default', + name: 'Default Voice', + group: 'default', + providerId: this.provider.id, + isCustom: false, + contextLength: 4096, + maxTokens: 2048 + } + + return [defaultVoice, ...models] + } catch (error) { + console.error('[VoiceAI] Failed to fetch voices:', error) + return [] + } + } + + private getFetchOptions(): { dispatcher?: ProxyAgent } { + const proxyUrl = proxyConfig.getProxyUrl() + if (!proxyUrl) return {} + if (this.proxyUrl !== proxyUrl || !this.proxyAgent) { + this.proxyAgent = new ProxyAgent(proxyUrl) + this.proxyUrl = proxyUrl + } + return { dispatcher: this.proxyAgent } + } + + private getBaseUrl(): string { + const raw = this.provider.baseUrl?.trim() + if (raw && raw.length > 0) { + return raw.replace(/\/+$/, '') + } + return DEFAULT_BASE_URL + } + + private buildUrl(path: string): string { + const base = this.getBaseUrl() + const normalizedPath = path.startsWith('/') ? 
path : `/${path}` + return `${base}${normalizedPath}` + } + + private getAuthHeaders(): Record { + if (!this.provider.apiKey) { + throw new Error('API key is required') + } + + return { + Authorization: `Bearer ${this.provider.apiKey}`, + 'Content-Type': 'application/json', + ...this.defaultHeaders + } + } + + private getTtsConfig(): VoiceAITtsConfig { + const audioFormat = + this.configPresenter.getSetting('voiceAI_audioFormat') || DEFAULT_AUDIO_FORMAT + const model = this.configPresenter.getSetting('voiceAI_model') || DEFAULT_TTS_MODEL + const rawLanguage = this.configPresenter.getSetting('voiceAI_language') + const language = rawLanguage?.trim().toLowerCase() || DEFAULT_LANGUAGE + const temperatureSetting = this.configPresenter.getSetting('voiceAI_temperature') + const topPSetting = this.configPresenter.getSetting('voiceAI_topP') + + return { + audioFormat, + model, + language, + temperature: + typeof temperatureSetting === 'number' ? temperatureSetting : DEFAULT_TEMPERATURE, + topP: typeof topPSetting === 'number' ? 
topPSetting : DEFAULT_TOP_P + } + } + + private resolveVoiceId(modelId: string | undefined): string | null { + if (!modelId) return null + if (modelId === 'default') return null + return modelId + } + + private getAudioMimeType(format: string): string { + const key = format.toLowerCase() + return AUDIO_MIME_TYPE[key] || 'audio/mpeg' + } + + private parseDataUri(value: string): { mimeType: string; data: string } | null { + const match = value.match(/^data:([^;]+);base64,(.*)$/) + if (!match?.[1] || !match?.[2]) return null + return { mimeType: match[1], data: match[2] } + } + + private isHttpUrl(value: string): boolean { + return value.startsWith('http://') || value.startsWith('https://') + } + + private pickString(source: Record, keys: string[]): string | null { + for (const key of keys) { + const value = source[key] + if (typeof value === 'string' && value.trim().length > 0) { + return value + } + } + return null + } + + private async fetchAudioFromUrl( + url: string, + fallbackMimeType: string + ): Promise<{ audioBase64: string; mimeType: string }> { + const headers: Record = { ...this.defaultHeaders } + const baseUrl = this.getBaseUrl() + if (this.provider.apiKey && url.startsWith(baseUrl)) { + headers.Authorization = `Bearer ${this.provider.apiKey}` + } + + const response = await fetch(url, { + method: 'GET', + headers, + ...this.getFetchOptions() + }) + + if (!response.ok) { + const errorText = await response.text() + throw new Error(`Voice.ai audio fetch failed: ${response.status} ${errorText}`) + } + + const contentType = response.headers.get('content-type')?.split(';')[0]?.trim() + const mimeType = contentType && contentType.length > 0 ? 
contentType : fallbackMimeType + const buffer = Buffer.from(await response.arrayBuffer()) + return { audioBase64: buffer.toString('base64'), mimeType } + } + + private async resolveAudioValue( + value: string, + fallbackMimeType: string + ): Promise<{ audioBase64: string; mimeType: string } | null> { + const trimmed = value.trim() + if (!trimmed) return null + + const dataUri = this.parseDataUri(trimmed) + if (dataUri) { + return { audioBase64: dataUri.data, mimeType: dataUri.mimeType } + } + + if (this.isHttpUrl(trimmed)) { + return await this.fetchAudioFromUrl(trimmed, fallbackMimeType) + } + + return { audioBase64: trimmed, mimeType: fallbackMimeType } + } + + private async resolveAudioFromJson( + payload: unknown, + fallbackMimeType: string + ): Promise<{ audioBase64: string; mimeType: string } | null> { + if (!payload || typeof payload !== 'object') return null + + const data = payload as Record + const rootMimeType = + this.pickString(data, ['mime_type', 'content_type', 'contentType']) || fallbackMimeType + + const audioField = data.audio + if (audioField && typeof audioField === 'object') { + const audioData = audioField as Record + const audioMimeType = + this.pickString(audioData, ['mime_type', 'content_type', 'contentType']) || rootMimeType + const audioValue = + this.pickString(audioData, ['base64', 'data', 'audio_base64', 'audioBase64', 'audio']) || + this.pickString(audioData, ['url', 'audio_url', 'audioUrl']) + if (audioValue) { + return await this.resolveAudioValue(audioValue, audioMimeType) + } + } + + const directAudioValue = + this.pickString(data, ['audio_base64', 'audioBase64', 'audio', 'data']) || + this.pickString(data, ['audio_url', 'audioUrl', 'url']) + if (directAudioValue) { + return await this.resolveAudioValue(directAudioValue, rootMimeType) + } + + return null + } + + private async listVoices(): Promise { + const response = await fetch(this.buildUrl('/api/v1/tts/voices'), { + method: 'GET', + headers: this.getAuthHeaders(), + 
...this.getFetchOptions() + }) + + if (!response.ok) { + const errorText = await response.text() + throw new Error(`Voice.ai list voices failed: ${response.status} ${errorText}`) + } + + const data = await response.json() + if (!Array.isArray(data)) return [] + return data as VoiceStatusResponse[] + } + + private async generateSpeech( + text: string, + modelId: string, + temperature?: number + ): Promise<{ audioBase64: string; mimeType: string }> { + const config = this.getTtsConfig() + if (!SUPPORTED_LANGUAGES.has(config.language)) { + throw new Error( + `Unsupported language code: ${config.language}. Supported languages: ${Array.from( + SUPPORTED_LANGUAGES + ).join(', ')}` + ) + } + const voiceId = this.resolveVoiceId(modelId) + const requestBody: Record = { + text, + audio_format: config.audioFormat, + model: config.model, + language: config.language, + temperature: typeof temperature === 'number' ? temperature : config.temperature, + top_p: config.topP + } + + if (voiceId) { + requestBody['voice_id'] = voiceId + } + + const response = await fetch(this.buildUrl('/api/v1/tts/speech'), { + method: 'POST', + headers: this.getAuthHeaders(), + body: JSON.stringify(requestBody), + ...this.getFetchOptions() + }) + + if (!response.ok) { + const errorText = await response.text() + throw new Error(`Voice.ai generate speech failed: ${response.status} ${errorText}`) + } + + const contentType = response.headers.get('content-type')?.split(';')[0]?.trim() + const fallbackMimeType = this.getAudioMimeType(config.audioFormat) + + if (contentType?.includes('application/json')) { + const json = await response.json() + const resolved = await this.resolveAudioFromJson(json, fallbackMimeType) + if (!resolved) { + throw new Error('Voice.ai generate speech returned JSON without audio data') + } + return resolved + } + + const mimeType = contentType && contentType.length > 0 ? 
contentType : fallbackMimeType + const buffer = Buffer.from(await response.arrayBuffer()) + return { audioBase64: buffer.toString('base64'), mimeType } + } + + private extractLatestUserText(messages: ChatMessage[]): string | null { + const lastUser = [...messages].reverse().find((message) => message.role === 'user') + if (!lastUser?.content) return null + + if (typeof lastUser.content === 'string') { + return lastUser.content + } + + if (Array.isArray(lastUser.content)) { + const textParts = lastUser.content + .filter((part) => part.type === 'text') + .map((part) => part.text) + .filter(Boolean) + + return textParts.length > 0 ? textParts.join('\n') : null + } + + return null + } + + private buildShortTitle(text: string): string { + const normalized = text.replace(/\s+/g, ' ').trim() + if (!normalized) return 'Voice AI' + return normalized.length > 32 ? `${normalized.slice(0, 32)}…` : normalized + } +} diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts index 68d7aa4da..6a1675ba1 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts @@ -1569,11 +1569,16 @@ export class AppleServer { if (!parsedArgs.query) { throw new Error('Query is required for search operation') } + await runAppleScript(` + open location "maps://?q=${encodeURIComponent(parsedArgs.query)}" + delay 0.3 + tell application "Maps" to activate + `) return { content: [ { type: 'text' as const, - text: `Found locations for "${parsedArgs.query}":\n\n• Location 1: ${parsedArgs.query} Restaurant\n• Location 2: ${parsedArgs.query} Store\n• Location 3: ${parsedArgs.query} Center` + text: `Search for "${parsedArgs.query}" has been launched in Apple Maps app. 
Please check the Maps window for results.` } ] } diff --git a/src/main/presenter/mcpPresenter/index.ts b/src/main/presenter/mcpPresenter/index.ts index 19f6a0707..105b86662 100644 --- a/src/main/presenter/mcpPresenter/index.ts +++ b/src/main/presenter/mcpPresenter/index.ts @@ -645,6 +645,7 @@ export class McpPresenter implements IMCPPresenter { toolDefinition: MCPToolDefinition, serverName: string ): MCPTool { + const toolParameters = toolDefinition.function.parameters const mcpTool = { id: toolDefinition.function.name, name: toolDefinition.function.name, @@ -652,12 +653,9 @@ export class McpPresenter implements IMCPPresenter { description: toolDefinition.function.description, serverName, inputSchema: { - properties: toolDefinition.function.parameters.properties as Record< - string, - Record - >, - type: toolDefinition.function.parameters.type, - required: toolDefinition.function.parameters.required + properties: (toolParameters?.properties ?? {}) as Record>, + type: toolParameters?.type ?? 'object', + required: toolParameters?.required ?? [] } } as MCPTool return mcpTool @@ -676,7 +674,7 @@ export class McpPresenter implements IMCPPresenter { '$def' ] - const properties = tool.inputSchema.properties + const properties = tool.inputSchema.properties ?? 
{} // Recursive cleanup function to ensure all values are serializable const cleanValue = (value: unknown): unknown => { diff --git a/src/main/presenter/permission/index.ts b/src/main/presenter/permission/index.ts index ca6af98dc..3bbc8956c 100644 --- a/src/main/presenter/permission/index.ts +++ b/src/main/presenter/permission/index.ts @@ -1,6 +1,7 @@ export { CommandPermissionService } from './commandPermissionService' export { CommandPermissionCache } from './commandPermissionCache' export { FilePermissionService, FilePermissionRequiredError } from './filePermissionService' +export { SettingsPermissionService } from './settingsPermissionService' export type { CommandRiskLevel, CommandPermissionCheckResult, diff --git a/src/main/presenter/permission/settingsPermissionService.ts b/src/main/presenter/permission/settingsPermissionService.ts new file mode 100644 index 000000000..c3da6b189 --- /dev/null +++ b/src/main/presenter/permission/settingsPermissionService.ts @@ -0,0 +1,53 @@ +export class SettingsPermissionService { + private readonly sessionApprovals = new Map>() + private readonly oneTimeApprovals = new Map>() + + approve(conversationId: string, toolName: string, remember: boolean): void { + if (!conversationId) return + const normalized = toolName.trim() + if (!normalized) return + + if (remember) { + const existing = this.sessionApprovals.get(conversationId) ?? new Set() + existing.add(normalized) + this.sessionApprovals.set(conversationId, existing) + return + } + + const existing = this.oneTimeApprovals.get(conversationId) ?? 
new Set() + existing.add(normalized) + this.oneTimeApprovals.set(conversationId, existing) + } + + consumeApproval(conversationId: string, toolName: string): boolean { + if (!conversationId) return false + const normalized = toolName.trim() + if (!normalized) return false + + const session = this.sessionApprovals.get(conversationId) + if (session?.has(normalized)) { + return true + } + + const oneTime = this.oneTimeApprovals.get(conversationId) + if (!oneTime?.has(normalized)) { + return false + } + + oneTime.delete(normalized) + if (oneTime.size === 0) { + this.oneTimeApprovals.delete(conversationId) + } + return true + } + + clearConversation(conversationId: string): void { + this.sessionApprovals.delete(conversationId) + this.oneTimeApprovals.delete(conversationId) + } + + clearAll(): void { + this.sessionApprovals.clear() + this.oneTimeApprovals.clear() + } +} diff --git a/src/main/presenter/sessionPresenter/index.ts b/src/main/presenter/sessionPresenter/index.ts index 1671f0178..eb9a785d8 100644 --- a/src/main/presenter/sessionPresenter/index.ts +++ b/src/main/presenter/sessionPresenter/index.ts @@ -64,6 +64,7 @@ export class SessionPresenter implements ISessionPresenter { if (activeConversationId) { this.commandPermissionService.clearConversation(activeConversationId) presenter.filePermissionService?.clearConversation(activeConversationId) + presenter.settingsPermissionService?.clearConversation(activeConversationId) this.clearActiveConversation(tabId, { notify: true }) console.log(`SessionPresenter: Cleaned up conversation binding for closed tab ${tabId}.`) } @@ -164,6 +165,10 @@ export class SessionPresenter implements ISessionPresenter { return this.messageManager.getLastUserMessage(sessionId) } + async getLastAssistantMessage(sessionId: string): Promise { + return this.messageManager.getLastAssistantMessage(sessionId) + } + async forkSession( targetSessionId: string, targetMessageId: string, @@ -285,6 +290,7 @@ export class SessionPresenter implements 
ISessionPresenter { if (conversationId) { this.commandPermissionService.clearConversation(conversationId) presenter.filePermissionService?.clearConversation(conversationId) + presenter.settingsPermissionService?.clearConversation(conversationId) } this.conversationManager.clearActiveConversation(tabId, options) } @@ -292,6 +298,7 @@ export class SessionPresenter implements ISessionPresenter { clearConversationBindings(conversationId: string): void { this.commandPermissionService.clearConversation(conversationId) presenter.filePermissionService?.clearConversation(conversationId) + presenter.settingsPermissionService?.clearConversation(conversationId) this.conversationManager.clearConversationBindings(conversationId) } @@ -299,10 +306,12 @@ export class SessionPresenter implements ISessionPresenter { if (conversationId) { this.commandPermissionService.clearConversation(conversationId) presenter.filePermissionService?.clearConversation(conversationId) + presenter.settingsPermissionService?.clearConversation(conversationId) return } this.commandPermissionService.clearAll() presenter.filePermissionService?.clearAll() + presenter.settingsPermissionService?.clearAll() } async setActiveConversation(conversationId: string, tabId: number): Promise { @@ -434,6 +443,7 @@ export class SessionPresenter implements ISessionPresenter { async deleteConversation(conversationId: string): Promise { this.commandPermissionService.clearConversation(conversationId) presenter.filePermissionService?.clearConversation(conversationId) + presenter.settingsPermissionService?.clearConversation(conversationId) await this.deleteSessionOffloadFiles(conversationId) await this.conversationManager.deleteConversation(conversationId) } diff --git a/src/main/presenter/sessionPresenter/managers/messageManager.ts b/src/main/presenter/sessionPresenter/managers/messageManager.ts index 81e47d99f..c1b5001c2 100644 --- a/src/main/presenter/sessionPresenter/managers/messageManager.ts +++ 
b/src/main/presenter/sessionPresenter/managers/messageManager.ts @@ -354,6 +354,14 @@ export class MessageManager implements IMessageManager { return this.convertToMessage(sqliteMessage) } + async getLastAssistantMessage(conversationId: string): Promise { + const sqliteMessage = await this.sqlitePresenter.getLastAssistantMessage(conversationId) + if (!sqliteMessage) { + return null + } + return this.convertToMessage(sqliteMessage) + } + async clearAllMessages(conversationId: string): Promise { await this.sqlitePresenter.deleteAllMessagesInConversation(conversationId) } diff --git a/src/main/presenter/sessionPresenter/types.ts b/src/main/presenter/sessionPresenter/types.ts index be36a0b9d..0873dcb1d 100644 --- a/src/main/presenter/sessionPresenter/types.ts +++ b/src/main/presenter/sessionPresenter/types.ts @@ -1,4 +1,10 @@ -export type SessionStatus = 'idle' | 'generating' | 'paused' | 'waiting_permission' | 'error' +export type SessionStatus = + | 'idle' + | 'generating' + | 'paused' + | 'waiting_permission' + | 'waiting_question' + | 'error' export type SessionConfig = { sessionId: string diff --git a/src/main/presenter/sqlitePresenter/index.ts b/src/main/presenter/sqlitePresenter/index.ts index af867f0ff..d0619c2d8 100644 --- a/src/main/presenter/sqlitePresenter/index.ts +++ b/src/main/presenter/sqlitePresenter/index.ts @@ -397,6 +397,10 @@ export class SQLitePresenter implements ISQLitePresenter { return this.messagesTable.getLastUserMessage(conversationId) } + public async getLastAssistantMessage(conversationId: string): Promise { + return this.messagesTable.getLastAssistantMessage(conversationId) + } + public async getMainMessageByParentId( conversationId: string, parentId: string diff --git a/src/main/presenter/sqlitePresenter/tables/messages.ts b/src/main/presenter/sqlitePresenter/tables/messages.ts index 69120f20a..764148da2 100644 --- a/src/main/presenter/sqlitePresenter/tables/messages.ts +++ b/src/main/presenter/sqlitePresenter/tables/messages.ts @@ 
-270,6 +270,32 @@ export class MessagesTable extends BaseTable { .get(conversationId) as SQLITE_MESSAGE | null } + async getLastAssistantMessage(conversationId: string): Promise { + return this.db + .prepare( + ` + SELECT + msg_id as id, + conversation_id, + parent_id, + content, + role, + created_at, + order_seq, + token_count, + status, + metadata, + is_context_edge, + is_variant + FROM messages + WHERE conversation_id = ? AND role = 'assistant' AND is_variant = 0 + ORDER BY created_at DESC, order_seq DESC + LIMIT 1 + ` + ) + .get(conversationId) as SQLITE_MESSAGE | null + } + async getMainMessageByParentId( conversationId: string, parentId: string diff --git a/src/main/presenter/toolPresenter/index.ts b/src/main/presenter/toolPresenter/index.ts index b5d65c8e4..1a0c2b8af 100644 --- a/src/main/presenter/toolPresenter/index.ts +++ b/src/main/presenter/toolPresenter/index.ts @@ -7,6 +7,7 @@ import type { MCPToolResponse } from '@shared/presenter' import { resolveToolOffloadTemplatePath } from '../sessionPresenter/sessionPaths' +import { QUESTION_TOOL_NAME } from '../agentPresenter/tools/questionTool' import { ToolMapper } from './toolMapper' import { AgentToolManager, type AgentToolCallResult } from '../agentPresenter/acp' import { jsonrepair } from 'jsonrepair' @@ -18,6 +19,7 @@ export interface IToolPresenter { chatMode?: 'chat' | 'agent' | 'acp agent' supportsVision?: boolean agentWorkspacePath?: string | null + conversationId?: string }): Promise callTool(request: MCPToolCall): Promise<{ content: unknown; rawData: MCPToolResponse }> buildToolSystemPrompt(context: { conversationId?: string }): string @@ -53,6 +55,7 @@ export class ToolPresenter implements IToolPresenter { chatMode?: 'chat' | 'agent' | 'acp agent' supportsVision?: boolean agentWorkspacePath?: string | null + conversationId?: string }): Promise { const defs: MCPToolDefinition[] = [] this.mapper.clear() @@ -71,7 +74,6 @@ export class ToolPresenter implements IToolPresenter { // Initialize or update 
AgentToolManager if workspace path changed if (!this.agentToolManager) { this.agentToolManager = new AgentToolManager({ - yoBrowserPresenter: this.options.yoBrowserPresenter, agentWorkspacePath, configPresenter: this.options.configPresenter, commandPermissionHandler: this.options.commandPermissionHandler @@ -82,7 +84,8 @@ export class ToolPresenter implements IToolPresenter { const agentDefs = await this.agentToolManager.getAllToolDefinitions({ chatMode, supportsVision, - agentWorkspacePath + agentWorkspacePath, + conversationId: context.conversationId }) const filteredAgentDefs = agentDefs.filter((tool) => { if (!this.mapper.hasTool(tool.function.name)) return true @@ -170,7 +173,8 @@ export class ToolPresenter implements IToolPresenter { return [ 'Tool outputs may be offloaded when large.', `When you see an offload stub, read the full output from: ${offloadPath}`, - 'Use file tools to read that path. Access is limited to the current conversation session.' + 'Use file tools to read that path. 
Access is limited to the current conversation session.', + `If you need user confirmation or choices, ask with the ${QUESTION_TOOL_NAME} tool.` ].join('\n') } } diff --git a/src/renderer/settings/App.vue b/src/renderer/settings/App.vue index db26c0eb1..26de3fa75 100644 --- a/src/renderer/settings/App.vue +++ b/src/renderer/settings/App.vue @@ -65,7 +65,7 @@ import ModelCheckDialog from '@/components/settings/ModelCheckDialog.vue' import { useDeviceVersion } from '../src/composables/useDeviceVersion' import { Toaster } from '@shadcn/components/ui/sonner' import 'vue-sonner/style.css' -import { NOTIFICATION_EVENTS } from '@/events' +import { NOTIFICATION_EVENTS, SETTINGS_EVENTS } from '@/events' import { useToast } from '@/components/use-toast' import { useThemeStore } from '@/stores/theme' import { useProviderStore } from '@/stores/providerStore' @@ -113,6 +113,21 @@ const { t, locale } = useI18n() const router = useRouter() const route = useRoute() const title = useTitle() +const handleSettingsNavigate = async ( + _event: unknown, + payload?: { routeName?: string; section?: string } +) => { + const routeName = payload?.routeName + if (!routeName || !router.hasRoute(routeName)) return + await router.isReady() + if (router.currentRoute.value.name !== routeName) { + await router.push({ name: routeName }) + } +} + +if (window?.electron?.ipcRenderer) { + window.electron.ipcRenderer.on(SETTINGS_EVENTS.NAVIGATE, handleSettingsNavigate) +} const settings: Ref< { title: string @@ -333,6 +348,7 @@ onBeforeUnmount(() => { } window.electron.ipcRenderer.removeAllListeners(NOTIFICATION_EVENTS.SHOW_ERROR) + window.electron.ipcRenderer.removeListener(SETTINGS_EVENTS.NAVIGATE, handleSettingsNavigate) cleanupMcpDeeplink() }) diff --git a/src/renderer/settings/components/AnthropicProviderSettingsDetail.vue b/src/renderer/settings/components/AnthropicProviderSettingsDetail.vue index 4c5f2fb8f..b6a9671bc 100644 --- a/src/renderer/settings/components/AnthropicProviderSettingsDetail.vue 
+++ b/src/renderer/settings/components/AnthropicProviderSettingsDetail.vue @@ -54,11 +54,34 @@ @keyup.enter="handleApiHostChange(apiHost)" />
- {{ - t('settings.provider.urlFormat', { - defaultUrl: 'https://api.anthropic.com' - }) - }} + + + + + + + {{ t('settings.provider.urlFormatFill') }} + + + + + {{ + t('settings.provider.urlFormat', { + defaultUrl: defaultBaseUrl + }) + }} +