|
| 1 | +import type { Page } from "playwright-core"; |
1 | 2 | import z from "zod"; |
2 | 3 | import type { Tool } from "."; |
3 | 4 | import { createAgentLogUpdate } from "../utils/agent-log"; |
@@ -67,7 +68,26 @@ export const parseComputerToolArgs = (args: string | object) => { |
67 | 68 | if (actionString.includes("wait")) { |
68 | 69 | return { schema: waitActionSchema, args: parsedArgs }; |
69 | 70 | } |
70 | | - return null; |
| 71 | + |
| 72 | + // Handle screenshot action specifically - screenshots should be automatic, not manual |
| 73 | + if (actionString.includes("screenshot")) { |
| 74 | + console.warn( |
| 75 | + "[parseComputerToolArgs] LLM requested screenshot action - screenshots should be automatic", |
| 76 | + ); |
| 77 | + return { |
| 78 | + schema: waitActionSchema, |
| 79 | + args: { ...parsedArgs, action: { type: "wait", duration: 0 } }, |
| 80 | + }; |
| 81 | + } |
| 82 | + |
| 83 | + // Fallback for any other unrecognized actions - don't return null |
| 84 | + console.warn( |
| 85 | + `[parseComputerToolArgs] Unrecognized computer action: ${actionString}`, |
| 86 | + ); |
| 87 | + return { |
| 88 | + schema: waitActionSchema, |
| 89 | + args: { ...parsedArgs, action: { type: "wait", duration: 0 } }, |
| 90 | + }; |
71 | 91 | }; |
72 | 92 |
|
73 | 93 | const clickActionSchema = z |
@@ -344,6 +364,34 @@ export function createComputerTool<T, R>({ |
344 | 364 | }, |
345 | 365 | execute: async (args, context) => { |
346 | 366 | const result = await computerProvider.performAction(args.action, context); |
| 367 | + |
| 368 | + // Smart delay with network idle support |
| 369 | + if (args.action.type === "click" || args.action.type === "double_click") { |
| 370 | + try { |
| 371 | + const instance = await computerProvider.getInstance( |
| 372 | + context.sessionId, |
| 373 | + ); |
| 374 | + const page = (instance as { page?: Page })?.page; |
| 375 | + |
| 376 | + if (page?.waitForLoadState) { |
| 377 | + await page.waitForLoadState("networkidle", { timeout: 1500 }); |
| 378 | + } else { |
| 379 | + await new Promise((resolve) => setTimeout(resolve, 1500)); |
| 380 | + } |
| 381 | + } catch { |
| 382 | + await new Promise((resolve) => setTimeout(resolve, 1500)); |
| 383 | + } |
| 384 | + } else { |
| 385 | + // Other action types get fixed delays |
| 386 | + const delay = |
| 387 | + args.action.type === "type" |
| 388 | + ? 300 |
| 389 | + : args.action.type === "scroll" |
| 390 | + ? 800 |
| 391 | + : 500; |
| 392 | + await new Promise((resolve) => setTimeout(resolve, delay)); |
| 393 | + } |
| 394 | + |
347 | 395 | const screenshot = await computerProvider.takeScreenshot( |
348 | 396 | context.sessionId, |
349 | 397 | ); |
|
0 commit comments