diff --git a/src/agents/planner-executor/extraction-keywords.ts b/src/agents/planner-executor/extraction-keywords.ts
index 77fbb13..b641cd3 100644
--- a/src/agents/planner-executor/extraction-keywords.ts
+++ b/src/agents/planner-executor/extraction-keywords.ts
@@ -282,12 +282,62 @@ export function isTextExtractionTask(task: string): boolean {
* @param extractQuery - What to extract
* @returns Tuple of [systemPrompt, userPrompt]
*/
+/**
+ * Heuristically decides whether `text` is a JSON (or JSONP) payload rather than page prose.
+ *
+ * A surrounding Markdown code fence of one to four backticks is removed first,
+ * together with an optional info string (for example `json`) on the opening
+ * fence line, so language-tagged fences are recognised as well. Only the leading
+ * characters are inspected; the payload is never parsed, so this can return
+ * `true` for text that merely starts like JSON.
+ *
+ * @param text - Raw page content, possibly fenced and/or JSONP-wrapped.
+ * @returns `true` when the unfenced text starts with `{`, `[`, `callback(` or `jsonp(`.
+ */
+function looksLikeJson(text: string): boolean {
+  let trimmed = text.trim();
+  // The optional `[A-Za-z][\w+-]*` group consumes a fence info string, but only
+  // when a newline follows it, so an inline `callback(...)` span is left intact.
+  const fenceMatch = trimmed.match(
+    /^`{1,4}(?:[A-Za-z][\w+-]*[ \t]*\r?\n)?\s*\n?([\s\S]*?)(?:\n?`{1,4}\s*)?$/
+  );
+  if (fenceMatch) {
+    trimmed = fenceMatch[1].trim();
+  }
+  return (
+    trimmed.startsWith('{') ||
+    trimmed.startsWith('[') ||
+    trimmed.startsWith('callback(') ||
+    trimmed.startsWith('jsonp(')
+  );
+}
+
+/**
+ * Unwraps a JSON payload from an optional Markdown code fence and an optional JSONP call.
+ *
+ * The fence (one to four backticks) may carry an info string such as `json` on
+ * its opening line; that line is discarded along with the fence so it does not
+ * end up in front of the payload. Only wrappers literally named `callback` or
+ * `jsonp` are unwrapped; any other text is returned trimmed but otherwise as-is.
+ *
+ * @param text - Raw page content, possibly fenced and/or JSONP-wrapped.
+ * @returns The inner payload with surrounding whitespace removed.
+ */
+function stripJsonp(text: string): string {
+  let trimmed = text.trim();
+  // The optional `[A-Za-z][\w+-]*` group consumes a fence info string, but only
+  // when a newline follows it, so an inline `callback(...)` span is left intact.
+  const fenceMatch = trimmed.match(
+    /^`{1,4}(?:[A-Za-z][\w+-]*[ \t]*\r?\n)?\s*\n?([\s\S]*?)(?:\n?`{1,4}\s*)?$/
+  );
+  if (fenceMatch) {
+    trimmed = fenceMatch[1].trim();
+  }
+  // Greedy capture so nested parentheses inside the payload stay part of it.
+  const m = trimmed.match(/^(?:callback|jsonp)\s*\(\s*([\s\S]*)\s*\)\s*;?\s*$/);
+  return m ? m[1].trim() : trimmed;
+}
+
export function buildExtractionPrompt(pageContent: string, extractQuery: string): [string, string] {
- // NOTE: /no_think MUST be at the START of user message for Qwen3 models.
- // Without it, Qwen3 puts the answer in tags and content is empty.
- const system = `You extract specific text from page content. Return only the extracted text. Do NOT output any thinking, reasoning, or explanation.`;
+ const isJson = looksLikeJson(pageContent);
+ const contentForPrompt = isJson ? stripJsonp(pageContent) : pageContent;
+
+ let system: string;
+ let user: string;
+
+ if (isJson) {
+ system = `You extract data from JSON content. Return ONLY the extracted data as readable text. Do NOT output any thinking, reasoning, or explanation.`;
- const user = `/no_think
+ user = `/no_think
+You are a data extraction assistant. The page content below is JSON data from a search API response. Parse the JSON and extract the specific information requested.
+
+JSON CONTENT:
+${contentForPrompt}
+
+EXTRACTION REQUEST:
+${extractQuery}
+
+INSTRUCTIONS:
+1. Parse the JSON structure carefully
+2. Look for the requested fields in the JSON objects (e.g., in "docs", "results", "items", "response.docs", or similar arrays)
+3. If the requested fields exist, extract and format them as readable text
+4. If the request asks for first/top/last N items, return exactly N matching items when available and do not include extra rows
+5. If the JSON does NOT contain the requested fields, list what fields ARE available and return "NOT_FOUND: available fields: "
+6. Return ONLY the extracted text or the NOT_FOUND message
+
+EXTRACTED TEXT:`;
+ } else {
+ system = `You extract specific text from page content. Return only the extracted text. Do NOT output any thinking, reasoning, or explanation.`;
+
+ user = `/no_think
You are a text extraction assistant. Given the page content below, extract the specific information requested.
PAGE CONTENT:
@@ -299,10 +349,12 @@ ${extractQuery}
INSTRUCTIONS:
1. Read the content carefully
2. Find and extract ONLY the specific information requested
-3. Return ONLY the extracted text, nothing else
-4. If the information is not found, return "NOT_FOUND"
+3. If the request asks for first/top/last N items, return exactly N matching items when available and do not include extra rows
+4. Return ONLY the extracted text, nothing else
+5. If the information is not found, return "NOT_FOUND"
EXTRACTED TEXT:`;
+ }
return [system, user];
}
@@ -351,6 +403,8 @@ If the task asks to COUNT items (e.g., "how many listings", "number of results",
- Set "countTarget" to describe what to count (e.g., "listings", "products", "articles")
- The system will scroll through the entire page and sum up counts
- Do NOT use EXTRACT for counting tasks — EXTRACT only sees the current viewport
+- Do NOT use SCROLL_AND_COUNT for "first N", "top N", "latest N", or "oldest N" requests.
+ Those ask for a list of records, not a total count. Use EXTRACT for those.
Example - count all listings:
Goal: "note how many listings are available"
diff --git a/src/agents/planner-executor/plan-models.ts b/src/agents/planner-executor/plan-models.ts
index b9209fd..36667c2 100644
--- a/src/agents/planner-executor/plan-models.ts
+++ b/src/agents/planner-executor/plan-models.ts
@@ -44,6 +44,7 @@ export const ActionType = z.enum([
'CLICK',
'TYPE',
'TYPE_AND_SUBMIT',
+ 'FILL_FORM',
'SCROLL',
'SCROLL_AND_COUNT',
'PRESS',
@@ -65,6 +66,8 @@ export interface PlanStep {
target?: string;
intent?: string;
input?: string;
+ fields?: Array<{ label: string; value: string }>;
+ submitText?: string;
verify: PredicateSpec[];
required: boolean;
stopIfTrue: boolean;
@@ -91,7 +94,7 @@ export const PlanStepSchema = z.lazy(() =>
id: z.number().optional().describe('Step ID (1-indexed, contiguous)'),
goal: z.string().optional().describe('Human-readable goal for this step'),
action: ActionType.describe(
- 'Action type: NAVIGATE, CLICK, TYPE, TYPE_AND_SUBMIT, SCROLL, SCROLL_AND_COUNT, PRESS, WAIT, EXTRACT, STUCK, DONE'
+ 'Action type: NAVIGATE, CLICK, TYPE, TYPE_AND_SUBMIT, FILL_FORM, SCROLL, SCROLL_AND_COUNT, PRESS, WAIT, EXTRACT, STUCK, DONE'
),
target: z
.union([z.string(), z.record(z.string(), z.unknown())])
@@ -99,6 +102,11 @@ export const PlanStepSchema = z.lazy(() =>
.describe('URL for NAVIGATE action'),
intent: z.string().optional().describe('Intent hint for CLICK action'),
input: z.string().optional().describe('Text for TYPE_AND_SUBMIT action'),
+ fields: z
+ .array(z.object({ label: z.string(), value: z.string() }))
+ .optional()
+ .describe('Fields for FILL_FORM action'),
+ submitText: z.string().optional().describe('Submit button text for FILL_FORM action'),
verify: z.array(PredicateSpecSchema).default([]).describe('Verification predicates'),
required: z.boolean().default(true).describe('If True, step failure triggers replan'),
stopIfTrue: z
diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts
index a0157ab..560a9e7 100644
--- a/src/agents/planner-executor/planner-executor-agent.ts
+++ b/src/agents/planner-executor/planner-executor-agent.ts
@@ -433,6 +433,9 @@ export interface AgentRuntime {
/** Click an element by ID */
click(elementId: number): Promise;
+ /** Select an option on a native select/dropdown control */
+ selectOption?(elementId: number, value: string): Promise;
+
/** Click at viewport coordinates (vision fallback) */
clickCoordinate?(x: number, y: number): Promise;
@@ -1092,6 +1095,17 @@ export class PlannerExecutorAgent {
}
plannerAction = this.promoteVisibleResultClick(task, ctx, plannerAction);
+ if (plannerAction.action === 'SCROLL_AND_COUNT' && !isCountingTask(task)) {
+ plannerAction = {
+ action: 'EXTRACT',
+ goal: task,
+ target: task,
+ intent: task,
+ verify: [],
+ reasoning:
+ 'SCROLL_AND_COUNT is only valid for total counting tasks; this request asks to extract listed records.',
+ };
+ }
this.composableHeuristics.setStepHints(plannerAction.heuristicHints || []);
this.emitPlannerAction(stepNum, plannerAction, plannerActionSource);
@@ -1635,6 +1649,17 @@ export class PlannerExecutorAgent {
}
plannerAction = this.promoteVisibleResultClick(task, ctx, plannerAction);
+ if (plannerAction.action === 'SCROLL_AND_COUNT' && !isCountingTask(task)) {
+ plannerAction = {
+ action: 'EXTRACT',
+ goal: task,
+ target: task,
+ intent: task,
+ verify: [],
+ reasoning:
+ 'SCROLL_AND_COUNT is only valid for total counting tasks; this request asks to extract listed records.',
+ };
+ }
this.composableHeuristics.setStepHints(plannerAction.heuristicHints || []);
this.emitPlannerAction(stepNum, plannerAction, plannerActionSource);
@@ -2043,9 +2068,12 @@ export class PlannerExecutorAgent {
}
private extractSearchQueryFromTask(task: string): string | null {
- const directPattern = /search\s+for\s+(.+?)(?:\s+on\s+|,|then|$)/i;
+ const boundary =
+ /(?:\s+on\s+|,|\s+then\b|\s+and\s+(?:output|extract|capture|return|list|show|give)\b|$)/i;
+ const directPattern = new RegExp(`search\\s+for\\s+(.+?)${boundary.source}`, 'i');
const directMatch = task.match(directPattern);
- const raw = directMatch?.[1] ?? task.match(/search\s+(.+?)(?:\s+on\s+|,|then|$)/i)?.[1] ?? null;
+ const fallbackPattern = new RegExp(`search\\s+(.+?)${boundary.source}`, 'i');
+ const raw = directMatch?.[1] ?? task.match(fallbackPattern)?.[1] ?? null;
if (!raw) {
return null;
}
@@ -2097,6 +2125,7 @@ export class PlannerExecutorAgent {
(plannerAction.verify?.length || 0) > 0 &&
plannerAction.action !== 'TYPE_AND_SUBMIT' &&
plannerAction.action !== 'TYPE' &&
+ plannerAction.action !== 'FILL_FORM' &&
!this.isFormDataEntryPlannerAction(plannerAction) &&
!this.isForwardNavigationPlannerAction(plannerAction)
) {
@@ -2129,7 +2158,9 @@ export class PlannerExecutorAgent {
};
}
- if (this.isCopiedPlaceholderNavigation(plannerAction.target, currentUrl, task)) {
+ const navigationTarget = plannerAction.target;
+
+ if (this.isCopiedPlaceholderNavigation(navigationTarget, currentUrl, task)) {
return {
stepId: stepNum,
goal: stepGoal,
@@ -2144,14 +2175,14 @@ export class PlannerExecutorAgent {
}
try {
- await runtime.goto(plannerAction.target);
+ await runtime.goto(navigationTarget);
const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction);
const urlAfter = await runtime.getCurrentUrl();
return {
stepId: stepNum,
goal: stepGoal,
status: verificationPassed ? StepStatus.SUCCESS : StepStatus.FAILED,
- actionTaken: `NAVIGATE(${plannerAction.target})`,
+ actionTaken: `NAVIGATE(${navigationTarget})`,
verificationPassed,
usedVision: false,
durationMs: Date.now() - stepStart,
@@ -2227,13 +2258,149 @@ export class PlannerExecutorAgent {
}
if (plannerAction.action === 'FILL_FORM') {
- const fields = plannerAction.fields || [];
+ let fields = plannerAction.fields || [];
const submitText = plannerAction.submitText || '';
- const elements = ctx.snapshot?.elements || [];
+ let elements = ctx.snapshot?.elements || [];
const actions: string[] = [];
let filledCount = 0;
const inputRoles = ['textbox', 'searchbox', 'combobox', 'input', 'password'];
+ const alreadyUsed = new Set();
+
+ const uniqueValues = new Set(fields.map(f => f.value));
+ const duplicateSingleValue = fields.length > 1 && uniqueValues.size === 1;
+ const searchQueryValue = this.getFillFormPrimarySearchValue(task, fields);
+ if (searchQueryValue) {
+ const searchFilterFields = fields.filter(
+ field =>
+ this.normalizeSearchQueryText(field.value || '') !==
+ this.normalizeSearchQueryText(searchQueryValue)
+ );
+ let primarySearchInput = this.findPrimarySearchInput(elements);
+ if (!primarySearchInput) {
+ const expandedCtx = await this.snapshotWithEscalation(runtime, task, {
+ action: plannerAction.action,
+ intent: 'primary search input',
+ relaxPruning: true,
+ });
+ elements = expandedCtx.snapshot?.elements || elements;
+ primarySearchInput = this.findPrimarySearchInput(elements);
+ }
+ if (primarySearchInput) {
+ const preUrl = await runtime.getCurrentUrl();
+ await runtime.type(primarySearchInput.id, searchQueryValue);
+ actions.push(`TYPE(${primarySearchInput.id}, "${searchQueryValue}")`);
+ alreadyUsed.add(primarySearchInput.id);
+
+ const unmatchedSearchFilterFields: string[] = [];
+ for (const field of searchFilterFields) {
+ const filterControl = this.findFillFormFilterControl(elements, field, alreadyUsed);
+ if (filterControl) {
+ alreadyUsed.add(filterControl.id);
+ const filterRole = (filterControl.role || '').toLowerCase();
+ if ((filterRole === 'select' || filterRole === 'combobox') && runtime.selectOption) {
+ await runtime.selectOption(filterControl.id, field.value);
+ actions.push(`SELECT(${filterControl.id}, ${field.value})`);
+ } else {
+ await runtime.click(filterControl.id);
+ actions.push(`CLICK(${filterControl.id}, ${field.value})`);
+ }
+ } else {
+ unmatchedSearchFilterFields.push(field.label || field.value || 'unknown');
+ }
+ }
+
+ if (unmatchedSearchFilterFields.length > 0) {
+ actions.push(...unmatchedSearchFilterFields.map(label => `UNMATCHED(${label})`));
+ return {
+ stepId: stepNum,
+ goal: stepGoal,
+ status: StepStatus.FAILED,
+ actionTaken: `FILL_FORM(${actions.join(' -> ')})`,
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: await runtime.getCurrentUrl(),
+ error: `Search form filters were not applied: ${unmatchedSearchFilterFields.join(', ')}`,
+ };
+ }
+
+ const hasSearchFilters = searchFilterFields.length > 0;
+ let submitEl = submitText
+ ? this.findSubmitButtonByText(elements, submitText)
+ : this.findSubmitButton(elements, primarySearchInput.id, true);
+ if (submitEl === null && hasSearchFilters) {
+ const expandedSubmitCtx = await this.snapshotWithEscalation(runtime, task, {
+ action: plannerAction.action,
+ intent: submitText || 'search submit button',
+ relaxPruning: true,
+ });
+ elements = expandedSubmitCtx.snapshot?.elements || elements;
+ submitEl = submitText
+ ? this.findSubmitButtonByText(elements, submitText)
+ : this.findSubmitButton(elements, primarySearchInput.id, true);
+ }
+ if (submitEl !== null) {
+ await runtime.click(submitEl);
+ actions.push(`CLICK(${submitEl})`);
+ } else if (hasSearchFilters) {
+ return {
+ stepId: stepNum,
+ goal: stepGoal,
+ status: StepStatus.FAILED,
+ actionTaken: `FILL_FORM(${actions.join(' -> ')} -> SUBMIT_NOT_FOUND)`,
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: await runtime.getCurrentUrl(),
+ error: 'Search form filters were applied but no explicit submit control was found',
+ };
+ } else {
+ await runtime.pressKey('Enter');
+ actions.push('ENTER');
+ }
+
+ const changedUrl = await this.waitForUrlChange(runtime, preUrl, 5000);
+ const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction);
+ const urlAfter = await runtime.getCurrentUrl();
+ return {
+ stepId: stepNum,
+ goal: stepGoal,
+ status:
+ verificationPassed || changedUrl !== null ? StepStatus.SUCCESS : StepStatus.FAILED,
+ actionTaken: `FILL_FORM(${actions.join(' -> ')})`,
+ verificationPassed,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter,
+ };
+ }
+
+ return {
+ stepId: stepNum,
+ goal: stepGoal,
+ status: StepStatus.FAILED,
+ actionTaken: 'FILL_FORM(primary search input not found)',
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: await runtime.getCurrentUrl(),
+ error: 'Search query form fill was not applied to secondary/AND fields',
+ };
+ }
+
+ if (duplicateSingleValue) {
+ if (this.config.verbose) {
+ console.log(
+ `[FILL_FORM] All ${fields.length} fields have same value "${fields[0].value}" — filling only primary field`
+ );
+ }
+ fields = [fields[0]];
+ }
for (const field of fields) {
const label = (field.label || '').toLowerCase();
@@ -2243,6 +2410,7 @@ export class PlannerExecutorAgent {
for (const el of elements) {
const role = (el.role || '').toLowerCase();
if (!inputRoles.some(r => role.includes(r))) continue;
+ if (alreadyUsed.has(el.id)) continue;
const elText = (el.text || '').toLowerCase();
const elName = (el.name || '').toLowerCase();
@@ -2257,6 +2425,7 @@ export class PlannerExecutorAgent {
}
if (matched) {
+ alreadyUsed.add(matched.id);
await runtime.type(matched.id, value);
actions.push(`TYPE(${matched.id}, "${value}")`);
filledCount++;
@@ -2323,6 +2492,7 @@ export class PlannerExecutorAgent {
plannerAction.target ||
task ||
'Extract relevant data from the current page';
+ const extractionRequest = this.buildExtractionRequest(task, extractQuery);
const stripThinkTags = (text: string): string =>
text
@@ -2330,78 +2500,146 @@ export class PlannerExecutorAgent {
.replace(/ {
+ const normalized = text.replace(/\s+/g, ' ').trim().toLowerCase();
+ if (normalized.length === 0) return true;
+ if (normalized.length > 300) return false;
+ const chromeOnlyPatterns = [
+ /^skip to main content\b/,
+ /\b(sign up|log in|upload|navigation|menu)\b/,
+ ];
+ const hasDataCue = /\b(title|date|result|item|listing|article|price|author|capture)\b/.test(
+ normalized
+ );
+ return chromeOnlyPatterns.some(pattern => pattern.test(normalized)) && !hasDataCue;
+ };
+
if (this.config.verbose) {
console.log(`[ACTION] EXTRACT - query: "${extractQuery}"`);
}
try {
+ const extractionUrl = currentUrl;
+ let markdownFailureOutcome: StepOutcome | null = null;
+
const useMarkdown = runtime.readMarkdown != null;
if (useMarkdown && runtime.readMarkdown) {
// Text-based extraction: read page as markdown, then use executor LLM
const pageContent = await runtime.readMarkdown({ maxChars: 16000 });
- if (!pageContent) {
- return {
- stepId: stepNum,
- goal: extractQuery,
- status: StepStatus.FAILED,
- actionTaken: 'EXTRACT',
- verificationPassed: false,
- usedVision: false,
- durationMs: Date.now() - stepStart,
- error: 'Failed to read page content as markdown',
- };
- }
-
- if (this.config.verbose) {
- console.log(` [ACTION] EXTRACT - got markdown (${pageContent.length} chars):`);
- console.log(pageContent.slice(0, 2000));
- if (pageContent.length > 2000)
- console.log(` ... [truncated, ${pageContent.length - 2000} more chars]`);
- }
-
- // Build extraction prompt and call executor LLM
- const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractQuery);
- const extractResp = await this.executor.generate(extSystem, extUser, {
- temperature: 0.0,
- max_tokens: 1024,
- });
- this.recordTokenUsage('extract', extractResp);
-
- const extractedText = stripThinkTags((extractResp.content || '').trim());
- if (extractedText && extractedText !== 'NOT_FOUND') {
+ if (!pageContent || isSparseExtractionContent(pageContent)) {
if (this.config.verbose) {
- console.log(` [ACTION] EXTRACT ok: ${extractedText.slice(0, 160)}`);
+ console.log(
+ ` [ACTION] EXTRACT - markdown unavailable or too sparse, using fallback context`
+ );
}
- return {
- stepId: stepNum,
- goal: extractQuery,
- status: StepStatus.SUCCESS,
- actionTaken: 'EXTRACT',
- verificationPassed: true,
- usedVision: false,
- durationMs: Date.now() - stepStart,
- urlBefore: currentUrl,
- urlAfter: currentUrl,
- extractedData: { text: extractedText, query: extractQuery },
- };
} else {
- return {
- stepId: stepNum,
- goal: extractQuery,
- status: StepStatus.FAILED,
- actionTaken: 'EXTRACT',
- verificationPassed: false,
- usedVision: false,
- durationMs: Date.now() - stepStart,
- error: `Could not find requested data: ${extractQuery}`,
- pageContentPreview: pageContent.slice(0, 500),
- };
+ if (this.config.verbose) {
+ console.log(` [ACTION] EXTRACT - got markdown (${pageContent.length} chars):`);
+ console.log(pageContent.slice(0, 2000));
+ if (pageContent.length > 2000)
+ console.log(` ... [truncated, ${pageContent.length - 2000} more chars]`);
+ }
+
+ // Build extraction prompt and call executor LLM
+ const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractionRequest);
+ const extractResp = await this.executor.generate(extSystem, extUser, {
+ temperature: 0.0,
+ max_tokens: 1024,
+ });
+ this.recordTokenUsage('extract', extractResp);
+
+ let extractedText = stripThinkTags((extractResp.content || '').trim());
+ extractedText = this.applyExtractionItemLimit(extractionRequest, extractedText);
+ const isNotFound =
+ !extractedText ||
+ extractedText === 'NOT_FOUND' ||
+ extractedText.startsWith('NOT_FOUND:');
+ if (!isNotFound) {
+ if (this.config.verbose) {
+ console.log(` [ACTION] EXTRACT ok: ${extractedText.slice(0, 160)}`);
+ }
+ return {
+ stepId: stepNum,
+ goal: extractQuery,
+ status: StepStatus.SUCCESS,
+ actionTaken: 'EXTRACT',
+ verificationPassed: true,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: extractionUrl,
+ extractedData: { text: extractedText, query: extractionRequest },
+ };
+ } else {
+ const notFoundDetail = extractedText?.startsWith('NOT_FOUND:') ? extractedText : '';
+ markdownFailureOutcome = {
+ stepId: stepNum,
+ goal: extractQuery,
+ status: StepStatus.FAILED,
+ actionTaken: 'EXTRACT',
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ error: notFoundDetail
+ ? `Search returned JSON but lacks requested fields. ${notFoundDetail}. Try modifying the search URL to include the missing fields (e.g., add fl[]=title&fl[]=date to the URL) or navigate to a regular search page.`
+ : `Could not find requested data: ${extractQuery}`,
+ pageContentPreview: pageContent.slice(0, 500),
+ urlBefore: currentUrl,
+ urlAfter: extractionUrl,
+ };
+ }
}
- } else {
+ }
+
+ {
// Fallback: use compact snapshot context for extraction
const pageContent = ctx.compactRepresentation;
+ const hasSnapshotElements = (ctx.snapshot?.elements || []).length > 0;
+ if (!hasSnapshotElements && ctx.screenshotBase64 && this.executor.supportsVision()) {
+ const visionUserPrompt = `/no_think
+Extract the requested data from the screenshot of the current browser page.
+
+Request:
+${extractionRequest}
+
+Return only the extracted rows. If the screenshot does not contain the requested data, return NOT_FOUND.`;
+ const visionResp = await this.executor.generateWithImage(
+ 'You extract visible data from browser screenshots.',
+ visionUserPrompt,
+ ctx.screenshotBase64,
+ {
+ temperature: 0.0,
+ max_tokens: 1024,
+ }
+ );
+ this.recordTokenUsage('extract', visionResp);
+ let extractedText = stripThinkTags((visionResp.content || '').trim());
+ extractedText = this.applyExtractionItemLimit(extractionRequest, extractedText);
+ const isNotFound =
+ !extractedText ||
+ extractedText === 'NOT_FOUND' ||
+ extractedText.startsWith('NOT_FOUND:');
+ if (!isNotFound) {
+ if (this.config.verbose) {
+ console.log(` [ACTION] EXTRACT ok (vision): ${extractedText.slice(0, 160)}`);
+ }
+ return {
+ stepId: stepNum,
+ goal: extractQuery,
+ status: StepStatus.SUCCESS,
+ actionTaken: 'EXTRACT',
+ verificationPassed: true,
+ usedVision: true,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: extractionUrl,
+ extractedData: { text: extractedText, query: extractionRequest },
+ };
+ }
+ }
+
if (!pageContent || pageContent.trim().length === 0) {
return {
stepId: stepNum,
@@ -2415,15 +2653,20 @@ export class PlannerExecutorAgent {
};
}
- const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractQuery);
+ const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractionRequest);
const extractResp = await this.executor.generate(extSystem, extUser, {
temperature: 0.0,
max_tokens: 1024,
});
this.recordTokenUsage('extract', extractResp);
- const extractedText = stripThinkTags((extractResp.content || '').trim());
- if (extractedText && extractedText !== 'NOT_FOUND') {
+ let extractedText = stripThinkTags((extractResp.content || '').trim());
+ extractedText = this.applyExtractionItemLimit(extractionRequest, extractedText);
+ const isNotFound =
+ !extractedText ||
+ extractedText === 'NOT_FOUND' ||
+ extractedText.startsWith('NOT_FOUND:');
+ if (!isNotFound) {
if (this.config.verbose) {
console.log(` [ACTION] EXTRACT ok (snapshot): ${extractedText.slice(0, 160)}`);
}
@@ -2436,20 +2679,22 @@ export class PlannerExecutorAgent {
usedVision: false,
durationMs: Date.now() - stepStart,
urlBefore: currentUrl,
- urlAfter: currentUrl,
- extractedData: { text: extractedText, query: extractQuery },
+ urlAfter: extractionUrl,
+ extractedData: { text: extractedText, query: extractionRequest },
};
} else {
- return {
- stepId: stepNum,
- goal: extractQuery,
- status: StepStatus.FAILED,
- actionTaken: 'EXTRACT',
- verificationPassed: false,
- usedVision: false,
- durationMs: Date.now() - stepStart,
- error: `Could not extract requested data: ${extractQuery}`,
- };
+ return (
+ markdownFailureOutcome || {
+ stepId: stepNum,
+ goal: extractQuery,
+ status: StepStatus.FAILED,
+ actionTaken: 'EXTRACT',
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ error: `Could not extract requested data: ${extractQuery}`,
+ }
+ );
}
}
} catch (e) {
@@ -2468,6 +2713,22 @@ export class PlannerExecutorAgent {
// Handle SCROLL_AND_COUNT action — scroll entire page and count items
if (plannerAction.action === 'SCROLL_AND_COUNT') {
+ if (!isCountingTask(task)) {
+ return {
+ stepId: stepNum,
+ goal: plannerAction.goal || plannerAction.countTarget || 'SCROLL_AND_COUNT',
+ status: StepStatus.FAILED,
+ actionTaken: 'SCROLL_AND_COUNT(rejected_non_counting_task)',
+ verificationPassed: false,
+ usedVision: false,
+ durationMs: Date.now() - stepStart,
+ urlBefore: currentUrl,
+ urlAfter: await runtime.getCurrentUrl(),
+ error:
+ 'SCROLL_AND_COUNT is only for counting tasks, not first/top N extraction; use EXTRACT for visible result data.',
+ };
+ }
+
const countTarget =
plannerAction.countTarget ||
plannerAction.goal ||
@@ -3847,6 +4108,196 @@ COUNT:`;
}
}
+  /**
+   * Picks the snapshot element that most plausibly is the page's main search box.
+   *
+   * Elements qualify when their role is one of searchbox/textbox/combobox/input
+   * and their descriptive text (element text, name, aria label) does not look
+   * like an email/newsletter/password field or a secondary "optional field N" /
+   * "AND field" input. Qualifying elements are ranked by a heuristic score and
+   * the highest-ranked one wins; on equal scores the earliest element in
+   * `elements` is kept.
+   *
+   * @param elements - Snapshot elements to search.
+   * @returns The best candidate, or `null` when no element qualifies.
+   */
+  private findPrimarySearchInput(elements: SnapshotElement[]): SnapshotElement | null {
+    const typeableRoles = ['searchbox', 'textbox', 'combobox', 'input'];
+    const excludedDescriptors = [
+      /\b(email|newsletter|subscribe|password)\b/i,
+      /\boptional[_\s-]*field[_\s-]*\d+\b/i,
+      /\band\s+(?:field|condition|clause)\b/i,
+    ];
+
+    // Heuristic ranking: higher means "more likely the primary query box".
+    const rank = (candidate: SnapshotElement): number => {
+      const roleLower = (candidate.role || '').toLowerCase();
+      const nameLower = (candidate.name || '').toLowerCase();
+      const textLower = this.elementText(candidate).toLowerCase();
+      let points = roleLower === 'searchbox' ? 100 : 0;
+      if (/\b(any|all)[_\s-]*(field|fields)\b/.test(`${nameLower} ${textLower}`)) {
+        points += 140;
+      }
+      if (nameLower === 'query' || nameLower === 'search') {
+        points += 80;
+      }
+      if (nameLower === 'q') {
+        points += 25;
+      }
+      if (/\b(search|query|keyword|terms?)\b/.test(textLower)) {
+        points += 40;
+      }
+      if (/\b(optional|advanced|field\d+|and)\b/.test(textLower)) {
+        points -= 50;
+      }
+      if (candidate.isPrimary) {
+        points += 20;
+      }
+      return points;
+    };
+
+    let best: SnapshotElement | null = null;
+    let bestRank = Number.NEGATIVE_INFINITY;
+    for (const element of elements) {
+      if (!typeableRoles.includes((element.role || '').toLowerCase())) {
+        continue;
+      }
+      const descriptor = `${this.elementText(element)} ${element.name || ''} ${element.ariaLabel || ''}`;
+      if (excludedDescriptors.some(pattern => pattern.test(descriptor))) {
+        continue;
+      }
+      // Strict comparison keeps the first element among equally ranked ones.
+      const currentRank = rank(element);
+      if (best === null || currentRank > bestRank) {
+        best = element;
+        bestRank = currentRank;
+      }
+    }
+    return best;
+  }
+
+  /**
+   * Finds the control (radio, checkbox, option, menu item, button, select or
+   * combobox) that best represents the filter described by `field`.
+   *
+   * An element is considered only when it has not been used yet, has one of the
+   * roles above, and is not explicitly non-clickable (radio, checkbox and select
+   * are exempt from the clickable check). Its normalized text must match the
+   * normalized field value exactly, contain it, or share at least one term with
+   * it. Matching elements are scored and the highest score wins; on equal scores
+   * the earliest element in `elements` is kept.
+   *
+   * NOTE(review): `alreadyUsed` is declared as a bare `Set` with no type
+   * argument; it is only queried with `element.id` here — confirm the intended
+   * element type against the caller.
+   *
+   * @param elements - Snapshot elements to search.
+   * @param field - Label/value pair describing the desired filter.
+   * @param alreadyUsed - Ids of elements that must not be picked again.
+   * @returns The best matching control, or `null` when the value is empty or nothing matches.
+   */
+  private findFillFormFilterControl(
+    elements: SnapshotElement[],
+    field: { label: string; value: string },
+    alreadyUsed: Set
+  ): SnapshotElement | null {
+    const wanted = this.normalizeSearchQueryText(field.value || '');
+    if (wanted.length === 0) {
+      return null;
+    }
+
+    // Generic label words carry no signal about which filter is meant.
+    const genericLabelWords = ['all', 'any', 'field', 'fields'];
+    const labelWords = this.normalizedSearchTerms(
+      this.normalizeSearchQueryText(field.label || '')
+    ).filter(word => !genericLabelWords.includes(word));
+    const wantedWords = this.normalizedSearchTerms(wanted);
+
+    const selectableRoles = [
+      'radio',
+      'checkbox',
+      'option',
+      'menuitem',
+      'button',
+      'select',
+      'combobox',
+    ];
+    const toleratesNonClickable = ['radio', 'checkbox', 'select'];
+
+    let winner: SnapshotElement | null = null;
+    let winnerScore = Number.NEGATIVE_INFINITY;
+
+    for (const element of elements) {
+      const role = (element.role || '').toLowerCase();
+      const eligible =
+        !alreadyUsed.has(element.id) &&
+        selectableRoles.includes(role) &&
+        (element.clickable !== false || toleratesNonClickable.includes(role));
+      if (!eligible) {
+        continue;
+      }
+
+      const text = this.normalizeSearchQueryText(this.elementText(element));
+      const words = this.normalizedSearchTerms(text);
+      const exact = text === wanted;
+      const wholeWord = words.includes(wanted);
+      const sharedWord = wantedWords.some(word => words.includes(word));
+      const looseMatch = text.includes(wanted) || wantedWords.some(word => text.includes(word));
+      if (!(exact || wholeWord || sharedWord || looseMatch)) {
+        continue;
+      }
+
+      let score = 0;
+      if (role === 'radio' || role === 'checkbox' || role === 'option') {
+        score += 100;
+      } else if (role === 'select' || role === 'combobox') {
+        score += 90;
+      }
+      if (exact) {
+        score += 60;
+      }
+      if (wholeWord) {
+        score += 30;
+      }
+      if (sharedWord) {
+        score += 30;
+      }
+      if (labelWords.some(word => words.includes(word) || text.includes(word))) {
+        score += 20;
+      }
+      if (element.isPrimary) {
+        score += 5;
+      }
+
+      // Strict comparison keeps the first element among equally scored ones.
+      if (score > winnerScore) {
+        winner = element;
+        winnerScore = score;
+      }
+    }
+
+    return winner;
+  }
+
+  /**
+   * Splits text into comparison terms.
+   *
+   * The input is cut at every run of characters other than `a-z` and `0-9`
+   * (upper-case letters therefore act as separators), one trailing `s` is
+   * dropped from each piece, and pieces that end up empty are discarded.
+   *
+   * @param value - Text to tokenize.
+   * @returns The resulting terms in their original order.
+   */
+  private normalizedSearchTerms(value: string): string[] {
+    const terms: string[] = [];
+    for (const chunk of value.split(/[^a-z0-9]+/)) {
+      const singular = chunk.replace(/s$/, '');
+      if (singular.length > 0) {
+        terms.push(singular);
+      }
+    }
+    return terms;
+  }
+
+  /**
+   * Returns the field value that equals the search query found in the task text.
+   *
+   * Field values are trimmed and empty ones ignored. The query comes from
+   * `extractSearchQueryFromTask`; values and query are compared after
+   * `normalizeSearchQueryText`, and the first matching (trimmed) value is returned.
+   *
+   * @param task - Task description to pull the search query from.
+   * @param fields - Label/value pairs planned for the form.
+   * @returns The matching value, or `null` when there are no usable values, no
+   *   query can be extracted, or no value matches it.
+   */
+  private getFillFormPrimarySearchValue(
+    task: string,
+    fields: Array<{ label: string; value: string }>
+  ): string | null {
+    const candidateValues: string[] = [];
+    for (const field of fields) {
+      const trimmedValue = (field.value || '').trim();
+      if (trimmedValue) {
+        candidateValues.push(trimmedValue);
+      }
+    }
+    // Also covers an empty `fields` array.
+    if (candidateValues.length === 0) {
+      return null;
+    }
+
+    const taskQuery = this.extractSearchQueryFromTask(task);
+    if (!taskQuery) {
+      return null;
+    }
+
+    const target = this.normalizeSearchQueryText(taskQuery);
+    for (const candidate of candidateValues) {
+      if (this.normalizeSearchQueryText(candidate) === target) {
+        return candidate;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Canonicalizes text for loose comparison: lower-cases it, removes quote,
+   * backtick, asterisk and underscore characters, collapses every whitespace
+   * run to a single space, and trims the ends.
+   *
+   * @param value - Text to normalize.
+   * @returns The normalized text.
+   */
+  private normalizeSearchQueryText(value: string): string {
+    const lowered = value.toLowerCase();
+    const withoutMarkup = lowered.replace(/["'`*_]+/g, '');
+    const singleSpaced = withoutMarkup.replace(/\s+/g, ' ');
+    return singleSpaced.trim();
+  }
+
+  /**
+   * Builds the request text handed to the extraction prompt.
+   *
+   * When the overall task states an item limit ("first 5", "top 3",
+   * "exactly 10", ...) but the step's extract query does not, the task is
+   * appended on a new "Overall task:" line so the limit is not lost.
+   * Otherwise the extract query is returned unchanged.
+   *
+   * @param task - The overall task description.
+   * @param extractQuery - The query for this extraction step.
+   * @returns The query, optionally followed by the overall task.
+   */
+  private buildExtractionRequest(task: string, extractQuery: string): string {
+    const limitPatterns = [
+      /\b(?:first|top|last|latest|earliest|newest|oldest)\s+\d+\b/,
+      /\bexactly\s+\d+\b/,
+    ];
+    // Patterns are lower-case only, so the text is lower-cased before testing.
+    const mentionsItemLimit = (text: string): boolean => {
+      const lowered = text.toLowerCase();
+      return limitPatterns.some(pattern => pattern.test(lowered));
+    };
+
+    const needsTaskContext =
+      mentionsItemLimit(task) && !mentionsItemLimit(extractQuery) && task.trim().length > 0;
+    return needsTaskContext ? `${extractQuery}\nOverall task: ${task}` : extractQuery;
+  }
+
+  /**
+   * Trims extracted text to the item limit named in the request, if there is one.
+   *
+   * The first "first|top|last|latest|earliest|newest|oldest N" phrase in
+   * `extractionRequest` sets the limit. When the text has more than N non-empty
+   * lines, only N are kept: the final N for "last", the leading N for every
+   * other keyword. Blank lines are dropped from a trimmed result; text within
+   * the limit is returned untouched.
+   *
+   * NOTE(review): the limit counts non-empty lines, not logical records, so an
+   * item spread over several lines would be cut short — confirm extraction
+   * output is one item per line.
+   *
+   * @param extractionRequest - Request text that may contain an item limit.
+   * @param extractedText - Text returned by the extraction model.
+   * @returns The original text, or its limited lines joined with `\n`.
+   */
+  private applyExtractionItemLimit(extractionRequest: string, extractedText: string): string {
+    const limitPattern = /\b(first|top|last|latest|earliest|newest|oldest)\s+(\d+)\b/i;
+    const found = limitPattern.exec(extractionRequest);
+    if (found === null) {
+      return extractedText;
+    }
+
+    const maxItems = Number.parseInt(found[2], 10);
+    if (!(Number.isFinite(maxItems) && maxItems > 0)) {
+      return extractedText;
+    }
+
+    const rows = extractedText.split(/\r?\n/).filter(row => row.trim().length > 0);
+    if (rows.length <= maxItems) {
+      return extractedText;
+    }
+
+    // Only the literal keyword "last" selects from the end of the list.
+    const keepTail = found[1].toLowerCase() === 'last';
+    const kept = keepTail ? rows.slice(rows.length - maxItems) : rows.slice(0, maxItems);
+    return kept.join('\n');
+  }
+
private promoteVisibleResultClick(
task: string,
ctx: SnapshotContext,
@@ -4419,6 +4870,12 @@ COUNT:`;
);
if (hasRepeatedAction) {
+ const lastUrl = sameHostEntries[sameHostEntries.length - 1].urlAfter;
+ const earlierUrls = sameHostEntries.slice(0, -1).map(e => e.urlAfter);
+ const lastUrlIsNew = lastUrl && !earlierUrls.includes(lastUrl);
+ if (lastUrlIsNew) {
+ continue;
+ }
return `Action cycle detected: visited ${host} ${count} times with repeated ${sameHostEntries[0].action} actions`;
}
}
@@ -4882,12 +5339,11 @@ COUNT:`;
const candidates: Array<{ id: number; score: number }> = [];
for (const element of elements) {
- // Only consider buttons and links
const role = (element.role || '').toLowerCase();
- if (!['button', 'link'].includes(role)) continue;
+ const isNativeSubmit = role === 'submit' || role === 'button' || role === 'input';
+ if (!['button', 'link', 'submit', 'input'].includes(role)) continue;
- // Skip if not clickable
- if (element.clickable === false) continue;
+ if (element.clickable === false && !isNativeSubmit) continue;
// Skip the input element itself
if (element.id === inputElementId) continue;
@@ -4927,14 +5383,18 @@ COUNT:`;
const lower = targetText.toLowerCase().trim();
for (const el of elements) {
const role = (el.role || '').toLowerCase();
- if (!['button', 'link'].includes(role)) continue;
- if (el.clickable === false) continue;
+ const isNativeSubmit = role === 'submit' || role === 'button' || role === 'input';
+ if (!['button', 'link', 'submit', 'input'].includes(role)) continue;
+ if (el.clickable === false && !isNativeSubmit) continue;
const text = (el.text || '').toLowerCase().trim();
+ const name = (el.name || '').toLowerCase().trim();
const ariaLabel = (el.ariaLabel || '').toLowerCase().trim();
if (
text === lower ||
text.includes(lower) ||
lower.includes(text) ||
+ name === lower ||
+ name.includes(lower) ||
ariaLabel.includes(lower)
) {
return el.id;
diff --git a/src/agents/planner-executor/prompts.ts b/src/agents/planner-executor/prompts.ts
index 41cfa16..f58f220 100644
--- a/src/agents/planner-executor/prompts.ts
+++ b/src/agents/planner-executor/prompts.ts
@@ -63,9 +63,9 @@ export function buildStepwisePlannerPrompt(
Actions:
- NAVIGATE: Go directly to a URL when the next destination is known. Set "target" to the URL.
- CLICK: Click an element. Set "intent" to describe the SPECIFIC element (include label, placeholder, or nearby text, e.g. "email textbox", "display name field", "Next button", NOT just "textbox" or "button"). Set "input" to EXACT text from elements list.
-- FILL_FORM: Fill ALL visible form fields and submit. Use for login, signup, checkout, or any multi-field form. Set "fields" to an array of {label, value} pairs. Set "submitText" to the submit button text. Set "verify" to check navigation after submit.
+- TYPE_AND_SUBMIT: Type text into a search/query box and submit. Set "input" to the SEARCH QUERY from the goal (NOT the element label).
- TYPE: Type text into a SINGLE form field. Prefer FILL_FORM for forms with multiple fields.
-- TYPE_AND_SUBMIT: Type text into a search box and submit. Set "input" to the SEARCH QUERY from the goal (NOT the element label).
+- FILL_FORM: Fill structured form fields and submit. Use ONLY for login, signup, checkout, or forms where the goal provides DISTINCT values per field (e.g. "username: X, password: Y"). Set "fields" to an array of {label, value} pairs. Set "submitText" to the submit button text. Set "verify" to check navigation after submit.
- SCROLL: Scroll page. Set "direction" to "up" or "down".
- SCROLL_AND_COUNT: Scroll through the ENTIRE page and count items. Use ONLY when the task asks to enumerate items (e.g., "how many listings", "number of results", "count the products"). Do NOT use when "count" is a data value to read (e.g., "calorie count", "word count" = use EXTRACT instead). Set "countTarget" to describe what to count.
- WAIT: Wait for content to appear when a follow-up verification is needed.
@@ -80,17 +80,33 @@ WHEN TO USE DONE:
- "Log in" task: DONE only AFTER the page navigates away from /login
- If goal has multiple steps, complete ALL steps before returning DONE
-CRITICAL RULE FOR FILL_FORM (PREFERRED for login/signup/checkout):
-- Use FILL_FORM when the goal provides values for 2+ form fields (e.g. "username: X, password: Y")
+STEP ORDERING:
+- If the task mentions a specific page or URL you are NOT on yet, NAVIGATE there FIRST before doing anything else.
+- Example: task says "Use advanced search on archive.org" but you are on the homepage → NAVIGATE to the advanced search page first, then search.
+- Example: task says "Go to amazon.com and search for headphones" → NAVIGATE to amazon.com first, then TYPE_AND_SUBMIT.
+
+SEARCH vs FORM FILL:
+- Use TYPE_AND_SUBMIT (NOT FILL_FORM) when entering a single search query into a search box. "input" = the search query.
+- On search forms with multiple fields (e.g. "Any field", "Title", "Creator"), type into the FIRST/PRIMARY query field only. Other fields are AND filters — leave them empty unless the task specifies filters.
+- Do NOT put the same search text into multiple form fields. Only the main query field needs it.
+- Use FILL_FORM ONLY when the goal provides DIFFERENT values for multiple fields (e.g. "username: X, password: Y").
+
+RECOVERY WHEN SEARCH RETURNS RAW DATA:
+- If a previous EXTRACT step failed with "JSON but lacks requested fields", the search API returned incomplete data.
+- Try navigating to a modified search URL, adding the missing fields (e.g., change fl[]=identifier to also include fl[]=title&fl[]=date).
+- Or try a regular search page instead (e.g., replace advancedsearch.php with /search?query=...).
+
+CRITICAL RULE FOR FILL_FORM (ONLY for login/signup/checkout with DISTINCT field values):
+- Use FILL_FORM ONLY when the goal provides DIFFERENT values for 2+ fields (e.g. "username: X, password: Y")
- "fields" is an array of {label, value} where label matches the field's visible text/placeholder
-- "submitText" is the text on the submit button (e.g. "Sign in", "Log in", "Submit", "Next")
+- "submitText" is the text on the submit button (e.g., "Sign in", "Log in", "Submit", "Next")
- The system will find and fill each field by matching label to element text/role
- This is MUCH faster than TYPE one field at a time
CRITICAL RULE FOR TYPE_AND_SUBMIT:
- "input" must be the SEARCH QUERY you want to type (e.g., "wireless headphones")
- "input" is NOT the element label (e.g., NOT "Search Amazon")
-- ONLY use if you see a "searchbox" or "textbox" element
+- ONLY use if you see a "searchbox" or "textbox" element on the current page
CRITICAL RULE FOR CLICK (after search):
- After searching, you are on a RESULTS PAGE. Click a PRODUCT LINK to go to product details.
@@ -123,9 +139,10 @@ RULES:
9. Do NOT output <think> tags or any reasoning
10. Do NOT return DONE until ALL parts of the goal are complete
11. Never copy example URLs from these instructions. Only NAVIGATE to a URL from the user's task, the current page, or a visible element.
-12. PREFER FILL_FORM for login/signup/checkout forms with 2+ fields. Do NOT use multiple TYPE actions when FILL_FORM can do it in one step.
-13. "intent" must be SPECIFIC: describe the element with its label or context (e.g., "email field", "plan dropdown", "Next button on step 2")
-14. Treat history results "success", "skipped", and "vision_fallback" as already satisfied. Do not repeat those steps; choose the next incomplete part of the goal.`;
+12. PREFER FILL_FORM for login/signup/checkout forms with 2+ DISTINCT field values. Do NOT use multiple TYPE actions when FILL_FORM can do it in one step.
+13. Do NOT use FILL_FORM for search queries. Use TYPE_AND_SUBMIT instead — search needs only ONE query field, not multiple fields.
+14. "intent" must be SPECIFIC: describe the element with its label or context (e.g., "email field", "plan dropdown", "Next button on step 2")
+15. Treat history results "success", "skipped", and "vision_fallback" as already satisfied. Do not repeat those steps; choose the next incomplete part of the goal.`;
// Inject extraction-specific guidance when the goal is an extraction task
const extractionGuidance = isExtractionTask(goal)
diff --git a/src/agents/planner-executor/replan-prompts.ts b/src/agents/planner-executor/replan-prompts.ts
index 335b0ed..b2e4ca4 100644
--- a/src/agents/planner-executor/replan-prompts.ts
+++ b/src/agents/planner-executor/replan-prompts.ts
@@ -68,6 +68,8 @@ Prefer a materially different recovery:
target: failedStep.target,
intent: failedStep.intent,
input: failedStep.input,
+ fields: failedStep.fields,
+ submitText: failedStep.submitText,
verify: failedStep.verify || [],
required: failedStep.required !== false,
optional_substeps: failedStep.optionalSubsteps || [],
diff --git a/tests/agents/planner-executor/search-submit.test.ts b/tests/agents/planner-executor/search-submit.test.ts
index e23fceb..f06a810 100644
--- a/tests/agents/planner-executor/search-submit.test.ts
+++ b/tests/agents/planner-executor/search-submit.test.ts
@@ -9,6 +9,7 @@ import {
isSearchLikeTypeAndSubmit,
isUrlChangeRelevantToIntent,
} from '../../../src/agents/planner-executor/boundary-detection';
+import { ReplanPatchSchema } from '../../../src/agents/planner-executor/plan-models';
import { normalizeReplanPatch } from '../../../src/agents/planner-executor/plan-utils';
import type { SnapshotElement } from '../../../src/agents/planner-executor/plan-models';
@@ -123,6 +124,7 @@ class RuntimeStub implements AgentRuntime {
public clickCalls: number[] = [];
public coordinateClickCalls: Array<{ x: number; y: number }> = [];
public typeCalls: Array<{ elementId: number; text: string }> = [];
+ public selectCalls: Array<{ elementId: number; value: string }> = [];
public coordinateTypeCalls: string[] = [];
public keyCalls: string[] = [];
@@ -165,6 +167,10 @@ class RuntimeStub implements AgentRuntime {
await this.handlers.onType?.(elementId, text, this);
}
+ async selectOption(elementId: number, value: string): Promise<void> {
+ this.selectCalls.push({ elementId, value });
+ }
+
async typeCoordinate(text: string): Promise<void> {
this.coordinateTypeCalls.push(text);
}
@@ -189,6 +195,26 @@ class RuntimeStub implements AgentRuntime {
}
}
+class MarkdownRuntimeStub extends RuntimeStub {
+ constructor(
+ initialUrl: string,
+ snapshotFactory: (runtime: RuntimeStub) => Snapshot | null,
+ private readonly markdown: string
+ ) {
+ super(initialUrl, snapshotFactory);
+ }
+
+ async readMarkdown(): Promise<string> {
+ return this.markdown;
+ }
+}
+
+class NullMarkdownRuntimeStub extends RuntimeStub {
+ async readMarkdown(): Promise<string | null> {
+ return null;
+ }
+}
+
function makeSnapshot(
url: string,
elements: Snapshot['elements'],
@@ -203,6 +229,892 @@ function makeSnapshot(
}
describe('PlannerExecutorAgent search submission parity', () => {
+ it('applies search filter controls in mixed FILL_FORM plans before submitting', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'any_field', value: 'Space images' },
+ { label: 'All mediatypes', value: 'IMAGES' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['search'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ {
+ id: 5,
+ role: 'textbox',
+ text: 'optional_field3_q',
+ name: 'optional_field3_q',
+ clickable: false,
+ },
+ { id: 8, role: 'searchbox', text: 'any field', name: 'q', clickable: true },
+ { id: 15, role: 'radio', text: 'texts', name: 'mediatype', nearbyText: 'All mediatypes' },
+ { id: 16, role: 'radio', text: 'image', name: 'mediatype', nearbyText: 'All mediatypes' },
+ { id: 17, role: 'button', text: 'Search', clickable: true },
+ ]),
+ {
+ onClick: (elementId, rt) => {
+ if (elementId === 17) {
+ rt.currentUrl = 'https://example.test/search?q=Space+images&mediatype=image';
+ }
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 8, text: 'Space images' }]);
+ expect(runtime.clickCalls).toEqual([16, 17]);
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(8, "Space images") -> CLICK(16, IMAGES) -> CLICK(17))'
+ );
+ });
+
+ it('sets select-style search filters in mixed FILL_FORM plans before submitting', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'any_field', value: 'Space images' },
+ { label: 'All mediatypes', value: 'IMAGES' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['search'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 8, role: 'searchbox', text: 'any field', name: 'q', clickable: true },
+ {
+ id: 14,
+ role: 'select',
+ text: 'All mediatypes Texts Collection Software Image Audio',
+ name: 'mediatype',
+ nearbyText: 'All mediatypes',
+ },
+ { id: 17, role: 'button', text: 'Search', clickable: true },
+ ]),
+ {
+ onClick: (elementId, rt) => {
+ if (elementId === 17) {
+ rt.currentUrl = 'https://example.test/search?q=Space+images&mediatype=image';
+ }
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 8, text: 'Space images' }]);
+ expect(runtime.selectCalls).toEqual([{ elementId: 14, value: 'IMAGES' }]);
+ expect(runtime.clickCalls).toEqual([17]);
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(8, "Space images") -> SELECT(14, IMAGES) -> CLICK(17))'
+ );
+ });
+
+ it('prefers the visible any-field query input over raw q fields in advanced search forms', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'any_field', value: 'Space images' },
+ { label: 'All mediatypes', value: 'image' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['Space'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'filtered search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 8, role: 'searchbox', text: 'q', name: 'q', clickable: true },
+ { id: 13, role: 'textbox', text: 'any_field', name: 'any_field', clickable: true },
+ {
+ id: 41,
+ role: 'select',
+ text: 'All mediatypes Texts Collection Software Image Audio',
+ name: 'mediatype',
+ nearbyText: 'All mediatypes',
+ },
+ { id: 36, role: 'submit', text: 'Search', name: 'Search', clickable: true },
+ ]),
+ {
+ onClick: (elementId, rt) => {
+ if (elementId === 36) {
+ const typedAnyField = rt.typeCalls.some(
+ call => call.elementId === 13 && call.text === 'Space images'
+ );
+ rt.currentUrl = typedAnyField
+ ? 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)'
+ : 'https://example.test/search?query=mediatype:(image)';
+ }
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 13, text: 'Space images' }]);
+ expect(runtime.typeCalls).not.toContainEqual({ elementId: 8, text: 'Space images' });
+ expect(result.stepOutcomes[0].urlAfter).toContain('Space%20images');
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(13, "Space images") -> SELECT(41, image) -> CLICK(36))'
+ );
+ });
+
+ it('escalates to find an explicit submit control after applying search filters', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'any_field', value: 'Space images' },
+ { label: 'All mediatypes', value: 'image' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['mediatype=image'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'filtered search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ let snapshotCalls = 0;
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt => {
+ snapshotCalls += 1;
+ const baseElements: Snapshot['elements'] = [
+ { id: 8, role: 'searchbox', text: 'any field', name: 'q', clickable: true },
+ {
+ id: 14,
+ role: 'select',
+ text: 'All mediatypes Texts Collection Software Image Audio',
+ name: 'mediatype',
+ nearbyText: 'All mediatypes',
+ },
+ ];
+ return makeSnapshot(
+ rt.currentUrl,
+ snapshotCalls === 1
+ ? baseElements
+ : [...baseElements, { id: 17, role: 'button', text: 'Search', clickable: true }]
+ );
+ },
+ {
+ onClick: (elementId, rt) => {
+ if (elementId === 17) {
+ rt.currentUrl = 'https://example.test/search?q=Space+images&mediatype=image';
+ }
+ },
+ onPressKey: (_key, rt) => {
+ rt.currentUrl = 'https://example.test/search?q=Space+images';
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.selectCalls).toEqual([{ elementId: 14, value: 'image' }]);
+ expect(runtime.clickCalls).toEqual([17]);
+ expect(runtime.keyCalls).toEqual([]);
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(8, "Space images") -> SELECT(14, image) -> CLICK(17))'
+ );
+ });
+
+ it('treats native submit inputs as explicit submit controls after applying search filters', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'any_field', value: 'Space images' },
+ { label: 'All mediatypes', value: 'image' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['mediatype=image'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'filtered search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 8, role: 'searchbox', text: 'any field', name: 'q', clickable: true },
+ {
+ id: 14,
+ role: 'select',
+ text: 'All mediatypes Texts Collection Software Image Audio',
+ name: 'mediatype',
+ nearbyText: 'All mediatypes',
+ },
+ { id: 17, role: 'submit', text: 'Search', name: 'Search', clickable: false },
+ ]),
+ {
+ onClick: (elementId, rt) => {
+ if (elementId === 17) {
+ rt.currentUrl = 'https://example.test/search?q=Space+images&mediatype=image';
+ }
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.clickCalls).toEqual([17]);
+ expect(runtime.keyCalls).toEqual([]);
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(8, "Space images") -> SELECT(14, image) -> CLICK(17))'
+ );
+ });
+
+ it('accepts FILL_FORM repair steps because stepwise execution supports them', () => {
+ const normalized = normalizeReplanPatch({
+ mode: 'patch',
+ replace_steps: [
+ {
+ id: 2,
+ step: {
+ id: 2,
+ action: 'FILL_FORM',
+ fields: [{ label: 'q', value: 'Space images' }],
+ submitText: 'Search',
+ verify: [],
+ },
+ },
+ ],
+ });
+
+ expect(() => ReplanPatchSchema.parse(normalized)).not.toThrow();
+ });
+
+ it('does not submit mixed FILL_FORM search plans when filter fields are unmatched', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'q', value: 'Space images' },
+ { label: 'mediatype', value: 'image' },
+ ],
+ submitText: 'Search',
+ verify: [{ predicate: 'url_contains', args: ['Space'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub('https://example.test/advancedsearch', rt =>
+ makeSnapshot(rt.currentUrl, [
+ {
+ id: 5,
+ role: 'textbox',
+ text: 'optional field 3',
+ name: 'optional_field3_q',
+ clickable: true,
+ },
+ { id: 8, role: 'searchbox', text: 'Search', name: 'q', clickable: true },
+ { id: 9, role: 'button', text: 'Search', clickable: true },
+ ])
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(false);
+ expect(runtime.typeCalls).toEqual([{ elementId: 8, text: 'Space images' }]);
+ expect(runtime.typeCalls).not.toContainEqual({ elementId: 5, text: 'Space images' });
+ expect(runtime.clickCalls).toEqual([]);
+ expect(runtime.keyCalls).toEqual([]);
+ expect(result.stepOutcomes[0].actionTaken).toBe(
+ 'FILL_FORM(TYPE(8, "Space images") -> UNMATCHED(mediatype))'
+ );
+ });
+
+ it('routes single-value FILL_FORM search queries to the primary search input, not AND fields', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [{ label: 'optional field 3', value: 'Space images' }],
+ verify: [{ predicate: 'url_contains', args: ['Space'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 1, role: 'searchbox', text: 'Search', name: 'q', clickable: true },
+ {
+ id: 2,
+ role: 'textbox',
+ text: 'optional field 3',
+ name: 'optional_field3_q',
+ clickable: true,
+ },
+ {
+ id: 3,
+ role: 'textbox',
+ text: 'optional field 4',
+ name: 'optional_field4_q',
+ clickable: true,
+ },
+ ]),
+ {
+ onPressKey: (_key, rt) => {
+ rt.currentUrl = 'https://example.test/search?q=Space+images';
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 1, text: 'Space images' }]);
+ expect(runtime.typeCalls).not.toContainEqual({ elementId: 2, text: 'Space images' });
+ expect(runtime.keyCalls).toEqual(['Enter']);
+ expect(result.stepOutcomes[0].actionTaken).toBe('FILL_FORM(TYPE(1, "Space images") -> ENTER)');
+ });
+
+ it('escalates FILL_FORM search routing instead of typing into visible AND fields', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [{ label: 'optional field 3', value: 'Space images' }],
+ verify: [{ predicate: 'url_contains', args: ['Space'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ let snapshotCalls = 0;
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt => {
+ snapshotCalls += 1;
+ if (snapshotCalls === 1) {
+ return makeSnapshot(rt.currentUrl, [
+ {
+ id: 5,
+ role: 'textbox',
+ text: 'optional field 3',
+ name: 'optional_field3_q',
+ clickable: true,
+ },
+ ]);
+ }
+ return makeSnapshot(rt.currentUrl, [
+ {
+ id: 5,
+ role: 'textbox',
+ text: 'optional field 3',
+ name: 'optional_field3_q',
+ clickable: true,
+ },
+ { id: 8, role: 'searchbox', text: 'Search', name: 'q', clickable: true },
+ ]);
+ },
+ {
+ onPressKey: (_key, rt) => {
+ rt.currentUrl = 'https://example.test/search?q=Space+images';
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 8, text: 'Space images' }]);
+ expect(runtime.typeCalls).not.toContainEqual({ elementId: 5, text: 'Space images' });
+ expect(runtime.keyCalls).toEqual(['Enter']);
+ });
+
+ it('treats duplicate-value FILL_FORM on search pages as a single primary search submission', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'FILL_FORM',
+ fields: [
+ { label: 'optional field 3', value: 'Space images' },
+ { label: 'optional field 4', value: 'Space images' },
+ ],
+ verify: [{ predicate: 'url_contains', args: ['Space'] }],
+ }),
+ JSON.stringify({ action: 'DONE', reasoning: 'search submitted once' }),
+ ]);
+ const executor = new ProviderStub();
+ const runtime = new RuntimeStub(
+ 'https://example.test/advancedsearch',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 1, role: 'searchbox', text: 'Search', name: 'q', clickable: true },
+ {
+ id: 2,
+ role: 'textbox',
+ text: 'optional field 3',
+ name: 'optional_field3_q',
+ clickable: true,
+ },
+ {
+ id: 3,
+ role: 'textbox',
+ text: 'optional field 4',
+ name: 'optional_field4_q',
+ clickable: true,
+ },
+ ]),
+ {
+ onPressKey: (_key, rt) => {
+ rt.currentUrl = 'https://example.test/search?q=Space+images';
+ },
+ }
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(runtime.typeCalls).toEqual([{ elementId: 1, text: 'Space images' }]);
+ expect(runtime.keyCalls).toEqual(['Enter']);
+ expect(result.stepOutcomes[0].actionTaken).toBe('FILL_FORM(TYPE(1, "Space images") -> ENTER)');
+ });
+
+ it('falls back to snapshot content when markdown extraction is unavailable', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles of the first 10 images',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2020-01-01T00:00:00Z - Space photo']);
+ const runtime = new NullMarkdownRuntimeStub('https://example.test/search', rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 1, role: 'article', text: '2020-01-01T00:00:00Z - Space photo' },
+ ])
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(result.stepOutcomes[0].status).toBe(StepStatus.SUCCESS);
+ expect(result.stepOutcomes[0].extractedData).toEqual({
+ text: '2020-01-01T00:00:00Z - Space photo',
+ query: 'capture dates and titles of the first 10 images',
+ });
+ });
+
+ it('falls back to snapshot content when markdown extraction returns not found', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles of the first 10 images',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub([
+ 'NOT_FOUND',
+ '2024-01-01T00:00:00Z - Space image one\n2024-01-02T00:00:00Z - Space image two',
+ ]);
+ const runtime = new MarkdownRuntimeStub(
+ 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)',
+ rt =>
+ makeSnapshot(rt.currentUrl, [
+ { id: 1, role: 'link', text: 'Space image one' },
+ { id: 2, role: 'text', text: '2024-01-01T00:00:00Z' },
+ { id: 3, role: 'link', text: 'Space image two' },
+ { id: 4, role: 'text', text: '2024-01-02T00:00:00Z' },
+ ]),
+ 'Search results page'
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(executor.calls).toHaveLength(2);
+ expect(executor.calls[1]?.user).toContain('Space image one');
+ expect(result.stepOutcomes[0].extractedData).toEqual({
+ text: '2024-01-01T00:00:00Z - Space image one\n2024-01-02T00:00:00Z - Space image two',
+ query: 'capture dates and titles of the first 10 images',
+ });
+ });
+
+ it('uses vision extraction when text context is unavailable but a screenshot is present', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles of the first 10 images',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2024-01-01T00:00:00Z - Space image one'], {
+ vision: true,
+ });
+ const runtime = new NullMarkdownRuntimeStub(
+ 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)',
+ rt => makeSnapshot(rt.currentUrl, [], { screenshot: 'base64-screenshot' })
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(executor.imageCalls).toHaveLength(1);
+ expect(executor.imageCalls[0]?.imageBase64).toBe('base64-screenshot');
+ expect(result.stepOutcomes[0].usedVision).toBe(true);
+ expect(result.stepOutcomes[0].extractedData).toEqual({
+ text: '2024-01-01T00:00:00Z - Space image one',
+ query: 'capture dates and titles of the first 10 images',
+ });
+ });
+
+ it('treats page-chrome-only markdown as unavailable and uses vision extraction', async () => {
+ const planner = new ProviderStub([
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles of the first 10 images',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2024-01-01T00:00:00Z - Space image one'], {
+ vision: true,
+ });
+ const runtime = new MarkdownRuntimeStub(
+ 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)',
+ rt => makeSnapshot(rt.currentUrl, [], { screenshot: 'base64-screenshot' }),
+ 'Skip to main content (https://example.test/#maincontent)'
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 },
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(executor.calls).toHaveLength(0);
+ expect(executor.imageCalls).toHaveLength(1);
+ expect(result.stepOutcomes[0].usedVision).toBe(true);
+ expect(result.stepOutcomes[0].extractedData).toEqual({
+ text: '2024-01-01T00:00:00Z - Space image one',
+ query: 'capture dates and titles of the first 10 images',
+ });
+ });
+
+ it('coerces SCROLL_AND_COUNT into EXTRACT for first-N extraction tasks without scrolling', async () => { // Scenario: planner asks for SCROLL_AND_COUNT on a "first 10" task; the recorded action is expected to be EXTRACT.
+ const planner = new ProviderStub([ // single canned planner reply: a SCROLL_AND_COUNT step
+ JSON.stringify({
+ action: 'SCROLL_AND_COUNT',
+ countTarget: 'image listings',
+ goal: 'Count image listings',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2024-01-01T00:00:00Z - Space image one']); // canned extraction reply
+ const runtime = new RuntimeStub(
+ 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)',
+ rt => makeSnapshot(rt.currentUrl, [{ id: 1, role: 'link', text: 'Space image one' }]) // snapshot with a single link element
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, // small timeouts, no replans
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(result.stepOutcomes[0].actionTaken).toBe('EXTRACT'); // the planner's SCROLL_AND_COUNT must be reported as EXTRACT
+ expect(result.stepOutcomes[0].extractedData).toEqual({ // here the query is the full task text, not the planner's 'Count image listings' goal
+ text: '2024-01-01T00:00:00Z - Space image one',
+ query:
+ 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+ expect(await runtime.getCurrentUrl()).toBe( // the runtime must still report the starting URL after the step
+ 'https://example.test/search?query=(Space%20images)%20AND%20mediatype:(image)'
+ );
+ });
+
+ it('does not rewrite site URLs during extraction', async () => { // Scenario: extraction on a JSON search endpoint URL must finish without navigating anywhere.
+ const currentUrl = // search URL that already carries output=json and callback=callback
+ 'https://archive.org/advancedsearch.php?q=Space+images&fl%5B%5D=identifier&rows=50&page=1&output=json&callback=callback&save=yes';
+ const planner = new ProviderStub([ // single canned planner reply: one EXTRACT step
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles of the first 10 images',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2020-01-01T00:00:00Z - Space photo']); // canned extraction reply
+ const runtime = new MarkdownRuntimeStub(
+ currentUrl,
+ rt => makeSnapshot(rt.currentUrl, []), // snapshot with no elements
+ 'callback({"response":{"docs":[{"date":"2020-01-01T00:00:00Z","title":"Space photo","mediatype":"image"}]}})' // JSON payload wrapped in a callback(...) call; presumably served as the page markdown
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, // small timeouts, no replans
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images on archive.org and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(result.stepOutcomes).toHaveLength(1); // exactly one step outcome is expected
+ expect(result.stepOutcomes[0].status).toBe(StepStatus.SUCCESS);
+ expect(runtime.gotoCalls).toHaveLength(0); // no goto calls may be recorded on the runtime stub
+ expect(result.stepOutcomes[0].urlAfter).toBe(currentUrl); // URL after the step equals the starting URL
+ });
+
+ it('preserves the full task constraints when planner gives a shortened extract goal', async () => { // Scenario: planner goal omits "first 10"; the executor prompt and the recorded query must still carry the full task.
+ const planner = new ProviderStub([ // single canned planner reply: EXTRACT whose goal omits the "first 10 images" constraint
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub(['2020-01-01T00:00:00Z - Space photo']); // canned extraction reply
+ const runtime = new MarkdownRuntimeStub(
+ 'https://example.test/search',
+ rt => makeSnapshot(rt.currentUrl, []), // snapshot with no elements
+ '{"response":{"docs":[{"date":"2020-01-01T00:00:00Z","title":"Space photo"}]}}' // bare JSON payload; presumably served as the page markdown
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, // small timeouts, no replans
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true);
+ expect(executor.calls[0]?.user).toContain('first 10 images listed'); // the first executor user prompt must include the constraint from the task text
+ expect(executor.calls[0]?.user).toContain('If the request asks for first/top/last N items'); // and must include this first/top/last-N instruction phrase
+ expect(result.stepOutcomes[0].extractedData).toEqual({ // recorded query is the planner goal, a newline, then 'Overall task: ' plus the full task
+ text: '2020-01-01T00:00:00Z - Space photo',
+ query:
+ 'capture dates and titles\nOverall task: Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+ });
+
+ it('enforces first N extraction limits when the extractor returns extra rows', async () => { // Scenario: executor returns 11 rows for a "first 10" task; the recorded text must be cut to 10 lines.
+ const planner = new ProviderStub([ // single canned planner reply: EXTRACT whose goal carries no row limit
+ JSON.stringify({
+ action: 'EXTRACT',
+ goal: 'capture dates and titles',
+ verify: [],
+ }),
+ ]);
+ const executor = new ProviderStub([ // one canned reply made of 11 newline-joined rows; the last is the sentinel 'extra row'
+ [
+ '1994-04-01T00:00:00Z - Space Radar Image',
+ '1957-05-21T00:00:00Z - Moon Rocket Cantata',
+ '2022-12-02T00:00:00Z - Deltadel Ebro',
+ '2000-12-16T00:00:00Z - Ganymede and Europa',
+ '2001-12-09T00:00:00Z - Praxidike',
+ '2006-01-01T00:00:00Z - photo-jsc2006e15538',
+ '2008-11-13T00:00:00Z - Young Scientist Challenge',
+ '2003-04-03T00:00:00Z - Dusty Star',
+ '1967-05-08T00:00:00Z - Recruiting Brochure',
+ '1967-06-22T00:00:00Z - Breadboard Checkout',
+ '2003-01-01T00:00:00Z - extra row',
+ ].join('\n'),
+ ]);
+ const runtime = new MarkdownRuntimeStub(
+ 'https://example.test/search',
+ rt => makeSnapshot(rt.currentUrl, []), // snapshot with no elements
+ '{"response":{"docs":[]}}' // JSON payload with an empty docs array; presumably served as the page markdown
+ );
+
+ const agent = new PlannerExecutorAgent({
+ planner,
+ executor,
+ config: {
+ retry: { verifyTimeoutMs: 20, verifyPollMs: 1, maxReplans: 0, executorRepairAttempts: 1 }, // small timeouts, no replans
+ recovery: { enabled: false },
+ },
+ });
+
+ const result = await agent.runStepwise(runtime, {
+ task: 'Use advanced search for Space images and output the capture dates and titles of the first 10 images listed.',
+ });
+
+ expect(result.success).toBe(true); // assert success before reading extractedData so a failed run reports here, not as a TypeError on `.text`
+ const text = (result.stepOutcomes[0].extractedData as { text: string }).text;
+ expect(text.split('\n')).toHaveLength(10); // 11 rows came back from the executor; only 10 may remain
+ expect(text).not.toContain('extra row'); // the 11th (sentinel) row must be the one dropped
+ });
+
it('identifies search-like TYPE_AND_SUBMIT actions and rejects unrelated URL changes', () => {
const searchbox: SnapshotElement = {
id: 1,