Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions extension/src/background/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
performElementSwipe,
performElementDragAndDrop,
performElementSetSlider,
performElementUpload,
performKeyboardInput,
performElementSelect,
replayHoverState,
Expand Down Expand Up @@ -295,6 +296,7 @@ const IN_PAGE_HIGHLIGHT_COLORS: Record<string, { border: string; bg: string }> =
selectable: { border: '#FF6B6B', bg: 'rgba(255,107,107,0.7)' },
draggable: { border: '#FF6600', bg: 'rgba(255,102,0,0.7)' },
droppable: { border: '#339966', bg: 'rgba(51,153,102,0.7)' },
uploadable: { border: '#AA66FF', bg: 'rgba(170,102,255,0.7)' },
any: { border: '#00CCCC', bg: 'rgba(0,204,204,0.7)' },
};

Expand All @@ -307,7 +309,10 @@ function buildInPageHighlightScript(elements: InteractiveElement[]): string {
IN_PAGE_HIGHLIGHT_COLORS[el.type] || IN_PAGE_HIGHLIGHT_COLORS.clickable;
return {
id: el.id,
selector: el.selector,
// The overlay script renders the box on `selector`. For uploadable
// file inputs that are display:none, the visible anchor's selector
// lets the overlay land on something the user can actually see.
selector: el.overlaySelector || el.selector,
borderColor: colors.border,
bgColor: colors.bg,
labelPos: el.labelPosition || 'above',
Expand Down Expand Up @@ -578,7 +583,11 @@ async function captureHighlightedPageState(
: '';
const detectedViewport = detectionResult.result.value.viewport || {};
const layoutStability = detectionResult.result.value.layoutStability;
const inPagePerf = detectionResult.result.value._perf || {};
const highlightTraceStart = Date.now();
let paginationMs = 0;
let screenshotMs = 0;
let consistencyMs = 0;
const detectedViewportWidth =
typeof detectedViewport.width === 'number' ? detectedViewport.width : 0;
const detectedViewportHeight =
Expand Down Expand Up @@ -651,8 +660,9 @@ async function captureHighlightedPageState(
console.log(
`📄 [${logLabel}] Page ${page}/${totalPages}, showing ${paginatedElements.length} of ${filteredElements.length} elements`,
);
paginationMs = Date.now() - paginationBuildStart;
console.log(
`⏱️ [HighlightTrace] background pagination build-pages=${Date.now() - paginationBuildStart}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
`⏱️ [HighlightTrace] background pagination build-pages=${paginationMs}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
);
}

Expand Down Expand Up @@ -697,9 +707,8 @@ async function captureHighlightedPageState(
console.log(
`📸 [${logLabel}] Screenshot captured (with in-page highlights), size: ${screenshotResult.imageData.length} bytes`,
);
console.log(
`⏱️ [HighlightTrace] background screenshot ${Date.now() - screenshotStart}ms`,
);
screenshotMs = Date.now() - screenshotStart;
console.log(`⏱️ [HighlightTrace] background screenshot ${screenshotMs}ms`);

// Apply bboxes returned from the highlight injection script
const preCaptureData = screenshotResult.preCaptureResult;
Expand Down Expand Up @@ -761,8 +770,9 @@ async function captureHighlightedPageState(
})),
currentConsistencySamples,
);
consistencyMs = Date.now() - consistencyCheckStart;
console.log(
`⏱️ [HighlightTrace] background consistency-check ${Date.now() - consistencyCheckStart}ms (checked=${highlightConsistency.checkedCount}, matched=${highlightConsistency.matchedCount}, missing=${highlightConsistency.missingCount}, shifted=${highlightConsistency.shiftedCount}, maxCenterShift=${highlightConsistency.maxCenterShift}, maxSizeDelta=${highlightConsistency.maxSizeDelta}, retry=${highlightConsistency.shouldRetry})`,
`⏱️ [HighlightTrace] background consistency-check ${consistencyMs}ms (checked=${highlightConsistency.checkedCount}, matched=${highlightConsistency.matchedCount}, missing=${highlightConsistency.missingCount}, shifted=${highlightConsistency.shiftedCount}, maxCenterShift=${highlightConsistency.maxCenterShift}, maxSizeDelta=${highlightConsistency.maxSizeDelta}, retry=${highlightConsistency.shouldRetry})`,
);
const repeatedDrift = isRepeatedHighlightDrift(
highlightConsistency,
Expand Down Expand Up @@ -836,6 +846,15 @@ async function captureHighlightedPageState(
page: currentPage,
pageState,
readinessReasons,
_perf: {
scan_ms:
typeof inPagePerf.scan_ms === 'number' ? inPagePerf.scan_ms : 0,
scan_stats: inPagePerf.scan_stats || {},
scan_times: inPagePerf.scan_times || {},
pagination_ms: paginationMs,
screenshot_ms: screenshotMs,
consistency_ms: consistencyMs,
},
...buildScreenshotPayload(compressedScreenshotResult),
};
}
Expand Down Expand Up @@ -953,6 +972,7 @@ function isHeavyBrowserCommand(data: any): boolean {
case 'set_slider_value':
case 'keyboard_input':
case 'select_element':
case 'upload_file':
case 'handle_dialog':
return true;
case 'tab':
Expand Down Expand Up @@ -2328,6 +2348,38 @@ async function handleCommand(command: Command): Promise<CommandResponse> {
};
}

case 'upload_file': {
  // Attach a local file to a previously detected element, then re-capture
  // the highlighted page state so the response reflects the page after the
  // upload attempt. The page state is captured whether or not the upload
  // itself succeeded.
  const { conversation_id: uploadConversationId, tab_id: uploadTabId } =
    command;
  if (!uploadConversationId) {
    throw new Error('conversation_id required');
  }
  if (uploadTabId == null) {
    throw new Error('tab_id is required');
  }

  // Only a non-empty string is accepted as a path.
  const uploadFilePath = command.file_path;
  if (typeof uploadFilePath !== 'string' || uploadFilePath.length === 0) {
    throw new Error('file_path is required for upload_file');
  }

  const uploadOutcome = await performElementUpload(
    uploadConversationId,
    command.element_id,
    uploadTabId,
    uploadFilePath,
  );
  const pageStateAfterUpload = await captureDefaultHighlightedPageState({
    tabId: uploadTabId,
    conversationId: uploadConversationId,
    logLabel: 'UploadFile',
  });

  // Page-state fields are spread last, so they win over same-named fields
  // of the upload result inside `data`.
  const { success, error } = uploadOutcome;
  return {
    success,
    data: { ...uploadOutcome, ...pageStateAfterUpload },
    error,
    timestamp: Date.now(),
  };
}

case 'keyboard_input': {
if (!command.conversation_id)
throw new Error('conversation_id required');
Expand Down
114 changes: 114 additions & 0 deletions extension/src/commands/element-actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type { ElementActionResult } from '../types';
* - Handles dialog events using the same pattern as javascript.ts
*/

import { CdpCommander } from './cdp-commander';
import { buildElementCacheMissMessage, elementCache } from './element-cache';
import { executeJavaScript, type JavaScriptResult } from './javascript';
import { buildHitTestVisibilityHelpersScript } from '../utils/hit-test-visibility';
Expand Down Expand Up @@ -516,6 +517,15 @@ export interface HoverResult extends ElementActionResult {
error?: string;
}

/**
 * Result type for file upload operation.
 *
 * Returned by `performElementUpload`; adds upload-specific fields on top of
 * the shared `ElementActionResult` shape.
 */
export interface UploadResult extends ElementActionResult {
/**
 * True only after the CDP `DOM.setFileInputFiles` call completed without
 * throwing; every failure path of `performElementUpload` reports false.
 */
uploaded: boolean;
/**
 * True when the cached selector no longer resolves to a DOM node, false on
 * an element-cache miss; left unset on the other result paths.
 */
staleElement?: boolean;
/** Failure reason; not set on the success path. */
error?: string;
}

/**
* Result type for element select operation
*/
Expand Down Expand Up @@ -4096,6 +4106,109 @@ export async function performElementSelect(
return result;
}

/**
 * Assigns a host-local file (absolute path) to an `<input type="file">`
 * element using the CDP command `DOM.setFileInputFiles`.
 *
 * Clicking the input instead would open the native OS file picker in front
 * of the user, and the agent has no way to drive that dialog.
 *
 * The path is validated by the server before the command is dispatched, so
 * the work here is limited to: look the element up in the element cache,
 * resolve its selector to a CDP `nodeId`, and issue the CDP call.
 *
 * @param conversationId - Conversation whose element cache is consulted.
 * @param elementId - Id of the target element as requested by the caller.
 * @param tabId - Tab the CDP commands are sent to.
 * @param filePath - Absolute path of the file on the host.
 * @returns An `UploadResult`; errors raised by the CDP calls are caught and
 *   reported through `success: false` plus `error` rather than rethrown.
 */
export async function performElementUpload(
  conversationId: string,
  elementId: string,
  tabId: number,
  filePath: string,
): Promise<UploadResult> {
  console.log(
    `📎 [ElementUpload] Uploading "${filePath}" to element ${elementId} on tab ${tabId}`,
  );

  const cacheEntry = elementCache.getElementById(
    conversationId,
    tabId,
    elementId,
  );
  if (!cacheEntry) {
    console.log(`❌ [ElementUpload] Element ${elementId} not found in cache`);
    // Nothing was resolved, so the requested id doubles as the resolved id.
    return {
      success: false,
      ...buildResolvedElementResultFields(elementId, elementId),
      uploaded: false,
      staleElement: false,
      error: buildElementCacheMissMessage({
        conversationId,
        tabId,
        elementId,
      }),
    };
  }

  const target = cacheEntry.element;
  const resolvedFields = buildResolvedElementResultFields(
    cacheEntry.requestedElementId,
    cacheEntry.resolvedElementId,
  );
  const commander = new CdpCommander(tabId);

  // Single builder for every failure reported after the cache lookup.
  // `staleElement` is only emitted when a value is passed, so results that
  // say nothing about staleness do not carry the key at all.
  const failWith = (error: string, staleElement?: boolean): UploadResult => ({
    success: false,
    ...resolvedFields,
    uploaded: false,
    ...(staleElement === undefined ? {} : { staleElement }),
    error,
  });

  try {
    // Step 1: obtain the document root node, which scopes the selector query.
    const documentReply = (await commander.sendCommand('DOM.getDocument', {
      depth: 0,
    })) as { root?: { nodeId: number } };
    const rootNodeId = documentReply?.root?.nodeId;
    if (typeof rootNodeId !== 'number') {
      return failWith('CDP DOM.getDocument returned no root node');
    }

    // Step 2: resolve the cached CSS selector to a nodeId. A missing or zero
    // nodeId means nothing matched, i.e. the selector went stale.
    const selector = target.selector;
    const queryReply = (await commander.sendCommand('DOM.querySelector', {
      nodeId: rootNodeId,
      selector,
    })) as { nodeId?: number };
    const matchedNodeId = queryReply?.nodeId;
    if (!matchedNodeId) {
      return failWith(
        `Selector "${selector}" no longer resolves to a DOM node (element became stale).`,
        true,
      );
    }

    // Step 3: hand the file path to the input element.
    await commander.sendCommand('DOM.setFileInputFiles', {
      nodeId: matchedNodeId,
      files: [filePath],
    });

    console.log(
      `✅ [ElementUpload] DOM.setFileInputFiles succeeded for ${elementId} (${filePath})`,
    );

    return {
      success: true,
      ...resolvedFields,
      uploaded: true,
    };
  } catch (caught) {
    const reason = caught instanceof Error ? caught.message : String(caught);
    console.error(`❌ [ElementUpload] failed: ${reason}`);
    return failWith(reason);
  }
}

/**
* Export element actions module
*/
Expand All @@ -4105,4 +4218,5 @@ export const elementActions = {
performElementScroll,
performKeyboardInput,
performElementSelect,
performElementUpload,
};
Loading
Loading