diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index fba503ef2..bdc47dd0f 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -449,24 +449,45 @@ extension RunnerTests { typeIntoTarget(text) } return Response(ok: true, data: DataPayload(message: "typed")) + case .interactionFrame: + let frame = resolvedTouchReferenceFrame(app: activeApp, appFrame: activeApp.frame) + return Response( + ok: true, + data: DataPayload( + x: frame.minX, + y: frame.minY, + referenceWidth: frame.width, + referenceHeight: frame.height + ) + ) case .swipe: guard let direction = command.direction else { return Response(ok: false, error: ErrorPayload(message: "swipe requires direction")) } - let referenceFrame = resolvedGestureReferenceFrame(app: activeApp) + var executedFrame: DragVisualizationFrame? let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - swipe(app: activeApp, direction: direction) + executedFrame = swipe( + app: activeApp, + direction: direction + ) } } + guard let dragFrame = executedFrame else { + return Response(ok: false, error: ErrorPayload(message: "swipe is only supported on tvOS")) + } return Response( ok: true, data: DataPayload( message: "swiped", gestureStartUptimeMs: timing.gestureStartUptimeMs, gestureEndUptimeMs: timing.gestureEndUptimeMs, - referenceWidth: referenceFrame.referenceWidth, - referenceHeight: referenceFrame.referenceHeight + x: dragFrame.x, + y: dragFrame.y, + x2: dragFrame.x2, + y2: dragFrame.y2, + referenceWidth: dragFrame.referenceWidth, + referenceHeight: dragFrame.referenceHeight ) ) case .findText: diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift index 83ee916e0..b2399d4ab 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift @@ -17,11 +17,6 @@ extension RunnerTests { let referenceHeight: Double } - struct GestureReferenceFrame { - let referenceWidth: Double - let referenceHeight: Double - } - // MARK: - Navigation Gestures func tapInAppBackControl(app: XCUIApplication) -> Bool { @@ -376,7 +371,7 @@ extension RunnerTests { ) } - private func resolvedTouchReferenceFrame(app: XCUIApplication, appFrame: CGRect) -> CGRect { + func resolvedTouchReferenceFrame(app: XCUIApplication, appFrame: CGRect) -> CGRect { let window = app.windows.firstMatch let windowFrame = window.frame if window.exists && !windowFrame.isEmpty { @@ -388,14 +383,6 @@ extension RunnerTests { return CGRect(x: 0, y: 0, width: 0, height: 0) } - func resolvedGestureReferenceFrame(app: XCUIApplication) -> GestureReferenceFrame { - let frame = resolvedTouchReferenceFrame(app: app, appFrame: app.frame) - return GestureReferenceFrame( - referenceWidth: frame.width, - referenceHeight: frame.height - ) - } - func runSeries(count: Int, pauseMs: Double, operation: (Int) -> Void) { let total = max(count, 1) let pause = max(pauseMs, 0) @@ -407,39 +394,36 @@ extension RunnerTests { } } - func swipe(app: XCUIApplication, direction: SwipeDirection) { + func swipe(app: XCUIApplication, direction: String) -> DragVisualizationFrame? { if performTvRemoteSwipeIfAvailable(direction: direction) { - return - } - let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app - let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.2)) - let end = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.8)) - let left = target.coordinate(withNormalizedOffset: CGVector(dx: 0.2, dy: 0.5)) - let right = target.coordinate(withNormalizedOffset: CGVector(dx: 0.8, dy: 0.5)) - - switch direction { - case .up: - end.press(forDuration: 0.1, thenDragTo: start) - case .down: - start.press(forDuration: 0.1, thenDragTo: end) - case .left: - right.press(forDuration: 0.1, thenDragTo: left) - case .right: - left.press(forDuration: 0.1, thenDragTo: right) + let frame = resolvedTouchReferenceFrame(app: app, appFrame: app.frame) + let midX = frame.midX + let midY = frame.midY + return DragVisualizationFrame( + x: midX, + y: midY, + x2: midX, + y2: midY, + referenceWidth: frame.width, + referenceHeight: frame.height + ) } + return nil } - private func performTvRemoteSwipeIfAvailable(direction: SwipeDirection) -> Bool { + private func performTvRemoteSwipeIfAvailable(direction: String) -> Bool { #if os(tvOS) switch direction { - case .up: + case "up": XCUIRemote.shared.press(.up) - case .down: + case "down": XCUIRemote.shared.press(.down) - case .left: + case "left": XCUIRemote.shared.press(.left) - case .right: + case "right": XCUIRemote.shared.press(.right) + default: + return false } return true #else @@ -515,5 +499,4 @@ extension RunnerTests { let element = app.descendants(matching: .any).matching(predicate).firstMatch return element.exists ? element : nil } - } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift index 0acd912ae..f260cf56e 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift @@ -152,7 +152,7 @@ extension RunnerTests { func isReadOnlyCommand(_ command: Command) -> Bool { switch command.command { - case .findText, .readText, .snapshot, .screenshot: + case .interactionFrame, .findText, .readText, .snapshot, .screenshot: return true case .alert: let action = (command.action ?? "get").lowercased() diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift index 01d16a9dd..002298d5c 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift @@ -5,6 +5,7 @@ enum CommandType: String, Codable { case mouseClick case tapSeries case longPress + case interactionFrame case drag case dragSeries case type @@ -27,13 +28,6 @@ enum CommandType: String, Codable { case shutdown } -enum SwipeDirection: String, Codable { - case up - case down - case left - case right -} - struct Command: Codable { let command: CommandType let appBundleId: String? @@ -52,7 +46,7 @@ struct Command: Codable { let x2: Double? let y2: Double? let durationMs: Double? - let direction: SwipeDirection? + let direction: String? let scale: Double? let outPath: String? let fps: Int? diff --git a/src/core/__tests__/dispatch-scroll.test.ts b/src/core/__tests__/dispatch-scroll.test.ts new file mode 100644 index 000000000..2662cb6f7 --- /dev/null +++ b/src/core/__tests__/dispatch-scroll.test.ts @@ -0,0 +1,23 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { dispatchCommand } from '../dispatch.ts'; +import { AppError } from '../../utils/errors.ts'; +import type { DeviceInfo } from '../../utils/device.ts'; + +const IOS_DEVICE: DeviceInfo = { + platform: 'ios', + id: 'sim-1', + name: 'iPhone 17 Pro', + kind: 'simulator', + booted: true, +}; + +test('dispatch scroll rejects mixing amount and --pixels', async () => { + await assert.rejects( + () => dispatchCommand(IOS_DEVICE, 'scroll', ['down', '0.4'], undefined, { pixels: 240 }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /either a relative amount or --pixels/i.test(error.message), + ); +}); diff --git a/src/core/__tests__/scroll-gesture.test.ts b/src/core/__tests__/scroll-gesture.test.ts new file mode 100644 index 000000000..297b71a4d --- /dev/null +++ b/src/core/__tests__/scroll-gesture.test.ts @@ -0,0 +1,56 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { AppError } from '../../utils/errors.ts'; +import { buildScrollGesturePlan } from '../scroll-gesture.ts'; + +test('buildScrollGesturePlan maps relative amount to viewport travel', () => { + const plan = buildScrollGesturePlan({ + direction: 'down', + amount: 0.5, + referenceWidth: 400, + referenceHeight: 800, + }); + + assert.deepEqual(plan, { + direction: 'down', + x1: 200, + y1: 600, + x2: 200, + y2: 200, + referenceWidth: 400, + referenceHeight: 800, + amount: 0.5, + pixels: 400, + }); +}); + +test('buildScrollGesturePlan clamps pixel travel to the safe gesture band', () => { + const plan = buildScrollGesturePlan({ + direction: 'right', + pixels: 500, + referenceWidth: 300, + referenceHeight: 600, + }); + + assert.equal(plan.x1, 285); + assert.equal(plan.x2, 15); + assert.equal(plan.y1, 300); + assert.equal(plan.y2, 300); + assert.equal(plan.pixels, 270); +}); + +test('buildScrollGesturePlan rejects invalid amounts', () => { + assert.throws( + () => + buildScrollGesturePlan({ + direction: 'down', + amount: 0, + referenceWidth: 400, + referenceHeight: 800, + }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /amount must be a positive number/i.test(error.message), + ); +}); diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index aeffc2019..2483da29f 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -21,6 +21,7 @@ import type { RawSnapshotNode } from '../utils/snapshot.ts'; import type { CliFlags } from '../utils/command-schema.ts'; import { emitDiagnostic, withDiagnosticTimer } from '../utils/diagnostics.ts'; import { successText, withSuccessText } from '../utils/success-text.ts'; +import { parseScrollDirection } from './scroll-gesture.ts'; import { requireIntInRange, clampIosSwipeDuration, @@ -67,6 +68,7 @@ export async function dispatchCommand( delayMs?: number; holdMs?: number; jitterPx?: number; + pixels?: number; doubleTap?: boolean; clickButton?: 'primary' | 'secondary' | 'middle'; backMode?: 'in-app' | 'system'; @@ -381,13 +383,33 @@ export async function dispatchCommand( return { x, y, text, delayMs, ...successText(formatTextLengthMessage('Filled', text)) }; } case 'scroll': { - const direction = positionals[0]; + const directionInput = positionals[0]; const amount = positionals[1] ? Number(positionals[1]) : undefined; - if (!direction) throw new AppError('INVALID_ARGS', 'scroll requires direction'); - const interactionResult = await interactor.scroll(direction, amount); + const pixels = context?.pixels; + if (!directionInput) throw new AppError('INVALID_ARGS', 'scroll requires direction'); + if (amount !== undefined && !Number.isFinite(amount)) { + throw new AppError('INVALID_ARGS', 'scroll amount must be a number'); + } + if (amount !== undefined && pixels !== undefined) { + throw new AppError( + 'INVALID_ARGS', + 'scroll accepts either a relative amount or --pixels, not both', + ); + } + const direction = parseScrollDirection(directionInput); + const interactionResult = await interactor.scroll(direction, { amount, pixels }); return withSuccessText( - { direction, amount, ...interactionResult }, - amount !== undefined ? `Scrolled ${direction} by ${amount}` : `Scrolled ${direction}`, + { + direction, + ...(amount !== undefined ? { amount } : {}), + ...(pixels !== undefined ? { pixels } : {}), + ...interactionResult, + }, + pixels !== undefined + ? `Scrolled ${direction} by ${pixels}px` + : amount !== undefined + ? `Scrolled ${direction} by ${amount}` + : `Scrolled ${direction}`, ); } case 'scrollintoview': { diff --git a/src/core/scroll-gesture.ts b/src/core/scroll-gesture.ts new file mode 100644 index 000000000..8e19fc7b2 --- /dev/null +++ b/src/core/scroll-gesture.ts @@ -0,0 +1,92 @@ +import { AppError } from '../utils/errors.ts'; + +export type ScrollDirection = 'up' | 'down' | 'left' | 'right'; + +export type ScrollGestureOptions = { + direction: ScrollDirection; + amount?: number; + pixels?: number; + referenceWidth: number; + referenceHeight: number; +}; + +export type ScrollGesturePlan = { + direction: ScrollDirection; + x1: number; + y1: number; + x2: number; + y2: number; + referenceWidth: number; + referenceHeight: number; + amount?: number; + pixels: number; +}; + +const DEFAULT_SCROLL_AMOUNT = 0.6; +const DEFAULT_EDGE_PADDING_FRACTION = 0.05; + +export function buildScrollGesturePlan(options: ScrollGestureOptions): ScrollGesturePlan { + const direction = options.direction; + const axisLength = + direction === 'up' || direction === 'down' ? options.referenceHeight : options.referenceWidth; + const requestedAmount = resolveRequestedAmount(options.amount); + const requestedPixels = + options.pixels !== undefined + ? normalizeRequestedPixels(options.pixels) + : Math.round(axisLength * requestedAmount); + const edgePadding = Math.max(1, Math.round(axisLength * DEFAULT_EDGE_PADDING_FRACTION)); + const maxTravelPixels = Math.max(1, axisLength - edgePadding * 2); + const travelPixels = Math.max(1, Math.min(requestedPixels, maxTravelPixels)); + const halfTravel = Math.round(travelPixels / 2); + const centerX = Math.round(options.referenceWidth / 2); + const centerY = Math.round(options.referenceHeight / 2); + const buildPlan = (x1: number, y1: number, x2: number, y2: number): ScrollGesturePlan => ({ + direction, + x1, + y1, + x2, + y2, + referenceWidth: options.referenceWidth, + referenceHeight: options.referenceHeight, + amount: options.amount, + pixels: travelPixels, + }); + + switch (direction) { + case 'up': + return buildPlan(centerX, centerY - halfTravel, centerX, centerY + halfTravel); + case 'down': + return buildPlan(centerX, centerY + halfTravel, centerX, centerY - halfTravel); + case 'left': + return buildPlan(centerX - halfTravel, centerY, centerX + halfTravel, centerY); + case 'right': + return buildPlan(centerX + halfTravel, centerY, centerX - halfTravel, centerY); + } +} + +export function parseScrollDirection(direction: string): ScrollDirection { + switch (direction) { + case 'up': + case 'down': + case 'left': + case 'right': + return direction; + default: + throw new AppError('INVALID_ARGS', `Unknown direction: ${direction}`); + } +} + +function resolveRequestedAmount(amount: number | undefined): number { + if (amount === undefined) return DEFAULT_SCROLL_AMOUNT; + if (!Number.isFinite(amount) || amount <= 0) { + throw new AppError('INVALID_ARGS', 'scroll amount must be a positive number'); + } + return amount; +} + +function normalizeRequestedPixels(pixels: number): number { + if (!Number.isFinite(pixels) || pixels <= 0) { + throw new AppError('INVALID_ARGS', 'scroll pixels must be a positive integer'); + } + return Math.max(1, Math.round(pixels)); +} diff --git a/src/daemon/__tests__/context.test.ts b/src/daemon/__tests__/context.test.ts index 670575707..0270dd168 100644 --- a/src/daemon/__tests__/context.test.ts +++ b/src/daemon/__tests__/context.test.ts @@ -1,8 +1,15 @@ import test from 'node:test'; import assert from 'node:assert/strict'; +import type { CommandFlags } from '../../core/dispatch.ts'; import { contextFromFlags } from '../context.ts'; test('contextFromFlags propagates back mode into the dispatch context', () => { const context = contextFromFlags('/tmp/agent-device.log', { backMode: 'system' }); assert.equal(context.backMode, 'system'); }); + +test('contextFromFlags forwards scroll pixels from CLI flags', () => { + const flags: CommandFlags = { pixels: 240 }; + const context = contextFromFlags('/tmp/agent-device.log', flags); + assert.equal(context.pixels, 240); +}); diff --git a/src/daemon/__tests__/recording-gestures.test.ts b/src/daemon/__tests__/recording-gestures.test.ts index 0de2fcdc0..b117de949 100644 --- a/src/daemon/__tests__/recording-gestures.test.ts +++ b/src/daemon/__tests__/recording-gestures.test.ts @@ -59,9 +59,9 @@ test('scroll records a semantic scroll gesture for visualization telemetry', () assert.equal(event.referenceWidth, 402); assert.equal(event.referenceHeight, 874); assert.equal(event.x, 201); - assert.equal(event.y, 612); + assert.equal(event.y, 699); assert.equal(event.x2, 201); - assert.equal(event.y2, 262); + assert.equal(event.y2, 175); assert.equal(event.durationMs, 250); assert.equal(event.contentDirection, 'down'); }); @@ -117,6 +117,28 @@ test('scroll augmentation preserves explicit reference frame from platform resul assert.equal(augmented.y2, 240); }); +test('scroll augmentation preserves explicit pixel travel coordinates', () => { + const session = makeSession(); + session.snapshot = undefined; + + const augmented = augmentScrollVisualizationResult(session, 'scroll', ['down'], { + direction: 'down', + pixels: 240, + x1: 201, + y1: 557, + x2: 201, + y2: 317, + referenceWidth: 402, + referenceHeight: 874, + }) as Record; + + assert.equal(augmented.x1, 201); + assert.equal(augmented.y1, 557); + assert.equal(augmented.x2, 201); + assert.equal(augmented.y2, 317); + assert.equal(augmented.pixels, 240); +}); + test('gesture recording prefers native runner timing when available', () => { const session = makeSession(); session.recording = { diff --git a/src/daemon/context.ts b/src/daemon/context.ts index 7378639e6..9a49bab72 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -20,6 +20,7 @@ export type DaemonCommandContext = { delayMs?: number; holdMs?: number; jitterPx?: number; + pixels?: number; doubleTap?: boolean; clickButton?: 'primary' | 'secondary' | 'middle'; backMode?: 'in-app' | 'system'; @@ -53,6 +54,7 @@ export function contextFromFlags( delayMs: flags?.delayMs, holdMs: flags?.holdMs, jitterPx: flags?.jitterPx, + pixels: flags?.pixels, doubleTap: flags?.doubleTap, clickButton: resolveClickButton(flags), backMode: flags?.backMode, diff --git a/src/daemon/recording-gestures.ts b/src/daemon/recording-gestures.ts index ddf3f148d..988b8de9d 100644 --- a/src/daemon/recording-gestures.ts +++ b/src/daemon/recording-gestures.ts @@ -6,6 +6,7 @@ import { resolveTapVisualizationOffsetMs, } from './recording-timing.ts'; import { emitDiagnostic } from '../utils/diagnostics.ts'; +import { buildScrollGesturePlan } from '../core/scroll-gesture.ts'; import { getSnapshotReferenceFrame, type TouchReferenceFrame as ReferenceFrame, @@ -14,9 +15,6 @@ import { const DEFAULT_TAP_GAP_MS = 90; const DEFAULT_SWIPE_DURATION_MS = 250; const DEFAULT_PINCH_DURATION_MS = 280; -const DEFAULT_SCROLL_FRACTION = 0.4; -const MIN_SCROLL_FRACTION = 0.2; -const MAX_SCROLL_FRACTION = 0.7; const DEFAULT_SCROLL_REFERENCE_FRAME: ReferenceFrame = { referenceWidth: 1000, referenceHeight: 1000, @@ -102,7 +100,8 @@ export function augmentScrollVisualizationResult( if (!contentDirection) return result; const amountValue = readNumber(merged.amount) ?? readNumber(positionals[1]); - const travelFraction = resolveScrollTravelFraction(amountValue); + const pixelValue = readNumber(merged.pixels); + const explicitTravel = readTravelCoordinates(merged, []); const explicitReferenceWidth = readNumber(merged.referenceWidth); const explicitReferenceHeight = readNumber(merged.referenceHeight); const fallbackReferenceFrame = @@ -115,16 +114,43 @@ export function augmentScrollVisualizationResult( referenceHeight: explicitReferenceHeight, } : (referenceFrame ?? DEFAULT_SCROLL_REFERENCE_FRAME); - const { start, end } = scrollPoints(contentDirection, fallbackReferenceFrame, travelFraction); + + if ( + explicitTravel && + (explicitTravel.x1 !== explicitTravel.x2 || explicitTravel.y1 !== explicitTravel.y2) + ) { + return { + ...merged, + x1: explicitTravel.x1, + y1: explicitTravel.y1, + x2: explicitTravel.x2, + y2: explicitTravel.y2, + contentDirection, + ...(amountValue !== undefined ? { amount: amountValue } : {}), + ...(pixelValue !== undefined ? { pixels: pixelValue } : {}), + referenceWidth: fallbackReferenceFrame.referenceWidth, + referenceHeight: fallbackReferenceFrame.referenceHeight, + durationMs: DEFAULT_SWIPE_DURATION_MS, + }; + } + + const plan = buildScrollGesturePlan({ + direction: contentDirection, + amount: amountValue, + pixels: pixelValue, + referenceWidth: fallbackReferenceFrame.referenceWidth, + referenceHeight: fallbackReferenceFrame.referenceHeight, + }); return { ...merged, - x1: start.x, - y1: start.y, - x2: end.x, - y2: end.y, + x1: plan.x1, + y1: plan.y1, + x2: plan.x2, + y2: plan.y2, contentDirection, - amount: amountValue, + ...(amountValue !== undefined ? { amount: amountValue } : {}), + ...(plan.pixels !== undefined ? { pixels: plan.pixels } : {}), referenceWidth: fallbackReferenceFrame.referenceWidth, referenceHeight: fallbackReferenceFrame.referenceHeight, durationMs: DEFAULT_SWIPE_DURATION_MS, @@ -318,6 +344,7 @@ function buildScrollEvents( const durationMs = resolveDurationMs(gestureDurationMs, [], DEFAULT_SWIPE_DURATION_MS); const amount = readNumber(result.amount) ?? readNumber(positionals[1]); + const pixels = readNumber(result.pixels); return [ { kind: 'scroll', @@ -330,6 +357,7 @@ function buildScrollEvents( durationMs, contentDirection, ...(amount !== undefined ? { amount } : {}), + ...(pixels !== undefined ? { pixels } : {}), }, ]; } @@ -440,49 +468,6 @@ function readDirection(value: unknown): 'up' | 'down' | 'left' | 'right' | undef } } -function resolveScrollTravelFraction(amount: number | undefined): number { - if (amount === undefined) return DEFAULT_SCROLL_FRACTION; - if (!Number.isFinite(amount) || amount <= 0) return DEFAULT_SCROLL_FRACTION; - if (amount <= 1) { - return clampNumber(amount, MIN_SCROLL_FRACTION, MAX_SCROLL_FRACTION); - } - return clampNumber(amount / 100, MIN_SCROLL_FRACTION, MAX_SCROLL_FRACTION); -} - -function scrollPoints( - contentDirection: 'up' | 'down' | 'left' | 'right', - referenceFrame: ReferenceFrame, - travelFraction: number, -): { start: { x: number; y: number }; end: { x: number; y: number } } { - const midX = Math.round(referenceFrame.referenceWidth / 2); - const midY = Math.round(referenceFrame.referenceHeight / 2); - const travelX = Math.round((referenceFrame.referenceWidth * travelFraction) / 2); - const travelY = Math.round((referenceFrame.referenceHeight * travelFraction) / 2); - - switch (contentDirection) { - case 'up': - return { - start: { x: midX, y: midY - travelY }, - end: { x: midX, y: midY + travelY }, - }; - case 'down': - return { - start: { x: midX, y: midY + travelY }, - end: { x: midX, y: midY - travelY }, - }; - case 'left': - return { - start: { x: midX - travelX, y: midY }, - end: { x: midX + travelX, y: midY }, - }; - case 'right': - return { - start: { x: midX + travelX, y: midY }, - end: { x: midX - travelX, y: midY }, - }; - } -} - function readNumber(value: unknown): number | undefined { if (typeof value === 'number' && Number.isFinite(value)) return value; if (typeof value !== 'string' || value.trim().length === 0) return undefined; @@ -496,10 +481,6 @@ function clampInt(value: number | undefined, min: number): number | undefined { return normalized >= min ? normalized : undefined; } -function clampNumber(value: number, min: number, max: number): number { - return Math.max(min, Math.min(max, value)); -} - function readCoordinates( result: Record, positionals: string[], diff --git a/src/daemon/types.ts b/src/daemon/types.ts index dc75ba756..c8bb04765 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -151,6 +151,7 @@ export type RecordingGestureEvent = kind: 'scroll'; contentDirection: 'up' | 'down' | 'left' | 'right'; amount?: number; + pixels?: number; }) | (RecordingTelemetryTravel & { kind: 'back-swipe'; diff --git a/src/platforms/android/__tests__/index.test.ts b/src/platforms/android/__tests__/index.test.ts index d76c63f52..e05d80e23 100644 --- a/src/platforms/android/__tests__/index.test.ts +++ b/src/platforms/android/__tests__/index.test.ts @@ -17,6 +17,7 @@ import { pushAndroidNotification, readAndroidClipboardText, setAndroidSetting, + scrollAndroid, swipeAndroid, typeAndroid, writeAndroidClipboardText, @@ -133,6 +134,35 @@ test('findBounds ignores bounds-like fragments inside other attribute values', ( assert.deepEqual(findBounds(xml, 'target'), { x: 200, y: 350 }); }); +test('scrollAndroid supports explicit pixel travel distance', async () => { + await withMockedAdb( + 'agent-device-android-scroll-pixels-', + [ + '#!/bin/sh', + 'printf "%s\\n" "$@" >> "$AGENT_DEVICE_TEST_ARGS_FILE"', + 'if [ "$1" = "-s" ]; then', + ' shift', + ' shift', + 'fi', + 'if [ "$1" = "shell" ] && [ "$2" = "wm" ] && [ "$3" = "size" ]; then', + ' echo "Physical size: 1080x1920"', + ' exit 0', + 'fi', + 'exit 0', + '', + ].join('\n'), + async ({ argsLogPath, device }) => { + const result = await scrollAndroid(device, 'down', { pixels: 240 }); + const args = await fs.readFile(argsLogPath, 'utf8'); + + assert.match(args, /shell\ninput\nswipe\n540\n1080\n540\n840\n300\n/); + assert.equal(result.pixels, 240); + assert.equal(result.referenceWidth, 1080); + assert.equal(result.referenceHeight, 1920); + }, + ); +}); + test('parseAndroidLaunchComponent extracts final resolved component', () => { const stdout = [ 'priority=0 preferredOrder=0 match=0x108000 specificIndex=-1 isDefault=true', diff --git a/src/platforms/android/input-actions.ts b/src/platforms/android/input-actions.ts index 49299b8db..bfba5f18e 100644 --- a/src/platforms/android/input-actions.ts +++ b/src/platforms/android/input-actions.ts @@ -1,6 +1,7 @@ import { runCmd } from '../../utils/exec.ts'; import { AppError } from '../../utils/errors.ts'; import type { DeviceInfo } from '../../utils/device.ts'; +import { buildScrollGesturePlan, type ScrollDirection } from '../../core/scroll-gesture.ts'; import { findBounds, parseBounds, readNodeAttributes } from './ui-hierarchy.ts'; import { dumpUiHierarchy } from './snapshot.ts'; import { adbArgs, isClipboardShellUnsupported, sleep } from './adb.ts'; @@ -154,46 +155,17 @@ export async function fillAndroid( export async function scrollAndroid( device: DeviceInfo, - direction: string, - amount = 0.6, -): Promise { + direction: ScrollDirection, + options?: { amount?: number; pixels?: number }, +): Promise> { const size = await getAndroidScreenSize(device); - const { width, height } = size; - const distanceX = Math.floor(width * amount); - const distanceY = Math.floor(height * amount); - - const centerX = Math.floor(width / 2); - const centerY = Math.floor(height / 2); - - let x1 = centerX; - let y1 = centerY; - let x2 = centerX; - let y2 = centerY; - - switch (direction) { - case 'up': - // Content moves up -> swipe down. - y1 = centerY - Math.floor(distanceY / 2); - y2 = centerY + Math.floor(distanceY / 2); - break; - case 'down': - // Content moves down -> swipe up. - y1 = centerY + Math.floor(distanceY / 2); - y2 = centerY - Math.floor(distanceY / 2); - break; - case 'left': - // Content moves left -> swipe right. - x1 = centerX - Math.floor(distanceX / 2); - x2 = centerX + Math.floor(distanceX / 2); - break; - case 'right': - // Content moves right -> swipe left. - x1 = centerX + Math.floor(distanceX / 2); - x2 = centerX - Math.floor(distanceX / 2); - break; - default: - throw new AppError('INVALID_ARGS', `Unknown direction: ${direction}`); - } + const plan = buildScrollGesturePlan({ + direction, + amount: options?.amount, + pixels: options?.pixels, + referenceWidth: size.width, + referenceHeight: size.height, + }); await runCmd( 'adb', @@ -201,13 +173,15 @@ export async function scrollAndroid( 'shell', 'input', 'swipe', - String(x1), - String(y1), - String(x2), - String(y2), + String(plan.x1), + String(plan.y1), + String(plan.x2), + String(plan.y2), '300', ]), ); + + return plan; } export async function scrollIntoViewAndroid(device: DeviceInfo, text: string): Promise { @@ -221,7 +195,7 @@ export async function scrollIntoViewAndroid(device: DeviceInfo, text: string): P throw new AppError('UNSUPPORTED_OPERATION', `uiautomator dump failed: ${message}`); } if (findBounds(xml, text)) return; - await scrollAndroid(device, 'down', 0.5); + await scrollAndroid(device, 'down', { amount: 0.5 }); } throw new AppError( 'COMMAND_FAILED', diff --git a/src/platforms/ios/__tests__/runner-client.test.ts b/src/platforms/ios/__tests__/runner-client.test.ts index bfeea8c14..485c1bdba 100644 --- a/src/platforms/ios/__tests__/runner-client.test.ts +++ b/src/platforms/ios/__tests__/runner-client.test.ts @@ -16,6 +16,7 @@ import { resolveRunnerSigningBuildSettings, shouldRetryRunnerConnectError, } from '../runner-client.ts'; +import { isReadOnlyRunnerCommand } from '../runner-errors.ts'; import { ensureXctestrun, resolveRunnerPerformanceBuildSettings, @@ -101,6 +102,10 @@ test('resolveRunnerBuildDestination uses tvOS destinations for tvOS devices and assert.equal(resolveRunnerBuildDestination(tvOsDevice), 'generic/platform=tvOS'); }); +test('isReadOnlyRunnerCommand treats interactionFrame as read-only', () => { + assert.equal(isReadOnlyRunnerCommand('interactionFrame'), true); +}); + test('resolveRunnerMaxConcurrentDestinationsFlag uses simulator flag for simulators', () => { assert.equal( resolveRunnerMaxConcurrentDestinationsFlag(iosSimulator), diff --git a/src/platforms/ios/runner-client.ts b/src/platforms/ios/runner-client.ts index d304252e4..6019c7efa 100644 --- a/src/platforms/ios/runner-client.ts +++ b/src/platforms/ios/runner-client.ts @@ -31,6 +31,7 @@ export type RunnerCommand = { | 'mouseClick' | 'tapSeries' | 'longPress' + | 'interactionFrame' | 'drag' | 'dragSeries' | 'type' diff --git a/src/platforms/ios/runner-errors.ts b/src/platforms/ios/runner-errors.ts index 286fca092..09adac7ed 100644 --- a/src/platforms/ios/runner-errors.ts +++ b/src/platforms/ios/runner-errors.ts @@ -109,6 +109,7 @@ export function resolveSigningFailureHint(error: AppError): string | undefined { export function isReadOnlyRunnerCommand(command: RunnerCommand['command']): boolean { return ( + command === 'interactionFrame' || command === 'snapshot' || command === 'screenshot' || command === 'findText' || diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 3d2734fa4..42cf899ce 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -265,6 +265,13 @@ test('parseArgs accepts keyboard subcommands', () => { assert.deepEqual(dismiss.positionals, ['dismiss']); }); +test('parseArgs accepts scroll pixel distance flag', () => { + const parsed = parseArgs(['scroll', 'down', '--pixels', '240'], { strictFlags: true }); + assert.equal(parsed.command, 'scroll'); + assert.deepEqual(parsed.positionals, ['down']); + assert.equal(parsed.flags.pixels, 240); +}); + test('parseArgs recognizes --debug alias for verbose mode', () => { const parsed = parseArgs(['open', 'settings', '--debug']); assert.equal(parsed.command, 'open'); diff --git a/src/utils/__tests__/interactors.test.ts b/src/utils/__tests__/interactors.test.ts index 6797cbd22..5b441ce7a 100644 --- a/src/utils/__tests__/interactors.test.ts +++ b/src/utils/__tests__/interactors.test.ts @@ -1,6 +1,15 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { resolveAppleBackRunnerCommand } from '../interactors.ts'; +import type { DeviceInfo } from '../device.ts'; +import { getInteractor, resolveAppleBackRunnerCommand } from '../interactors.ts'; + +const iosSimulator: DeviceInfo = { + platform: 'ios', + id: 'sim-1', + name: 'iPhone Simulator', + kind: 'simulator', + booted: true, +}; test('resolveAppleBackRunnerCommand defaults plain back to in-app navigation', () => { assert.equal(resolveAppleBackRunnerCommand(), 'backInApp'); @@ -10,3 +19,81 @@ test('resolveAppleBackRunnerCommand maps explicit back modes to runner commands' assert.equal(resolveAppleBackRunnerCommand('in-app'), 'backInApp'); assert.equal(resolveAppleBackRunnerCommand('system'), 'backSystem'); }); + +test('ios scrollIntoView reuses a single interactionFrame across a burst', async () => { + const commands: string[] = []; + let findTextCalls = 0; + const interactor = getInteractor( + iosSimulator, + { appBundleId: 'com.example.app' }, + { + runIosRunnerCommand: async (_device, command) => { + commands.push(command.command); + if (command.command === 'findText') { + findTextCalls += 1; + return { found: findTextCalls > 1 }; + } + if (command.command === 'interactionFrame') { + return { + x: 10, + y: 20, + referenceWidth: 200, + referenceHeight: 400, + }; + } + if (command.command === 'drag') { + return { + x: 110, + y: 300, + x2: 110, + y2: 100, + referenceWidth: 200, + referenceHeight: 400, + }; + } + throw new Error(`Unexpected runner command: ${command.command}`); + }, + sleepMs: async () => {}, + }, + ); + const result = await interactor.scrollIntoView('Target'); + + assert.deepEqual(result, { attempts: 2 }); + assert.equal(commands.filter((command) => command === 'interactionFrame').length, 1); + assert.equal(commands.filter((command) => command === 'drag').length, 4); +}); + +test('ios scroll reports planned pixels without recomputing from runner coordinates', async () => { + const interactor = getInteractor( + iosSimulator, + { appBundleId: 'com.example.app' }, + { + runIosRunnerCommand: async (_device, command) => { + if (command.command === 'interactionFrame') { + return { + x: 5, + y: 10, + referenceWidth: 300, + referenceHeight: 600, + }; + } + if (command.command === 'drag') { + return { + x: 155, + y: 420, + x2: 155, + y2: 301, + referenceWidth: 300, + referenceHeight: 600, + }; + } + throw new Error(`Unexpected runner command: ${command.command}`); + }, + }, + ); + const result = await interactor.scroll('down', { pixels: 120 }); + + const pixels = + result && typeof result === 'object' && 'pixels' in result ? result.pixels : undefined; + assert.equal(pixels, 120); +}); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 232ee81e7..944e11a90 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -62,6 +62,7 @@ export type CliFlags = { delayMs?: number; holdMs?: number; jitterPx?: number; + pixels?: number; doubleTap?: boolean; clickButton?: 'primary' | 'secondary' | 'middle'; backMode?: 'in-app' | 'system'; @@ -569,6 +570,15 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageLabel: '--jitter-px ', usageDescription: 'Deterministic coordinate jitter radius for press', }, + { + key: 'pixels', + names: ['--pixels'], + type: 'int', + min: 1, + max: 100_000, + usageLabel: '--pixels ', + usageDescription: 'Scroll: explicit gesture distance in pixels', + }, { key: 'doubleTap', names: ['--double-tap'], @@ -1130,10 +1140,11 @@ const COMMAND_SCHEMAS: Record = { allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs'], }, scroll: { - helpDescription: 'Scroll in direction (0-1 amount)', + usageOverride: 'scroll [amount] [--pixels ]', + helpDescription: 'Scroll in direction (relative amount or explicit pixels)', summary: 'Scroll in a direction', positionalArgs: ['direction', 'amount?'], - allowedFlags: [], + allowedFlags: ['pixels'], }, scrollintoview: { usageOverride: 'scrollintoview ', diff --git a/src/utils/interactors.ts b/src/utils/interactors.ts index ec3367054..4b2112690 100644 --- a/src/utils/interactors.ts +++ b/src/utils/interactors.ts @@ -1,5 +1,6 @@ import { AppError } from './errors.ts'; import type { DeviceInfo } from './device.ts'; +import { buildScrollGesturePlan, type ScrollDirection } from '../core/scroll-gesture.ts'; import { appSwitcherAndroid, backAndroid, @@ -43,6 +44,7 @@ export type RunnerContext = { export type BackMode = 'in-app' | 'system'; export type AppleBackRunnerCommand = 'backInApp' | 'backSystem'; +type RunIosRunnerCommand = typeof runIosRunnerCommand; type Interactor = { open( @@ -69,7 +71,10 @@ type Interactor = { text: string, delayMs?: number, ): Promise | void>; - scroll(direction: string, amount?: number): Promise | void>; + scroll( + direction: ScrollDirection, + options?: { amount?: number; pixels?: number }, + ): Promise | void>; scrollIntoView(text: string): Promise<{ attempts?: number } | void>; screenshot(outPath: string, appBundleId?: string): Promise; back(mode?: BackMode): Promise; @@ -85,7 +90,11 @@ type Interactor = { ): Promise | void>; }; -export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): Interactor { +export function getInteractor( + device: DeviceInfo, + runnerContext: RunnerContext, + deps: InteractorDeps = {}, +): Interactor { switch (device.platform) { case 'android': return { @@ -102,7 +111,7 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): focus: (x, y) => focusAndroid(device, x, y), type: (text, delayMs) => typeAndroid(device, text, delayMs), fill: (x, y, text, delayMs) => fillAndroid(device, x, y, text, delayMs), - scroll: (direction, amount) => scrollAndroid(device, direction, amount), + scroll: (direction, options) => scrollAndroid(device, direction, options), scrollIntoView: (text) => scrollIntoViewAndroid(device, text), screenshot: (outPath, _appBundleId) => screenshotAndroid(device, outPath), back: (_mode) => backAndroid(device), @@ -115,7 +124,8 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): }; case 'ios': case 'macos': { - const { overrides, runnerOpts } = iosRunnerOverrides(device, runnerContext); + const runRunnerCommand = deps.runIosRunnerCommand ?? runIosRunnerCommand; + const { overrides, runnerOpts } = iosRunnerOverrides(device, runnerContext, deps); return { open: (app, options) => openIosApp(device, app, { appBundleId: options?.appBundleId, url: options?.url }), @@ -123,7 +133,7 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): close: (app) => closeIosApp(device, app), screenshot: (outPath, appBundleId) => screenshotIos(device, outPath, appBundleId), back: async (mode) => { - await runIosRunnerCommand( + await runRunnerCommand( device, { command: resolveAppleBackRunnerCommand(mode), @@ -133,14 +143,14 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): ); }, home: async () => { - await runIosRunnerCommand( + await runRunnerCommand( device, { command: 'home', appBundleId: runnerContext.appBundleId }, runnerOpts, ); }, appSwitcher: async () => { - await runIosRunnerCommand( + await runRunnerCommand( device, { command: 'appSwitcher', appBundleId: runnerContext.appBundleId }, runnerOpts, @@ -170,6 +180,19 @@ type RunnerOpts = { requestId?: string; }; +type InteractionFrame = { + originX: number; + originY: number; + referenceWidth: number; + referenceHeight: number; +}; + +type NormalizedScrollOptions = { + amount?: number; + pixels?: number; + preferProvidedPixels?: boolean; +}; + type IoRunnerOverrides = Pick< Interactor, | 'tap' @@ -183,10 +206,19 @@ type IoRunnerOverrides = Pick< | 'scrollIntoView' >; +type InteractorDeps = { + runIosRunnerCommand?: RunIosRunnerCommand; + sleepMs?: (ms: number) => Promise; +}; + function iosRunnerOverrides( device: DeviceInfo, ctx: RunnerContext, + deps: InteractorDeps, ): { overrides: IoRunnerOverrides; runnerOpts: RunnerOpts } { + const runRunnerCommand = deps.runIosRunnerCommand ?? runIosRunnerCommand; + const sleepMs = + deps.sleepMs ?? ((ms: number) => new Promise((resolve) => setTimeout(resolve, ms))); const runnerOpts = { verbose: ctx.verbose, logPath: ctx.logPath, @@ -202,14 +234,14 @@ function iosRunnerOverrides( runnerOpts, overrides: { tap: async (x, y) => { - return await runIosRunnerCommand( + return await runRunnerCommand( device, { command: 'tap', x, y, appBundleId: ctx.appBundleId }, runnerOpts, ); }, doubleTap: async (x, y) => { - return await runIosRunnerCommand( + return await runRunnerCommand( device, { command: 'tapSeries', @@ -224,60 +256,52 @@ function iosRunnerOverrides( ); }, swipe: async (x1, y1, x2, y2, durationMs) => { - return await runIosRunnerCommand( + return await runRunnerCommand( device, { command: 'drag', x: x1, y: y1, x2, y2, durationMs, appBundleId: ctx.appBundleId }, runnerOpts, ); }, longPress: async (x, y, durationMs) => { - return await runIosRunnerCommand( + return await runRunnerCommand( device, { command: 'longPress', x, y, durationMs, appBundleId: ctx.appBundleId }, runnerOpts, ); }, focus: async (x, y) => { - return await runIosRunnerCommand( + return await runRunnerCommand( device, { command: 'tap', x, y, appBundleId: ctx.appBundleId }, runnerOpts, ); }, type: async (text, delayMs) => { - await runIosRunnerCommand( + await runRunnerCommand( device, { command: 'type', text, delayMs, appBundleId: ctx.appBundleId }, runnerOpts, ); }, fill: async (x, y, text, delayMs) => { - const tapResult = await runIosRunnerCommand( + const tapResult = await runRunnerCommand( device, { command: 'tap', x, y, appBundleId: ctx.appBundleId }, runnerOpts, ); - await runIosRunnerCommand( + await runRunnerCommand( device, { command: 'type', text, clearFirst: true, delayMs, appBundleId: ctx.appBundleId }, runnerOpts, ); return tapResult; }, - scroll: async (direction, _amount) => { - if (!['up', 'down', 'left', 'right'].includes(direction)) { - throw new AppError('INVALID_ARGS', `Unknown direction: ${direction}`); - } - const inverted = invertScrollDirection(direction as 'up' | 'down' | 'left' | 'right'); - return (await runIosRunnerCommand( - device, - { command: 'swipe', direction: inverted, appBundleId: ctx.appBundleId }, - runnerOpts, - )) as Record; + scroll: async (direction, options) => { + return await runAppleScroll(runRunnerCommand, device, ctx, runnerOpts, direction, options); }, scrollIntoView: async (text) => { // Check once, then scroll in bursts to avoid slow find->swipe->find cadence on heavy screens. - const initial = (await runIosRunnerCommand( + const initial = (await runRunnerCommand( device, { command: 'findText', text, appBundleId: ctx.appBundleId }, runnerOpts, @@ -286,19 +310,30 @@ function iosRunnerOverrides( const maxBursts = 12; const swipesPerBurst = 4; + let cachedInteractionFrame: InteractionFrame | undefined; for (let burst = 0; burst < maxBursts; burst += 1) { for (let i = 0; i < swipesPerBurst; i += 1) { throwIfCanceled(); - await runIosRunnerCommand( + cachedInteractionFrame ??= await resolveAppleInteractionFrame( + runRunnerCommand, device, - { command: 'swipe', direction: 'up', appBundleId: ctx.appBundleId }, + ctx, runnerOpts, ); + await runAppleScroll( + runRunnerCommand, + device, + ctx, + runnerOpts, + 'down', + undefined, + cachedInteractionFrame, + ); // Small settle keeps gesture chain stable without long visible pauses. - await new Promise((resolve) => setTimeout(resolve, 80)); + await sleepMs(80); } throwIfCanceled(); - const found = (await runIosRunnerCommand( + const found = (await runRunnerCommand( device, { command: 'findText', text, appBundleId: ctx.appBundleId }, runnerOpts, @@ -311,9 +346,7 @@ function iosRunnerOverrides( }; } -function invertScrollDirection( - direction: 'up' | 'down' | 'left' | 'right', -): 'up' | 'down' | 'left' | 'right' { +function invertScrollDirection(direction: ScrollDirection): ScrollDirection { switch (direction) { case 'up': return 'down'; @@ -323,5 +356,136 @@ function invertScrollDirection( return 'right'; case 'right': return 'left'; + default: { + const _exhaustive: never = direction; + return _exhaustive; + } + } +} + +async function runAppleScroll( + runRunnerCommand: RunIosRunnerCommand, + device: DeviceInfo, + ctx: RunnerContext, + runnerOpts: RunnerOpts, + direction: ScrollDirection, + options?: { amount?: number; pixels?: number }, + interactionFrame?: InteractionFrame, +): Promise> { + if (device.target === 'tv') { + const runnerResult = await runRunnerCommand( + device, + { + command: 'swipe', + direction: invertScrollDirection(direction), + appBundleId: ctx.appBundleId, + }, + runnerOpts, + ); + return normalizeIosScrollResult(runnerResult, options); } + + const frame = + interactionFrame ?? + (await resolveAppleInteractionFrame(runRunnerCommand, device, ctx, runnerOpts)); + const plan = buildScrollGesturePlan({ + direction, + amount: options?.amount, + pixels: options?.pixels, + referenceWidth: frame.referenceWidth, + referenceHeight: frame.referenceHeight, + }); + const runnerResult = await runRunnerCommand( + device, + { + command: 'drag', + x: frame.originX + plan.x1, + y: frame.originY + plan.y1, + x2: frame.originX + plan.x2, + y2: frame.originY + plan.y2, + appBundleId: ctx.appBundleId, + }, + runnerOpts, + ); + return normalizeIosScrollResult(runnerResult, { + amount: plan.amount, + pixels: plan.pixels, + preferProvidedPixels: true, + }); +} + +async function resolveAppleInteractionFrame( + runRunnerCommand: RunIosRunnerCommand, + device: DeviceInfo, + ctx: RunnerContext, + runnerOpts: RunnerOpts, +): Promise { + const runnerResult = await runRunnerCommand( + device, + { command: 'interactionFrame', appBundleId: ctx.appBundleId }, + runnerOpts, + ); + const originX = readFiniteNumber(runnerResult.x); + const originY = readFiniteNumber(runnerResult.y); + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); + const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); + if ( + originX === undefined || + originY === undefined || + referenceWidth === undefined || + referenceHeight === undefined + ) { + throw new AppError('COMMAND_FAILED', 'interactionFrame did not return a usable frame'); + } + return { originX, originY, referenceWidth, referenceHeight }; +} + +function readFiniteNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function normalizeIosScrollResult( + runnerResult: Record, + options?: NormalizedScrollOptions, +): Record { + const { x1, y1, x2, y2 } = remapRunnerCoordinates(runnerResult); + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); + const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); + const horizontalTravel = + x1 !== undefined && x2 !== undefined ? Math.round(Math.abs(x2 - x1)) : undefined; + const verticalTravel = + y1 !== undefined && y2 !== undefined ? Math.round(Math.abs(y2 - y1)) : undefined; + const travelPixels = + options?.preferProvidedPixels && options.pixels !== undefined + ? options.pixels + : horizontalTravel && horizontalTravel > 0 + ? horizontalTravel + : verticalTravel && verticalTravel > 0 + ? verticalTravel + : undefined; + + return { + ...(x1 !== undefined ? { x1 } : {}), + ...(y1 !== undefined ? { y1 } : {}), + ...(x2 !== undefined ? { x2 } : {}), + ...(y2 !== undefined ? { y2 } : {}), + ...(referenceWidth !== undefined ? { referenceWidth } : {}), + ...(referenceHeight !== undefined ? { referenceHeight } : {}), + ...(options?.amount !== undefined ? { amount: options.amount } : {}), + ...(travelPixels !== undefined ? { pixels: travelPixels } : {}), + }; +} + +function remapRunnerCoordinates(runnerResult: Record): { + x1?: number; + y1?: number; + x2?: number; + y2?: number; +} { + return { + x1: readFiniteNumber(runnerResult.x), + y1: readFiniteNumber(runnerResult.y), + x2: readFiniteNumber(runnerResult.x2), + y2: readFiniteNumber(runnerResult.y2), + }; } diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index a544fe6fb..08cf54cd1 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -218,6 +218,7 @@ agent-device swipe 540 1500 540 500 120 agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong agent-device longpress 300 500 800 agent-device scroll down 0.5 +agent-device scroll down --pixels 320 agent-device scrollintoview "Sign in" agent-device scrollintoview @e42 agent-device pinch 2.0 # zoom in 2x (iOS simulator) @@ -233,6 +234,7 @@ Some Android images cannot enter non-ASCII text over shell input; in that case u `click --button middle` is reserved for future runner support and currently returns an explicit unsupported-operation error on macOS. `swipe` accepts an optional `durationMs` argument (default `250ms`, range `16..10000`). On iOS, swipe duration is clamped to a safe range (`16..60ms`) to avoid longpress side effects. +`scroll` accepts either a relative amount (`0.5` means roughly half of the viewport on that axis) or `--pixels ` for a fixed-distance gesture. Large distances are clamped to the usable drag band so the gesture stays reliable across Android, iOS, and macOS. `scrollintoview` accepts plain text or a snapshot ref (`@eN`); ref mode uses best-effort geometry-based scrolling without post-scroll verification. Run `snapshot` again before follow-up `@ref` commands. `longpress` is supported on iOS and Android. `pinch` is iOS simulator-only. diff --git a/website/docs/docs/quick-start.md b/website/docs/docs/quick-start.md index 396874601..054deeb36 100644 --- a/website/docs/docs/quick-start.md +++ b/website/docs/docs/quick-start.md @@ -96,6 +96,7 @@ Navigate content that extends beyond the viewport: ```bash agent-device scroll down 0.5 # Scroll down half screen agent-device scroll up 0.3 # Scroll up 30% +agent-device scroll down --pixels 320 # Scroll down by a fixed distance ``` ## Settings helpers