diff --git a/android-multitouch-helper/src/main/java/com/callstack/agentdevice/multitouchhelper/MultiTouchInstrumentation.java b/android-multitouch-helper/src/main/java/com/callstack/agentdevice/multitouchhelper/MultiTouchInstrumentation.java index 31fe96d83..4f5d839bd 100644 --- a/android-multitouch-helper/src/main/java/com/callstack/agentdevice/multitouchhelper/MultiTouchInstrumentation.java +++ b/android-multitouch-helper/src/main/java/com/callstack/agentdevice/multitouchhelper/MultiTouchInstrumentation.java @@ -18,6 +18,7 @@ public final class MultiTouchInstrumentation extends Instrumentation { private static final int MAX_RADIUS = 1200; private static final int MIN_DURATION_MS = 16; private static final int MAX_DURATION_MS = 10_000; + private static final int MOVE_FRAME_INTERVAL_MS = 16; private Bundle arguments; @Override @@ -91,50 +92,66 @@ private int injectGesture(GestureSpec spec) { long eventTime = downTime; PointerPair start = pointerPairAt(spec, 0); PointerPair end = pointerPairAt(spec, 1); + PointerPair activePointers = start.firstOnly(); int count = 0; - inject( - automation, - motionEvent(downTime, eventTime, MotionEvent.ACTION_DOWN, start.firstOnly())); - count += 1; - eventTime += 8; - inject( - automation, - motionEvent( - downTime, - eventTime, - MotionEvent.ACTION_POINTER_DOWN | (1 << MotionEvent.ACTION_POINTER_INDEX_SHIFT), - start)); - count += 1; + try { + inject( + automation, + motionEvent(downTime, eventTime, MotionEvent.ACTION_DOWN, activePointers), + true); + count += 1; + eventTime += 8; + inject( + automation, + motionEvent( + downTime, + eventTime, + MotionEvent.ACTION_POINTER_DOWN | (1 << MotionEvent.ACTION_POINTER_INDEX_SHIFT), + start), + true); + count += 1; + activePointers = start; + + int frameCount = + Math.max(3, Math.round(spec.durationMs / (float) MOVE_FRAME_INTERVAL_MS)); + for (int index = 1; index < frameCount; index += 1) { + double t = (double) index / (double) frameCount; + PointerPair frame = pointerPairAt(spec, t); 
+ eventTime = downTime + Math.round(spec.durationMs * t); + inject(automation, motionEvent(downTime, eventTime, MotionEvent.ACTION_MOVE, frame), false); + count += 1; + activePointers = frame; + } - int frameCount = Math.max(3, Math.round(spec.durationMs / 16.0f)); - for (int index = 1; index < frameCount; index += 1) { - double t = (double) index / (double) frameCount; - PointerPair frame = pointerPairAt(spec, t); - eventTime = downTime + Math.round(spec.durationMs * t); - inject(automation, motionEvent(downTime, eventTime, MotionEvent.ACTION_MOVE, frame)); + eventTime = downTime + spec.durationMs; + inject( + automation, + motionEvent( + downTime, + eventTime, + MotionEvent.ACTION_POINTER_UP | (1 << MotionEvent.ACTION_POINTER_INDEX_SHIFT), + end), + true); + count += 1; + activePointers = end.firstOnly(); + inject( + automation, + motionEvent(downTime, eventTime + 8, MotionEvent.ACTION_UP, activePointers), + true); count += 1; + return count; + } catch (RuntimeException error) { + if (count > 0) { + injectCancel(automation, downTime, eventTime + 16, activePointers); + } + throw error; } - - eventTime = downTime + spec.durationMs; - inject( - automation, - motionEvent( - downTime, - eventTime, - MotionEvent.ACTION_POINTER_UP | (1 << MotionEvent.ACTION_POINTER_INDEX_SHIFT), - end)); - count += 1; - inject( - automation, - motionEvent(downTime, eventTime + 8, MotionEvent.ACTION_UP, end.firstOnly())); - count += 1; - return count; } - private static void inject(UiAutomation automation, MotionEvent event) { + private static void inject(UiAutomation automation, MotionEvent event, boolean waitForDispatch) { try { - if (!automation.injectInputEvent(event, true)) { + if (!automation.injectInputEvent(event, waitForDispatch)) { throw new IllegalStateException("injectInputEvent returned false"); } } finally { @@ -142,6 +159,15 @@ private static void inject(UiAutomation automation, MotionEvent event) { } } + private static void injectCancel( + UiAutomation automation, long 
downTime, long eventTime, PointerPair pair) { + try { + inject(automation, motionEvent(downTime, eventTime, MotionEvent.ACTION_CANCEL, pair), true); + } catch (RuntimeException ignored) { + // Best-effort cleanup; preserve the original injection failure. + } + } + private static MotionEvent motionEvent(long downTime, long eventTime, int action, PointerPair pair) { MotionEvent.PointerProperties[] properties = new MotionEvent.PointerProperties[pair.pointerCount]; diff --git a/examples/test-app/README.md b/examples/test-app/README.md index dc167d119..3bae04fdd 100644 --- a/examples/test-app/README.md +++ b/examples/test-app/README.md @@ -91,7 +91,9 @@ These run the `.ad` replay suite in `examples/test-app/replays`. `gesture-lab.ad` verifies `gesture pan`, `gesture fling`, `gesture pinch`, and `gesture rotate` against the gesture metrics rendered by the Home screen on iOS and Android. Android and iOS simulator sessions also support `gesture transform` -for a combined pan/zoom/rotate gesture. +for a combined pan/zoom/rotate gesture. On Android, treat combined transform +assertions as qualitative because recognizers can report non-exact centroid, +scale, and rotation values for one simultaneous two-finger gesture. 
To target a specific iOS simulator or an installed Expo development build, run the underlying command directly so global flags stay before replay inputs: diff --git a/examples/test-app/src/screens/GestureLab.tsx b/examples/test-app/src/screens/GestureLab.tsx index 4fcaf6c71..53a3863e3 100644 --- a/examples/test-app/src/screens/GestureLab.tsx +++ b/examples/test-app/src/screens/GestureLab.tsx @@ -60,14 +60,23 @@ export function GestureLab() { const pinchRef = useRef(null); const rotationRef = useRef(null); const flingRefs = [flingLeftRef, flingRightRef, flingUpRef, flingDownRef]; + const activeTransformHandlerTagsRef = useRef(new Set()); function updateTransform(nextTransform: TransformState) { transformRef.current = nextTransform; setTransform(nextTransform); } - function beginTransformGesture() { - gestureStartRef.current = transformRef.current; + function beginTransformGesture(handlerTag: number) { + const activeHandlerTags = activeTransformHandlerTagsRef.current; + if (activeHandlerTags.size === 0) { + gestureStartRef.current = transformRef.current; + } + activeHandlerTags.add(handlerTag); + } + + function endTransformGesture(handlerTag: number) { + activeTransformHandlerTagsRef.current.delete(handlerTag); } function handlePan(event: PanGestureHandlerGestureEvent) { @@ -102,7 +111,16 @@ export function GestureLab() { | RotationGestureHandlerStateChangeEvent, ) { if (event.nativeEvent.state === State.BEGAN) { - beginTransformGesture(); + beginTransformGesture(event.nativeEvent.handlerTag); + return; + } + + if ( + event.nativeEvent.state === State.END || + event.nativeEvent.state === State.FAILED || + event.nativeEvent.state === State.CANCELLED + ) { + endTransformGesture(event.nativeEvent.handlerTag); } } @@ -163,6 +181,7 @@ export function GestureLab() { simultaneousHandlers={[panRef, rotationRef, ...flingRefs]} > { let capturedArgs: string[] | undefined; + let capturedOptions: Parameters[1]; const result = await runAndroidMultiTouchHelperGesture({ - adb: async 
(args) => { + adb: async (args, options) => { capturedArgs = args; + capturedOptions = options; return { exitCode: 0, stdout: [resultRecord({ ok: 'true', kind: 'rotate' }), 'INSTRUMENTATION_CODE: 0'].join( @@ -96,6 +98,41 @@ test('runAndroidMultiTouchHelperGesture encodes protocol payload for instrumenta durationMs: 250, }); assert.equal(capturedArgs.at(-1), manifest.instrumentationRunner); + assert.equal(capturedOptions?.timeoutMs, 45_000); +}); + +test('parseAndroidMultiTouchHelperOutput distinguishes missing final results', () => { + assert.throws(() => parseAndroidMultiTouchHelperOutput('INSTRUMENTATION_CODE: 0'), { + code: 'ANDROID_MULTITOUCH_HELPER_NO_FINAL_RESULT', + message: 'Android multi-touch helper did not return a final result', + }); +}); + +test('runAndroidMultiTouchHelperGesture preserves helper failure messages', async () => { + await assert.rejects( + () => + runAndroidMultiTouchHelperGesture({ + adb: async () => ({ + exitCode: 1, + stdout: [ + resultRecord({ + ok: 'false', + errorType: 'java.lang.IllegalStateException', + message: 'injectInputEvent returned false', + }), + 'INSTRUMENTATION_CODE: 1', + ].join('\n'), + stderr: '', + }), + request: { kind: 'pinch', x: 100, y: 200, scale: 1.5, radius: 120, durationMs: 250 }, + packageName: manifest.packageName, + instrumentationRunner: manifest.instrumentationRunner, + }), + { + code: 'COMMAND_FAILED', + message: 'injectInputEvent returned false', + }, + ); }); test('pinchAndroid, rotateGestureAndroid, and transformGestureAndroid prefer provider-native touch injection', async () => { diff --git a/src/platforms/android/multitouch-helper.ts b/src/platforms/android/multitouch-helper.ts index c939fd6de..69d3c6ac4 100644 --- a/src/platforms/android/multitouch-helper.ts +++ b/src/platforms/android/multitouch-helper.ts @@ -22,12 +22,14 @@ const ANDROID_MULTITOUCH_HELPER_RUNNER = 'com.callstack.agentdevice.multitouchhelper/.MultiTouchInstrumentation'; const ANDROID_MULTITOUCH_HELPER_PROTOCOL = 
'android-multitouch-helper-v1'; const ANDROID_MULTITOUCH_HELPER_INSTALL_TIMEOUT_MS = 30_000; -const ANDROID_MULTITOUCH_HELPER_GESTURE_TIMEOUT_MS = 15_000; +const ANDROID_MULTITOUCH_HELPER_GESTURE_TIMEOUT_MS = 45_000; const ANDROID_MULTITOUCH_HELPER_DEFAULT_DURATION_MS = 300; const ANDROID_MULTITOUCH_HELPER_DEFAULT_RADIUS = 160; const ANDROID_MULTITOUCH_HELPER_ROTATE_MAX_DEGREES_PER_FRAME = 3; const ANDROID_MULTITOUCH_HELPER_ROTATE_FRAME_INTERVAL_MS = 16; const ANDROID_MULTITOUCH_HELPER_ROTATE_MAX_DURATION_MS = 2_400; +const ANDROID_MULTITOUCH_HELPER_NO_FINAL_RESULT = 'ANDROID_MULTITOUCH_HELPER_NO_FINAL_RESULT'; +const ANDROID_MULTITOUCH_HELPER_REPORTED_FAILURE = 'ANDROID_MULTITOUCH_HELPER_REPORTED_FAILURE'; type AndroidMultiTouchHelperManifest = { name: 'android-multitouch-helper'; @@ -309,6 +311,14 @@ export async function runAndroidMultiTouchHelperGesture(options: { try { output = parseAndroidMultiTouchHelperOutput(`${result.stdout}\n${result.stderr}`); } catch (error) { + if (error instanceof AppError) { + if (error.code === ANDROID_MULTITOUCH_HELPER_REPORTED_FAILURE) { + throw new AppError('COMMAND_FAILED', error.message, error.details, error); + } + if (error.code !== ANDROID_MULTITOUCH_HELPER_NO_FINAL_RESULT) { + throw error; + } + } throw new AppError( 'COMMAND_FAILED', result.exitCode === 0 @@ -339,15 +349,19 @@ export function parseAndroidMultiTouchHelperOutput(output: string): Record { assert.match(help, /wait for a concrete result before returning to chat\/form state/); assert.match(help, /choose a point near the center of the intended app-owned target/); assert.match(help, /Avoid screen edges, tab bars, navigation bars, and home indicators/); + assert.match(help, /Android transform injects a geometric two-finger path/); + assert.match(help, /verify qualitative state such as "pan changed yes"/); + assert.match(help, /prefer isolated gesture pan, gesture pinch, or gesture rotate/); assert.match(help, /longpress accepts coordinates, @refs, or selectors/); 
assert.match(help, /use help react-native for Metro\/Fast Refresh/); assert.match(help, /iOS Allow Paste prompt cannot be exercised under XCUITest/); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 28710d73c..a557bda99 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -324,6 +324,8 @@ Navigation and gestures: agent-device gesture rotate 35 200 420 agent-device gesture transform 200 420 80 -40 2 35 700 iOS simulator transform uses XCTest gesture primitives; verify app metrics instead of assuming requested degrees map exactly to recognizer output. + Android transform injects a geometric two-finger path; app recognizers may report non-exact pan/scale/rotation. For Android combined transforms, verify qualitative state such as "pan changed yes" / "pinch changed yes" / "rotate changed yes" unless the app explicitly promises exact centroid metrics. + If Android needs exact app-state values, prefer isolated gesture pan, gesture pinch, or gesture rotate commands over one combined transform. Validation and evidence: Nearby mutation diff: agent-device diff snapshot -i. 
diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index a2ab28153..493617877 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -1530,14 +1530,25 @@ const SKILL_GUIDANCE_CASES: Case[] = [ 'Zoom scale is 2', 'Rotation is 35 degrees', 'Duration is 700ms', + 'After the command, verify Android changed qualitatively instead of asserting exact x, y, scale, or rotate values', + ], + task: 'Plan the direct agent-device command for the combined pan, zoom, and rotate gesture, then verify qualitative state.', + outputs: [ + plannedCommand('gesture transform'), + /200\s+420\s+80\s+-40\s+2\s+35\s+700/i, + plannedCommand('wait'), + /pan changed yes/i, + /pinch changed yes/i, + /rotate changed yes/i, ], - task: 'Plan the direct agent-device command for the combined pan, zoom, and rotate gesture.', - outputs: [plannedCommand('gesture transform'), /200\s+420\s+80\s+-40\s+2\s+35\s+700/i], forbiddenOutputs: [ plannedCommand('gesture pan'), plannedCommand('gesture pinch'), plannedCommand('gesture rotate'), plannedCommand('compose-gestures'), + /wait\s+["']?x\s/i, + /wait\s+["']?scale\s/i, + /wait\s+["']?rotate\s+\d/i, ], }), makeCase({ diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index 75ad3bc77..c1d922c2a 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -281,6 +281,7 @@ On iOS, swipe duration is clamped to a safe range (`16..60ms`) to avoid longpres `gesture rotate` accepts `degrees [x] [y] [velocity]`; the degree sign controls direction and velocity controls speed. `gesture transform` accepts `x y dx dy scale degrees [durationMs]` for one combined pan/zoom/rotate gesture on Android and iOS simulators. On iOS simulators it is implemented with XCTest gesture primitives, so verify app-level metrics instead of assuming the requested degrees map exactly to recognizer output. 
+On Android, `gesture transform` injects a geometric two-finger path. App recognizers may report non-exact pan, scale, and rotation values, so verify qualitative state such as `pan changed yes`, `pinch changed yes`, and `rotate changed yes` unless the app explicitly promises exact centroid metrics. If exact app-state values matter, prefer isolated `gesture pan`, `gesture pinch`, or `gesture rotate` commands. `scroll` accepts either a relative amount (`0.5` means roughly half of the viewport on that axis) or `--pixels <n>` for a fixed-distance gesture. Large distances are clamped to the usable drag band so the gesture stays reliable across Android, iOS, and macOS. Default snapshot text output is visible-first, so off-screen interactive content is summarized instead of shown as tappable refs. When a target only appears in an off-screen summary, use `scroll <direction>` and then take a fresh `snapshot -i`. For repeated checks, a small shell loop is enough: