From 864d85ac324427878ec99c88cec1052f24888685 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 06:43:12 +0000 Subject: [PATCH 1/8] feat: add --wait-stable for Android AX tree and --first/--last for find Address the #1 token efficiency issue on Android: stale accessibility trees after navigation. `snapshot --wait-stable ` polls until two consecutive uiautomator dumps match (tree stabilized) or the timeout expires, eliminating retry loops caused by AX lag after screen transitions. Also adds `find --first` / `find --last` flags to disambiguate when multiple elements match, avoiding the fallback to raw snapshot + coordinate taps that burned 4-6 extra tool calls. https://claude.ai/code/session_01FzX5t9qATyT9iWnYjssShC --- src/cli/commands/snapshot.ts | 1 + src/client-normalizers.ts | 1 + src/client-types.ts | 2 ++ src/core/dispatch.ts | 2 ++ src/daemon/context.ts | 2 ++ src/daemon/handlers/find.ts | 8 ++++++- src/platforms/android/snapshot.ts | 24 ++++++++++++++++++++- src/utils/command-schema.ts | 35 ++++++++++++++++++++++++++++--- src/utils/snapshot.ts | 1 + 9 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/cli/commands/snapshot.ts b/src/cli/commands/snapshot.ts index 6ae1d8872..e7a8e8ee0 100644 --- a/src/cli/commands/snapshot.ts +++ b/src/cli/commands/snapshot.ts @@ -11,6 +11,7 @@ export const snapshotCommand: ClientCommandHandler = async ({ flags, client }) = depth: flags.snapshotDepth, scope: flags.snapshotScope, raw: flags.snapshotRaw, + waitStableMs: flags.snapshotWaitStableMs, }); const data = serializeSnapshotResult(result); writeCommandOutput(flags, data, () => diff --git a/src/client-normalizers.ts b/src/client-normalizers.ts index 100001f6f..614ee1f4c 100644 --- a/src/client-normalizers.ts +++ b/src/client-normalizers.ts @@ -266,6 +266,7 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags { snapshotDepth: options.depth, snapshotScope: options.scope, snapshotRaw: options.raw, + snapshotWaitStableMs: 
options.waitStableMs, overlayRefs: options.overlayRefs, verbose: options.debug, }) as CommandFlags; diff --git a/src/client-types.ts b/src/client-types.ts index f3e30a98a..c5cac469b 100644 --- a/src/client-types.ts +++ b/src/client-types.ts @@ -247,6 +247,7 @@ export type CaptureSnapshotOptions = AgentDeviceRequestOverrides & depth?: number; scope?: string; raw?: boolean; + waitStableMs?: number; }; export type CaptureSnapshotResult = { @@ -291,6 +292,7 @@ export type InternalRequestOptions = AgentDeviceClientConfig & depth?: number; scope?: string; raw?: boolean; + waitStableMs?: number; installSource?: DaemonInstallSource; retainMaterializedPaths?: boolean; materializedPathRetentionMs?: number; diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index 720964604..3a5fdc27f 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -63,6 +63,7 @@ export async function dispatchCommand( snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; + snapshotWaitStableMs?: number; screenshotFullscreen?: boolean; count?: number; intervalMs?: number; @@ -712,6 +713,7 @@ export async function dispatchCommand( depth: context?.snapshotDepth, scope: context?.snapshotScope, raw: context?.snapshotRaw, + waitStableMs: context?.snapshotWaitStableMs, }), { backend: 'android', diff --git a/src/daemon/context.ts b/src/daemon/context.ts index c4a063f94..7e49c8206 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -15,6 +15,7 @@ export type DaemonCommandContext = { snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; + snapshotWaitStableMs?: number; screenshotFullscreen?: boolean; count?: number; intervalMs?: number; @@ -51,6 +52,7 @@ export function contextFromFlags( snapshotDepth: flags?.snapshotDepth, snapshotScope: flags?.snapshotScope, snapshotRaw: flags?.snapshotRaw, + snapshotWaitStableMs: flags?.snapshotWaitStableMs, screenshotFullscreen: flags?.screenshotFullscreen, count: flags?.count, intervalMs: flags?.intervalMs, diff 
--git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index a804125b2..38e44d5f8 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -122,7 +122,13 @@ export async function handleFindCommands(params: { }); if (requiresRect && bestMatches.matches.length > 1) { - return buildAmbiguousMatchError(bestMatches.matches, locator, query); + if (req.flags?.findFirst) { + bestMatches.matches.length = 1; + } else if (req.flags?.findLast) { + bestMatches.matches = [bestMatches.matches[bestMatches.matches.length - 1]]; + } else { + return buildAmbiguousMatchError(bestMatches.matches, locator, query); + } } const node = bestMatches.matches[0] ?? null; diff --git a/src/platforms/android/snapshot.ts b/src/platforms/android/snapshot.ts index 18bf81b5a..79c7f8b7a 100644 --- a/src/platforms/android/snapshot.ts +++ b/src/platforms/android/snapshot.ts @@ -14,10 +14,32 @@ export async function snapshotAndroid( truncated?: boolean; analysis: AndroidSnapshotAnalysis; }> { - const xml = await dumpUiHierarchy(device); + const xml = options.waitStableMs + ? await dumpUiHierarchyStable(device, options.waitStableMs) + : await dumpUiHierarchy(device); return parseUiHierarchy(xml, 800, options); } +/** + * Poll until the AX tree stabilizes: two consecutive dumps produce identical XML, + * or the timeout is reached. Returns the last captured XML. + */ +async function dumpUiHierarchyStable(device: DeviceInfo, timeoutMs: number): Promise { + const POLL_INTERVAL = 200; + const start = Date.now(); + let previousXml = await dumpUiHierarchy(device); + while (Date.now() - start < timeoutMs) { + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL)); + const currentXml = await dumpUiHierarchy(device); + if (currentXml === previousXml) { + return currentXml; + } + previousXml = currentXml; + } + // Timeout reached — return the last dump even though it may still be changing. 
+ return previousXml; +} + export async function dumpUiHierarchy(device: DeviceInfo): Promise { return withRetry(() => dumpUiHierarchyOnce(device), { shouldRetry: isRetryableAdbError, diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index dc275927d..82d4403cb 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -53,6 +53,7 @@ export type CliFlags = { snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; + snapshotWaitStableMs?: number; networkInclude?: 'summary' | 'headers' | 'body' | 'all'; overlayRefs?: boolean; screenshotFullscreen?: boolean; @@ -92,6 +93,8 @@ export type CliFlags = { reportJunit?: string; steps?: string; stepsFile?: string; + findFirst?: boolean; + findLast?: boolean; batchOnError?: 'stop'; batchMaxSteps?: number; batchSteps?: Array<{ @@ -137,6 +140,7 @@ const SNAPSHOT_FLAGS = [ 'snapshotDepth', 'snapshotScope', 'snapshotRaw', + 'snapshotWaitStableMs', ] as const satisfies readonly FlagKey[]; const SELECTOR_SNAPSHOT_FLAGS = [ @@ -146,6 +150,7 @@ const SELECTOR_SNAPSHOT_FLAGS = [ ] as const satisfies readonly FlagKey[]; const FIND_SNAPSHOT_FLAGS = ['snapshotDepth', 'snapshotRaw'] as const satisfies readonly FlagKey[]; +const FIND_DISAMBIGUATE_FLAGS = ['findFirst', 'findLast'] as const satisfies readonly FlagKey[]; const AGENT_SKILLS = [ { label: 'agent-device', description: 'Canonical mobile automation flows' }, @@ -867,6 +872,30 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageLabel: '--raw', usageDescription: 'Snapshot: raw node output', }, + { + key: 'snapshotWaitStableMs', + names: ['--wait-stable'], + type: 'int', + min: 0, + max: 10000, + usageLabel: '--wait-stable ', + usageDescription: + 'Snapshot: poll until AX tree stabilizes (Android). 
Retries until two consecutive dumps match or timeout.', + }, + { + key: 'findFirst', + names: ['--first'], + type: 'boolean', + usageLabel: '--first', + usageDescription: 'Find: pick the first match when ambiguous', + }, + { + key: 'findLast', + names: ['--last'], + type: 'boolean', + usageLabel: '--last', + usageDescription: 'Find: pick the last match when ambiguous', + }, { key: 'out', names: ['--out'], @@ -980,7 +1009,7 @@ const COMMAND_SCHEMAS: Record = { allowedFlags: [], }, snapshot: { - usageOverride: 'snapshot [--diff] [-i] [-c] [-d ] [-s ] [--raw]', + usageOverride: 'snapshot [--diff] [-i] [-c] [-d ] [-s ] [--raw] [--wait-stable ]', helpDescription: 'Capture accessibility tree or diff against the previous session baseline', positionalArgs: [], allowedFlags: ['snapshotDiff', ...SNAPSHOT_FLAGS], @@ -1267,12 +1296,12 @@ const COMMAND_SCHEMAS: Record = { allowedFlags: ['networkInclude'], }, find: { - usageOverride: 'find [value]', + usageOverride: 'find [value] [--first|--last]', helpDescription: 'Find by text/label/value/role/id and run action', summary: 'Find an element and act', positionalArgs: ['query', 'action', 'value?'], allowsExtraPositionals: true, - allowedFlags: [...FIND_SNAPSHOT_FLAGS], + allowedFlags: [...FIND_SNAPSHOT_FLAGS, ...FIND_DISAMBIGUATE_FLAGS], }, is: { helpDescription: 'Assert UI state (visible|hidden|exists|editable|selected|text)', diff --git a/src/utils/snapshot.ts b/src/utils/snapshot.ts index 40ed70732..2f78c8208 100644 --- a/src/utils/snapshot.ts +++ b/src/utils/snapshot.ts @@ -16,6 +16,7 @@ export type SnapshotOptions = { depth?: number; scope?: string; raw?: boolean; + waitStableMs?: number; }; export type RawSnapshotNode = { From daf1668c43952d6be5ec797f453f26d4a12c5607 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 07:00:48 +0000 Subject: [PATCH 2/8] cleanup: simplify find disambiguation and tighten flag descriptions - Use consistent array reassignment for --first/--last instead of .length mutation - Inline 
FIND_DISAMBIGUATE_FLAGS (only 2 entries, no reuse) - Shorten --wait-stable usage description https://claude.ai/code/session_01FzX5t9qATyT9iWnYjssShC --- src/daemon/handlers/find.ts | 2 +- src/utils/command-schema.ts | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index 38e44d5f8..d9c83f3d8 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -123,7 +123,7 @@ export async function handleFindCommands(params: { if (requiresRect && bestMatches.matches.length > 1) { if (req.flags?.findFirst) { - bestMatches.matches.length = 1; + bestMatches.matches = [bestMatches.matches[0]]; } else if (req.flags?.findLast) { bestMatches.matches = [bestMatches.matches[bestMatches.matches.length - 1]]; } else { diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 82d4403cb..d15c9b68d 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -150,7 +150,6 @@ const SELECTOR_SNAPSHOT_FLAGS = [ ] as const satisfies readonly FlagKey[]; const FIND_SNAPSHOT_FLAGS = ['snapshotDepth', 'snapshotRaw'] as const satisfies readonly FlagKey[]; -const FIND_DISAMBIGUATE_FLAGS = ['findFirst', 'findLast'] as const satisfies readonly FlagKey[]; const AGENT_SKILLS = [ { label: 'agent-device', description: 'Canonical mobile automation flows' }, @@ -879,8 +878,7 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ min: 0, max: 10000, usageLabel: '--wait-stable ', - usageDescription: - 'Snapshot: poll until AX tree stabilizes (Android). 
Retries until two consecutive dumps match or timeout.', + usageDescription: 'Snapshot: wait for AX tree to stabilize (Android)', }, { key: 'findFirst', @@ -1301,7 +1299,7 @@ const COMMAND_SCHEMAS: Record = { summary: 'Find an element and act', positionalArgs: ['query', 'action', 'value?'], allowsExtraPositionals: true, - allowedFlags: [...FIND_SNAPSHOT_FLAGS, ...FIND_DISAMBIGUATE_FLAGS], + allowedFlags: [...FIND_SNAPSHOT_FLAGS, 'findFirst', 'findLast'], }, is: { helpDescription: 'Assert UI state (visible|hidden|exists|editable|selected|text)', From ef39621b5e2447884b58d520ea25c51a4cbf74ba Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 07:25:54 +0000 Subject: [PATCH 3/8] docs: add flag plumbing checklist and --wait-stable/--first/--last guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a 7-step checklist to AGENTS.md for adding new CLI flags — this was the main non-obvious cost in the implementation (7 files for one flag). Update exploration skill to recommend --wait-stable for Android AX staleness recovery and --first/--last for ambiguous find matches. https://claude.ai/code/session_01FzX5t9qATyT9iWnYjssShC --- AGENTS.md | 14 ++++++++++++++ skills/agent-device/references/exploration.md | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 7df14db64..d6a92fa7e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,6 +84,20 @@ Minimal operating guide for AI coding agents in this repo. - `runner-transport.ts` must not import back from `runner-client.ts`. - If changing runner connect errors, retry policy, or command typing, start in `src/platforms/ios/runner-contract.ts` before touching client/transport files. +## Adding a New CLI Flag + +A new snapshot/command flag touches up to 7 files in a fixed order. Follow this checklist: + +1. `src/utils/command-schema.ts`: add to `CliFlags` type, `FLAG_DEFINITIONS` array, and the relevant `*_FLAGS` constant (e.g. 
`SNAPSHOT_FLAGS`). Update the command's `usageOverride` string. +2. `src/utils/snapshot.ts` (or the relevant options type): add to `SnapshotOptions` or equivalent. +3. `src/client-types.ts`: add to `CaptureSnapshotOptions` (or equivalent public options type) **and** `InternalRequestOptions`. +4. `src/client-normalizers.ts`: map the public option name to the internal flag name in `buildFlags`. +5. `src/daemon/context.ts`: add to `DaemonCommandContext` type and `contextFromFlags` function. +6. `src/core/dispatch.ts`: add to the inline context type on `dispatchCommand` and thread it to the platform call. +7. `src/cli/commands/<command>.ts`: pass the flag from `flags.*` to the client call. + +Command-only flags (like `find --first`) that don't flow to the platform layer only need step 1 and the handler file. + ## Hard Rules - Use `runCmd`/`runCmdSync` from `src/utils/exec.ts` for process execution. - Use daemon session flow for interactions (`open` before interactions, `close` after). diff --git a/skills/agent-device/references/exploration.md b/skills/agent-device/references/exploration.md index db8aaec09..9493badda 100644 --- a/skills/agent-device/references/exploration.md +++ b/skills/agent-device/references/exploration.md @@ -46,7 +46,7 @@ Open this file when the app or screen is already running and you need to discove Do not treat `@ref` values as durable after navigation or dynamic updates. Re-snapshot after the UI changes, and switch to selectors when the flow must stay stable. -On Android after submits, route changes, or composer transitions, the accessibility tree can lag behind the visible UI for a short window. 
If `snapshot -i` and `screenshot` disagree, trust the screenshot as the visual source of truth, wait briefly, then take one fresh snapshot instead of looping snapshots immediately. Use `snapshot -i --wait-stable 2000` to let the command itself poll until the tree stabilizes, which avoids manual wait-then-retry loops. In React Native dev or debug builds, do not ignore visible warning or error overlays. They can block taps, change the focused element, or hide the real UI state. Check for them near app open and after major transitions. @@ -173,6 +173,7 @@ Use this rule of thumb: - Use `is` for assertions. - Use `wait` when the UI needs time to settle after a mutation. - Use `find "" click --json` when you need search-driven targeting plus matched-target metadata. +- Use `find "" click --first` or `--last` when ambiguous matches are expected and you want the first or last occurrence without falling back to raw coordinates. - If you are forced onto raw coordinates, open [coordinate-system.md](coordinate-system.md) first. Example: @@ -211,7 +212,7 @@ Avoid this escalation path for visible-text questions: - Do not jump from `snapshot -i` to `get text @ref`, then to web search, then to typing into a search box just to force the app to reveal the answer. - Start with `snapshot`. If the text is not visible or exposed, report that directly. -- After Android submit or navigation-heavy actions, prefer this recovery order when the UI looks wrong: `screenshot`, short `wait`, one fresh `snapshot -i`. +- After Android submit or navigation-heavy actions, prefer this recovery order when the UI looks wrong: `screenshot`, then `snapshot -i --wait-stable 2000` to let the tree settle, or short `wait` then one fresh `snapshot -i`. 
Canonical QA loop: From 45ea091267a30b2c57fbb61831ada5c61a80f3a5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 07:28:38 +0000 Subject: [PATCH 4/8] docs: improve skill clarity for weaker models - Split "Most common mistake" into three labeled subsections (stale refs, Android AX lag, React Native overlays) so each is scannable independently - Convert Android AX recovery from prose into a numbered list - Add --wait-stable to debugging.md for consistency with exploration.md - Replace vague "meaningful UI changes" with concrete triggers - Tighten QA recovery sentence that was getting unwieldy https://claude.ai/code/session_01FzX5t9qATyT9iWnYjssShC --- skills/agent-device/SKILL.md | 2 +- skills/agent-device/references/debugging.md | 2 +- skills/agent-device/references/exploration.md | 20 ++++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/skills/agent-device/SKILL.md b/skills/agent-device/SKILL.md index c9a8a48f3..12c2fb719 100644 --- a/skills/agent-device/SKILL.md +++ b/skills/agent-device/SKILL.md @@ -17,7 +17,7 @@ Use this skill as a router with mandatory defaults. Read this file first. For no - Avoid speculative mutations. You may take the smallest reversible UI action needed to unblock inspection or complete the requested task, such as dismissing a popup, closing an alert, or clearing an unintended surface. - In React Native dev or debug builds, check early for visible warning or error overlays, tooltips, and toasts that can steal focus or intercept taps. If they are not part of the requested behavior, dismiss them and continue. If you saw them, report them in the final summary. - Do not browse the web or use external sources unless the user explicitly asks. -- Re-snapshot after meaningful UI changes instead of reusing stale refs. +- Re-snapshot after navigation, taps, or form submits instead of reusing stale refs. - Prefer `@ref` or selector targeting over raw coordinates. 
- Ensure the correct target is pinned and an app session is open before interacting. - Keep the loop short: `open` -> inspect/act -> verify if needed -> `close`. diff --git a/skills/agent-device/references/debugging.md b/skills/agent-device/references/debugging.md index 47f6ad188..93ad24580 100644 --- a/skills/agent-device/references/debugging.md +++ b/skills/agent-device/references/debugging.md @@ -105,7 +105,7 @@ agent-device alert accept - `snapshot` returns 0 nodes: the app may no longer be foregrounded or the UI is not stable yet. Re-open the app or retry when state settles. - Logs are empty: confirm you opened an app session before `logs clear --restart`. - Android logs look stale after relaunch: retry the repro window after the process rebinds. -- Android accessibility snapshots can lag behind visible screen transitions. If the tree looks stale after navigation, capture a `screenshot`, wait briefly, then re-run `snapshot -i`. +- Android accessibility snapshots can lag behind visible screen transitions. If the tree looks stale after navigation, use `snapshot -i --wait-stable 2000`, or capture a `screenshot`, wait briefly, then re-run `snapshot -i`. - React Native dev warnings or errors keep reappearing: treat them as part of the app state, not as disposable chrome. Capture one clean repro and include them in the summary. - Permission prompts block the flow: wait for the alert and handle it explicitly. - If snapshots keep returning 0 nodes on an iOS simulator, restart Simulator and re-open the app. 
diff --git a/skills/agent-device/references/exploration.md b/skills/agent-device/references/exploration.md index 9493badda..a53f50b96 100644 --- a/skills/agent-device/references/exploration.md +++ b/skills/agent-device/references/exploration.md @@ -42,19 +42,21 @@ Open this file when the app or screen is already running and you need to discove - `wait` - `keyboard dismiss` when the keyboard obscures the next target -## Most common mistake to avoid +## Common mistakes to avoid -Do not treat `@ref` values as durable after navigation or dynamic updates. Re-snapshot after the UI changes, and switch to selectors when the flow must stay stable. +**Stale refs.** Do not treat `@ref` values as durable after navigation or dynamic updates. Re-snapshot after the UI changes, and switch to selectors when the flow must stay stable. -On Android after submits, route changes, or composer transitions, the accessibility tree can lag behind the visible UI for a short window. If `snapshot -i` and `screenshot` disagree, trust the screenshot as the visual source of truth, wait briefly, then take one fresh snapshot instead of looping snapshots immediately. Use `snapshot -i --wait-stable 2000` to let the command itself poll until the tree stabilizes, which avoids manual wait-then-retry loops. +**Android AX tree lag.** After submits, route changes, or composer transitions, the accessibility tree can lag behind the visible UI. If `snapshot -i` and `screenshot` disagree: -In React Native dev or debug builds, do not ignore visible warning or error overlays. They can block taps, change the focused element, or hide the real UI state. Check for them near app open and after major transitions. +1. Trust the screenshot as visual truth. +2. Use `snapshot -i --wait-stable 2000` to let the command poll until the tree settles. +3. If you cannot use `--wait-stable`, wait briefly, then take one fresh snapshot. Do not loop snapshots immediately. 
-Default rule: +**React Native dev overlays.** In dev or debug builds, warning or error overlays can block taps, change focus, or hide the real UI. Check for them near app open and after major transitions. -- If the overlay is not part of the requested behavior, dismiss it and continue. -- If it is blocking, recurring, or likely related to the task, switch to [debugging.md](debugging.md) and collect a short evidence window. -- If you saw a visible warning or error at any point, mention it in the final summary even if you dismissed it. +- Not blocking the task: dismiss and continue. +- Blocking or recurring: switch to [debugging.md](debugging.md) and collect evidence. +- Seen at any point: mention in the final summary even if dismissed. ## Common example loops @@ -212,7 +214,7 @@ Avoid this escalation path for visible-text questions: - Do not jump from `snapshot -i` to `get text @ref`, then to web search, then to typing into a search box just to force the app to reveal the answer. - Start with `snapshot`. If the text is not visible or exposed, report that directly. -- After Android submit or navigation-heavy actions, prefer this recovery order when the UI looks wrong: `screenshot`, then `snapshot -i --wait-stable 2000` to let the tree settle, or short `wait` then one fresh `snapshot -i`. +- After Android submit or navigation-heavy actions when the UI looks wrong: `screenshot` first, then `snapshot -i --wait-stable 2000`. 
Canonical QA loop: From 94813debc87468cf2b04aaab7b81764e8937bf87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 1 Apr 2026 11:27:41 +0200 Subject: [PATCH 5/8] fix: make Android snapshot freshness recovery internal --- skills/agent-device/references/debugging.md | 2 +- skills/agent-device/references/exploration.md | 6 +- src/cli/commands/snapshot.ts | 1 - src/client-normalizers.ts | 1 - src/client-types.ts | 2 - src/core/dispatch.ts | 2 - src/daemon/android-snapshot-freshness.ts | 100 +++++++++ src/daemon/context.ts | 2 - src/daemon/handlers/__tests__/find.test.ts | 114 +++++++++- .../__tests__/snapshot-handler.test.ts | 206 +++++++++++++++--- src/daemon/handlers/find.ts | 10 +- src/daemon/handlers/interaction-common.ts | 7 + src/daemon/handlers/session-open.ts | 2 + src/daemon/handlers/snapshot-capture.ts | 107 ++++++++- src/daemon/handlers/snapshot-wait.ts | 14 +- src/daemon/handlers/snapshot.ts | 73 +------ src/daemon/request-router.ts | 7 + src/daemon/types.ts | 2 + src/platforms/android/snapshot.ts | 24 +- src/utils/command-schema.ts | 13 +- src/utils/snapshot.ts | 1 - 21 files changed, 540 insertions(+), 156 deletions(-) create mode 100644 src/daemon/android-snapshot-freshness.ts diff --git a/skills/agent-device/references/debugging.md b/skills/agent-device/references/debugging.md index 93ad24580..4c962eb53 100644 --- a/skills/agent-device/references/debugging.md +++ b/skills/agent-device/references/debugging.md @@ -105,7 +105,7 @@ agent-device alert accept - `snapshot` returns 0 nodes: the app may no longer be foregrounded or the UI is not stable yet. Re-open the app or retry when state settles. - Logs are empty: confirm you opened an app session before `logs clear --restart`. - Android logs look stale after relaunch: retry the repro window after the process rebinds. -- Android accessibility snapshots can lag behind visible screen transitions. 
If the tree looks stale after navigation, use `snapshot -i --wait-stable 2000`, or capture a `screenshot`, wait briefly, then re-run `snapshot -i`. +- Android accessibility snapshots can lag behind visible screen transitions. The next snapshot now retries briefly after navigation-sensitive actions, but if the tree still looks stale, use `screenshot` as visual truth, wait briefly, then re-run `snapshot -i`. - React Native dev warnings or errors keep reappearing: treat them as part of the app state, not as disposable chrome. Capture one clean repro and include them in the summary. - Permission prompts block the flow: wait for the alert and handle it explicitly. - If snapshots keep returning 0 nodes on an iOS simulator, restart Simulator and re-open the app. diff --git a/skills/agent-device/references/exploration.md b/skills/agent-device/references/exploration.md index a53f50b96..08a0c537d 100644 --- a/skills/agent-device/references/exploration.md +++ b/skills/agent-device/references/exploration.md @@ -49,8 +49,8 @@ Open this file when the app or screen is already running and you need to discove **Android AX tree lag.** After submits, route changes, or composer transitions, the accessibility tree can lag behind the visible UI. If `snapshot -i` and `screenshot` disagree: 1. Trust the screenshot as visual truth. -2. Use `snapshot -i --wait-stable 2000` to let the command poll until the tree settles. -3. If you cannot use `--wait-stable`, wait briefly, then take one fresh snapshot. Do not loop snapshots immediately. +2. Take one fresh `snapshot -i`. Android retries briefly after navigation-sensitive actions. +3. If the tree still disagrees with the screenshot, wait briefly, then take one more fresh snapshot. Do not loop snapshots immediately. **React Native dev overlays.** In dev or debug builds, warning or error overlays can block taps, change focus, or hide the real UI. Check for them near app open and after major transitions. 
@@ -214,7 +214,7 @@ Avoid this escalation path for visible-text questions: - Do not jump from `snapshot -i` to `get text @ref`, then to web search, then to typing into a search box just to force the app to reveal the answer. - Start with `snapshot`. If the text is not visible or exposed, report that directly. -- After Android submit or navigation-heavy actions when the UI looks wrong: `screenshot` first, then `snapshot -i --wait-stable 2000`. +- After Android submit or navigation-heavy actions when the UI looks wrong: `screenshot` first, then `snapshot -i`. Canonical QA loop: diff --git a/src/cli/commands/snapshot.ts b/src/cli/commands/snapshot.ts index e7a8e8ee0..6ae1d8872 100644 --- a/src/cli/commands/snapshot.ts +++ b/src/cli/commands/snapshot.ts @@ -11,7 +11,6 @@ export const snapshotCommand: ClientCommandHandler = async ({ flags, client }) = depth: flags.snapshotDepth, scope: flags.snapshotScope, raw: flags.snapshotRaw, - waitStableMs: flags.snapshotWaitStableMs, }); const data = serializeSnapshotResult(result); writeCommandOutput(flags, data, () => diff --git a/src/client-normalizers.ts b/src/client-normalizers.ts index 614ee1f4c..100001f6f 100644 --- a/src/client-normalizers.ts +++ b/src/client-normalizers.ts @@ -266,7 +266,6 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags { snapshotDepth: options.depth, snapshotScope: options.scope, snapshotRaw: options.raw, - snapshotWaitStableMs: options.waitStableMs, overlayRefs: options.overlayRefs, verbose: options.debug, }) as CommandFlags; diff --git a/src/client-types.ts b/src/client-types.ts index c5cac469b..f3e30a98a 100644 --- a/src/client-types.ts +++ b/src/client-types.ts @@ -247,7 +247,6 @@ export type CaptureSnapshotOptions = AgentDeviceRequestOverrides & depth?: number; scope?: string; raw?: boolean; - waitStableMs?: number; }; export type CaptureSnapshotResult = { @@ -292,7 +291,6 @@ export type InternalRequestOptions = AgentDeviceClientConfig & depth?: number; scope?: string; 
raw?: boolean; - waitStableMs?: number; installSource?: DaemonInstallSource; retainMaterializedPaths?: boolean; materializedPathRetentionMs?: number; diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index 3a5fdc27f..720964604 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -63,7 +63,6 @@ export async function dispatchCommand( snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; - snapshotWaitStableMs?: number; screenshotFullscreen?: boolean; count?: number; intervalMs?: number; @@ -713,7 +712,6 @@ export async function dispatchCommand( depth: context?.snapshotDepth, scope: context?.snapshotScope, raw: context?.snapshotRaw, - waitStableMs: context?.snapshotWaitStableMs, }), { backend: 'android', diff --git a/src/daemon/android-snapshot-freshness.ts b/src/daemon/android-snapshot-freshness.ts new file mode 100644 index 000000000..ef7f732f3 --- /dev/null +++ b/src/daemon/android-snapshot-freshness.ts @@ -0,0 +1,100 @@ +import type { SnapshotState } from '../utils/snapshot.ts'; +import type { SessionState } from './types.ts'; + +const ANDROID_FRESHNESS_WINDOW_MS = 2_500; + +export const ANDROID_FRESHNESS_RETRY_DELAYS_MS = [250, 400] as const; + +export type AndroidSnapshotFreshness = { + action: string; + markedAt: number; + baselineCount: number; + baselineSignatures: string[]; +}; + +export type AndroidFreshnessCaptureMeta = { + action: string; + retryCount: number; + staleAfterRetries: boolean; +}; + +export function markAndroidSnapshotFreshness(session: SessionState, action: string): void { + if (session.device.platform !== 'android') return; + session.androidSnapshotFreshness = { + action, + markedAt: Date.now(), + baselineCount: session.snapshot?.nodes.length ?? 0, + baselineSignatures: buildSnapshotSignatures(session.snapshot?.nodes ?? 
[]), + }; +} + +export function getActiveAndroidSnapshotFreshness( + session: SessionState | undefined, +): AndroidSnapshotFreshness | undefined { + if (!session || session.device.platform !== 'android') return undefined; + const freshness = session.androidSnapshotFreshness; + if (!freshness) return undefined; + if (Date.now() - freshness.markedAt > ANDROID_FRESHNESS_WINDOW_MS) { + delete session.androidSnapshotFreshness; + return undefined; + } + return freshness; +} + +export function clearAndroidSnapshotFreshness(session: SessionState | undefined): void { + if (!session || session.device.platform !== 'android') return; + delete session.androidSnapshotFreshness; +} + +export function isNavigationSensitiveAction(command: string): boolean { + return command === 'press' || command === 'click' || command === 'back' || command === 'open'; +} + +export function buildSnapshotSignatures(nodes: SnapshotState['nodes']): string[] { + return nodes.map((node) => + [ + node.depth ?? 0, + node.type ?? '', + node.role ?? '', + node.label ?? '', + node.value ?? '', + node.identifier ?? '', + node.enabled === false ? 'disabled' : 'enabled', + node.selected === true ? 'selected' : 'unselected', + node.hittable === true ? 
'hittable' : 'not-hittable', + ].join('|'), + ); +} + +export function isLikelyStaleSnapshotDrop(previousCount: number, currentCount: number): boolean { + if (previousCount < 12) { + return false; + } + return currentCount <= Math.floor(previousCount * 0.2); +} + +export function isLikelySnapshotStuckOnPreviousRoute( + previousSignatures: string[], + currentNodes: SnapshotState['nodes'], +): boolean { + const total = Math.max(previousSignatures.length, currentNodes.length); + if (total < 12) { + return false; + } + const currentSignatures = buildSnapshotSignatures(currentNodes); + const comparableLength = Math.min(previousSignatures.length, currentSignatures.length); + let unchanged = 0; + for (let index = 0; index < comparableLength; index += 1) { + if (previousSignatures[index] === currentSignatures[index]) { + unchanged += 1; + } + } + const additions = Math.max(0, currentSignatures.length - previousSignatures.length); + const removals = Math.max(0, previousSignatures.length - currentSignatures.length); + const toleratedDelta = Math.max(3, Math.floor(total * 0.15)); + return ( + unchanged >= Math.floor(total * 0.9) && + additions <= toleratedDelta && + removals <= toleratedDelta + ); +} diff --git a/src/daemon/context.ts b/src/daemon/context.ts index 7e49c8206..c4a063f94 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -15,7 +15,6 @@ export type DaemonCommandContext = { snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; - snapshotWaitStableMs?: number; screenshotFullscreen?: boolean; count?: number; intervalMs?: number; @@ -52,7 +51,6 @@ export function contextFromFlags( snapshotDepth: flags?.snapshotDepth, snapshotScope: flags?.snapshotScope, snapshotRaw: flags?.snapshotRaw, - snapshotWaitStableMs: flags?.snapshotWaitStableMs, screenshotFullscreen: flags?.screenshotFullscreen, count: flags?.count, intervalMs: flags?.intervalMs, diff --git a/src/daemon/handlers/__tests__/find.test.ts 
b/src/daemon/handlers/__tests__/find.test.ts index c7a78a508..c370479b2 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -1,4 +1,4 @@ -import { test, expect, vi } from 'vitest'; +import { test, expect, vi, beforeEach } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; @@ -6,6 +6,7 @@ import { parseFindArgs, handleFindCommands } from '../find.ts'; import { SessionStore } from '../../session-store.ts'; import type { DaemonRequest, DaemonResponse, SessionState } from '../../types.ts'; import { withMockedMacOsHelper } from '../../../platforms/ios/__tests__/macos-helper-test-utils.ts'; +import { buildSnapshotSignatures } from '../../android-snapshot-freshness.ts'; vi.mock('../../../core/dispatch.ts', async (importOriginal) => { const actual = await importOriginal(); @@ -22,6 +23,13 @@ import { dispatchCommand } from '../../../core/dispatch.ts'; const mockDispatch = vi.mocked(dispatchCommand); +beforeEach(() => { + mockDispatch.mockReset(); + mockDispatch.mockImplementation(async (_device: unknown, command: string) => { + return command === 'snapshot' ? { nodes: [] } : {}; + }); +}); + function makeSessionStore(): SessionStore { const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-find-handler-')); return new SessionStore(path.join(root, 'sessions')); @@ -68,7 +76,9 @@ const INCREMENT_NODE = { async function runFindClickScenario(options: { positionals: string[]; - nodes: Array>; + nodes?: Array>; + flags?: DaemonRequest['flags']; + session?: SessionState; invoke?: (req: DaemonRequest) => Promise>; }): Promise<{ response: NonNullable>>; @@ -76,14 +86,16 @@ async function runFindClickScenario(options: { }> { const sessionStore = makeSessionStore(); const sessionName = 'default'; - sessionStore.set(sessionName, makeSession(sessionName)); + sessionStore.set(sessionName, options.session ?? 
makeSession(sessionName)); - mockDispatch.mockImplementation(async (_device, command) => { - if (command === 'snapshot') { - return { nodes: options.nodes }; - } - return {}; - }); + if (options.nodes !== undefined) { + mockDispatch.mockImplementation(async (_device, command) => { + if (command === 'snapshot') { + return { nodes: options.nodes }; + } + return {}; + }); + } const invokeCalls: DaemonRequest[] = []; const response = await handleFindCommands({ @@ -92,7 +104,7 @@ async function runFindClickScenario(options: { session: sessionName, command: 'find', positionals: options.positionals, - flags: {}, + flags: options.flags ?? {}, }, sessionName, logPath: '/tmp/test.log', @@ -201,6 +213,20 @@ test('parseFindArgs with bare locator yields empty query', () => { expect(parsed.action).toBe('click'); }); +test('handleFindCommands rejects --first with --last', async () => { + const { response } = await runFindClickScenario({ + positionals: ['Increment', 'click'], + nodes: [INCREMENT_NODE], + flags: { findFirst: true, findLast: true }, + }); + + expect(response.ok).toBe(false); + if (!response.ok) { + expect(response.error.code).toBe('INVALID_ARGS'); + expect(response.error.message).toContain('only one of --first or --last'); + } +}); + test('handleFindCommands click returns deterministic metadata across locator variants', async () => { const hittableParentNoRect = { index: 0, type: 'View', hittable: true, depth: 0 }; const nonHittableChildWithRect = { @@ -267,6 +293,74 @@ test('handleFindCommands click returns deterministic metadata across locator var } }); +test('handleFindCommands wait bypasses snapshot cache while Android freshness recovery is active', async () => { + const sessionName = 'android-find-wait'; + const session: SessionState = { + name: sessionName, + device: { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel 9 Pro XL', + kind: 'emulator', + target: 'mobile', + booted: true, + }, + createdAt: Date.now(), + actions: [], + }; + const 
baselineNodes = Array.from({ length: 16 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); + session.snapshot = { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'android', + }; + session.androidSnapshotFreshness = { + action: 'press', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + }; + + mockDispatch + .mockResolvedValueOnce({ + nodes: Array.from({ length: 16 }, (_, index) => ({ + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })), + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 16, maxDepth: 1 }, + }) + .mockResolvedValueOnce({ + nodes: [ + { index: 0, depth: 0, type: 'android.widget.TextView', label: 'Create expense' }, + { index: 1, depth: 0, type: 'android.widget.Button', label: 'Submit', hittable: true }, + ], + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 2, maxDepth: 1 }, + }); + + const { response } = await runFindClickScenario({ + positionals: ['text', 'Create expense', 'wait', '700'], + session, + }); + + expect(response.ok).toBe(true); + if (response.ok) { + expect(response.data?.found).toBe(true); + } + expect(mockDispatch).toHaveBeenCalledTimes(2); +}); + test('handleFindCommands uses helper-backed snapshots for macOS desktop sessions', async () => { await withMockedMacOsHelper( [ diff --git a/src/daemon/handlers/__tests__/snapshot-handler.test.ts b/src/daemon/handlers/__tests__/snapshot-handler.test.ts index 3c0b08a21..24090a286 100644 --- a/src/daemon/handlers/__tests__/snapshot-handler.test.ts +++ b/src/daemon/handlers/__tests__/snapshot-handler.test.ts @@ -7,6 +7,7 @@ import { SessionStore } from '../../session-store.ts'; import type { SessionState } from '../../types.ts'; import { AppError } from '../../../utils/errors.ts'; import { withMockedMacOsHelper } from 
'../../../platforms/ios/__tests__/macos-helper-test-utils.ts'; +import { buildSnapshotSignatures } from '../../android-snapshot-freshness.ts'; vi.mock('../../../core/dispatch.ts', async (importOriginal) => { const actual = await importOriginal(); @@ -197,27 +198,98 @@ test('snapshot warns when recent snapshot node count collapses sharply', async ( } }); -test('snapshot warns when a recent Android press is followed by a nearly identical tree', async () => { +test('snapshot automatically retries stale Android trees after recent navigation', async () => { const sessionStore = makeSessionStore(); - const sessionName = 'android-stale-after-press'; + const sessionName = 'android-stale-retries-to-fresh'; const session = makeSession(sessionName, androidDevice); + const baselineNodes = Array.from({ length: 24 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); session.snapshot = { - nodes: Array.from({ length: 24 }, (_, index) => ({ - ref: `e${index + 1}`, - index, - depth: 0, - type: 'android.widget.TextView', - label: `Inbox row ${index + 1}`, - })), + nodes: baselineNodes, createdAt: Date.now(), backend: 'android', }; - session.actions.push({ - ts: Date.now(), - command: 'press', - positionals: ['@e4'], - flags: {}, + session.androidSnapshotFreshness = { + action: 'press', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + }; + sessionStore.set(sessionName, session); + + mockDispatch + .mockResolvedValueOnce({ + nodes: Array.from({ length: 24 }, (_, index) => ({ + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })), + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 24, maxDepth: 2 }, + }) + .mockResolvedValueOnce({ + nodes: [ + { index: 0, depth: 0, type: 'android.widget.TextView', label: 'Create expense' }, + { index: 1, depth: 0, type: 
'android.widget.Button', label: 'Submit', hittable: true }, + ], + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 2, maxDepth: 1 }, + }); + + const response = await handleSnapshotCommands({ + req: { + token: 't', + session: sessionName, + command: 'snapshot', + positionals: [], + flags: { snapshotInteractiveOnly: true }, + }, + sessionName, + logPath: '/tmp/daemon.log', + sessionStore, }); + + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.warnings).toBeUndefined(); + expect(response.data?.nodes).toEqual( + expect.arrayContaining([expect.objectContaining({ label: 'Create expense' })]), + ); + } + expect(mockDispatch).toHaveBeenCalledTimes(2); + expect(sessionStore.get(sessionName)?.androidSnapshotFreshness).toBeUndefined(); +}); + +test('snapshot warns when Android freshness retries still return the previous route', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'android-stale-after-press'; + const session = makeSession(sessionName, androidDevice); + const baselineNodes = Array.from({ length: 24 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); + session.snapshot = { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'android', + }; + session.androidSnapshotFreshness = { + action: 'press', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + }; sessionStore.set(sessionName, session); mockDispatch.mockResolvedValue({ @@ -248,32 +320,36 @@ test('snapshot warns when a recent Android press is followed by a nearly identic expect(response?.ok).toBe(true); if (response?.ok) { expect(response.data?.warnings).toEqual([ - expect.stringContaining('Recent press was followed by a nearly identical snapshot'), + expect.stringContaining( + 'Recent press was followed by a nearly identical snapshot after 2 automatic retries', + 
), ]); } + expect(mockDispatch).toHaveBeenCalledTimes(3); }); test('diff snapshot carries stale-tree warnings for recent Android presses', async () => { const sessionStore = makeSessionStore(); const sessionName = 'android-diff-stale-after-press'; const session = makeSession(sessionName, androidDevice); + const baselineNodes = Array.from({ length: 24 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); session.snapshot = { - nodes: Array.from({ length: 24 }, (_, index) => ({ - ref: `e${index + 1}`, - index, - depth: 0, - type: 'android.widget.TextView', - label: `Inbox row ${index + 1}`, - })), + nodes: baselineNodes, createdAt: Date.now(), backend: 'android', }; - session.actions.push({ - ts: Date.now(), - command: 'press', - positionals: ['@e4'], - flags: {}, - }); + session.androidSnapshotFreshness = { + action: 'press', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + }; sessionStore.set(sessionName, session); mockDispatch.mockResolvedValue({ @@ -304,9 +380,81 @@ test('diff snapshot carries stale-tree warnings for recent Android presses', asy expect(response?.ok).toBe(true); if (response?.ok) { expect(response.data?.warnings).toEqual([ - expect.stringContaining('Recent press was followed by a nearly identical snapshot'), + expect.stringContaining( + 'Recent press was followed by a nearly identical snapshot after 2 automatic retries', + ), ]); } + expect(mockDispatch).toHaveBeenCalledTimes(3); +}); + +test('wait text on Android uses freshness-aware capture instead of one-shot snapshot polling', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'android-wait-freshness'; + const session = makeSession(sessionName, androidDevice); + const baselineNodes = Array.from({ length: 18 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 
'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); + session.snapshot = { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'android', + }; + session.androidSnapshotFreshness = { + action: 'press', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + }; + sessionStore.set(sessionName, session); + + mockDispatch + .mockResolvedValueOnce({ + nodes: Array.from({ length: 18 }, (_, index) => ({ + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })), + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 18, maxDepth: 1 }, + }) + .mockResolvedValueOnce({ + nodes: [ + { index: 0, depth: 0, type: 'android.widget.TextView', label: 'Create expense' }, + { index: 1, depth: 0, type: 'android.widget.TextView', label: 'Done' }, + ], + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 2, maxDepth: 1 }, + }); + + const response = await handleSnapshotCommands({ + req: { + token: 't', + session: sessionName, + command: 'wait', + positionals: ['Create expense', '50'], + flags: {}, + }, + sessionName, + logPath: '/tmp/daemon.log', + sessionStore, + }); + + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.text).toBe('Create expense'); + } + expect(mockDispatch).toHaveBeenCalledTimes(2); + expect(sessionStore.get(sessionName)?.snapshot?.nodes).toEqual( + expect.arrayContaining([expect.objectContaining({ label: 'Create expense' })]), + ); }); test('settings rejects unsupported iOS physical devices', async () => { diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index d9c83f3d8..c9383ef3e 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -11,6 +11,10 @@ import { parseTimeout } from './parse-utils.ts'; import { readTextForNode } from './interaction-read.ts'; import { captureSnapshot } from './snapshot-capture.ts'; import { errorResponse 
} from './response.ts'; +import { + getActiveAndroidSnapshotFreshness, + markAndroidSnapshotFreshness, +} from '../android-snapshot-freshness.ts'; type FindContext = { req: DaemonRequest; @@ -52,6 +56,9 @@ export async function handleFindCommands(params: { if (!query) { return errorResponse('INVALID_ARGS', 'find requires a value'); } + if (req.flags?.findFirst && req.flags?.findLast) { + return errorResponse('INVALID_ARGS', 'find accepts only one of --first or --last'); + } const session = sessionStore.get(sessionName); const isReadOnly = action === 'exists' || action === 'wait' || action === 'get_text' || action === 'get_attrs'; @@ -74,7 +81,7 @@ export async function handleFindCommands(params: { backend?: SnapshotState['backend']; }> => { const now = Date.now(); - if (lastNodes && now - lastSnapshotAt < 750) { + if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { return { nodes: lastNodes }; } const { snapshot } = await captureSnapshot({ @@ -256,6 +263,7 @@ async function handleFindClick(ctx: FindContext, match: ResolvedMatch): Promise< matchData.y = matchCoords.y; } if (session) { + markAndroidSnapshotFreshness(session, 'click'); sessionStore.recordAction(session, { command, positionals: req.positionals ?? 
[], diff --git a/src/daemon/handlers/interaction-common.ts b/src/daemon/handlers/interaction-common.ts index ada797930..fe5f7235a 100644 --- a/src/daemon/handlers/interaction-common.ts +++ b/src/daemon/handlers/interaction-common.ts @@ -4,6 +4,10 @@ import { recordTouchVisualizationEvent } from '../recording-gestures.ts'; import type { DaemonRequest, DaemonResponse, SessionState } from '../types.ts'; import { SessionStore } from '../session-store.ts'; import { successText } from '../../utils/success-text.ts'; +import { + isNavigationSensitiveAction, + markAndroidSnapshotFreshness, +} from '../android-snapshot-freshness.ts'; export type ContextFromFlags = ( flags: CommandFlags | undefined, @@ -172,6 +176,9 @@ function finalizeTouchInteraction(params: { flags: flags ?? {}, result, }); + if (isNavigationSensitiveAction(command)) { + markAndroidSnapshotFreshness(session, command); + } recordTouchVisualizationEvent( session, command, diff --git a/src/daemon/handlers/session-open.ts b/src/daemon/handlers/session-open.ts index e99c9ce39..7c8053dbb 100644 --- a/src/daemon/handlers/session-open.ts +++ b/src/daemon/handlers/session-open.ts @@ -17,6 +17,7 @@ import { import { countConfiguredRuntimeHints, setSessionRuntimeHintsForOpen } from './session-runtime.ts'; import { STARTUP_SAMPLE_METHOD, type StartupPerfSample } from './session-startup-metrics.ts'; import { buildNextOpenSession, buildOpenResult } from './session-open-surface.ts'; +import { markAndroidSnapshotFreshness } from '../android-snapshot-freshness.ts'; import { invalidOpenArgs, prepareOpenCommandDetails, @@ -185,6 +186,7 @@ async function completeOpenCommand(params: { runtime: req.runtime !== undefined ? 
runtime : undefined, result: openResult, }); + markAndroidSnapshotFreshness(nextSession, 'open'); sessionStore.set(sessionName, nextSession); return { ok: true, data: openResult }; } diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index 64dd7c591..a16713969 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -9,6 +9,15 @@ import { type SnapshotState, } from '../../utils/snapshot.ts'; import type { DaemonResponse, SessionState } from '../types.ts'; +import { + ANDROID_FRESHNESS_RETRY_DELAYS_MS, + clearAndroidSnapshotFreshness, + getActiveAndroidSnapshotFreshness, + isLikelySnapshotStuckOnPreviousRoute, + isLikelyStaleSnapshotDrop, + isNavigationSensitiveAction, + type AndroidFreshnessCaptureMeta, +} from '../android-snapshot-freshness.ts'; import { contextFromFlags } from '../context.ts'; import { findNodeByLabel, pruneGroupNodes, resolveRefLabel } from '../snapshot-processing.ts'; @@ -28,10 +37,17 @@ type SnapshotData = { analysis?: AndroidSnapshotAnalysis; }; -export async function captureSnapshot( - params: CaptureSnapshotParams, -): Promise<{ snapshot: SnapshotState; analysis?: AndroidSnapshotAnalysis }> { +export async function captureSnapshot(params: CaptureSnapshotParams): Promise<{ + snapshot: SnapshotState; + analysis?: AndroidSnapshotAnalysis; + freshness?: AndroidFreshnessCaptureMeta; +}> { + const freshness = getActiveAndroidSnapshotFreshness(params.session); + if (freshness && params.device.platform === 'android') { + return await captureAndroidFreshnessAwareSnapshot(params, freshness); + } const data = await captureSnapshotData(params); + clearAndroidSnapshotFreshness(params.session); return { snapshot: buildSnapshotState(data, params.flags?.snapshotRaw), analysis: data.analysis, @@ -60,6 +76,91 @@ export async function captureSnapshotData(params: CaptureSnapshotParams): Promis })) as SnapshotData; } +async function captureAndroidFreshnessAwareSnapshot( + 
params: CaptureSnapshotParams, + freshness: NonNullable, +): Promise<{ + snapshot: SnapshotState; + analysis?: AndroidSnapshotAnalysis; + freshness?: AndroidFreshnessCaptureMeta; +}> { + let latest = await captureSnapshotAttempt(params); + let suspicious = isSuspiciousAndroidFreshnessCapture(latest, freshness, params.flags); + let retryCount = 0; + + for (const delayMs of ANDROID_FRESHNESS_RETRY_DELAYS_MS) { + if (!suspicious) break; + await new Promise((resolve) => setTimeout(resolve, delayMs)); + latest = await captureSnapshotAttempt(params); + retryCount += 1; + suspicious = isSuspiciousAndroidFreshnessCapture(latest, freshness, params.flags); + } + + if (!suspicious) { + clearAndroidSnapshotFreshness(params.session); + } + + return { + snapshot: latest.snapshot, + analysis: latest.data.analysis, + freshness: + retryCount > 0 || suspicious + ? { + action: freshness.action, + retryCount, + staleAfterRetries: suspicious, + } + : undefined, + }; +} + +async function captureSnapshotAttempt( + params: CaptureSnapshotParams, +): Promise<{ data: SnapshotData; snapshot: SnapshotState }> { + const data = await captureSnapshotData(params); + return { + data, + snapshot: buildSnapshotState(data, params.flags?.snapshotRaw), + }; +} + +function isSuspiciousAndroidFreshnessCapture( + attempt: { data: SnapshotData; snapshot: SnapshotState }, + freshness: NonNullable, + flags: CommandFlags | undefined, +): boolean { + const interactiveOnly = flags?.snapshotInteractiveOnly === true; + const analysis = attempt.data.analysis; + + if ( + interactiveOnly && + attempt.snapshot.nodes.length === 0 && + analysis && + analysis.rawNodeCount >= 12 + ) { + return true; + } + + if (isLikelyStaleSnapshotDrop(freshness.baselineCount, attempt.snapshot.nodes.length)) { + return !hasMeaningfulSnapshotContent(attempt.snapshot); + } + + return ( + isNavigationSensitiveAction(freshness.action) && + isLikelySnapshotStuckOnPreviousRoute(freshness.baselineSignatures, attempt.snapshot.nodes) + ); +} + 
+function hasMeaningfulSnapshotContent(snapshot: SnapshotState): boolean { + return snapshot.nodes.some( + (node) => + node.hittable === true || + Boolean(node.label?.trim()) || + Boolean(node.value?.trim()) || + Boolean(node.identifier?.trim()), + ); +} + export function buildSnapshotState( data: { nodes?: RawSnapshotNode[]; diff --git a/src/daemon/handlers/snapshot-wait.ts b/src/daemon/handlers/snapshot-wait.ts index 83248607c..bacfaf48e 100644 --- a/src/daemon/handlers/snapshot-wait.ts +++ b/src/daemon/handlers/snapshot-wait.ts @@ -1,8 +1,7 @@ import { isCommandSupportedOnDevice } from '../../core/capabilities.ts'; import { runIosRunnerCommand } from '../../platforms/ios/runner-client.ts'; -import { snapshotAndroid } from '../../platforms/android/index.ts'; import { isApplePlatform } from '../../utils/device.ts'; -import { attachRefs, findNodeByRef, normalizeRef } from '../../utils/snapshot.ts'; +import { findNodeByRef, normalizeRef } from '../../utils/snapshot.ts'; import { findNodeByLabel, resolveRefLabel } from '../snapshot-processing.ts'; import { SessionStore } from '../session-store.ts'; import { @@ -249,8 +248,15 @@ async function waitForText(params: { return { ok: true, data: { text, waitedMs: Date.now() - start } }; } } else if (device.platform === 'android') { - const androidResult = await snapshotAndroid(device, { scope: text }); - if (findNodeByLabel(attachRefs(androidResult.nodes ?? []), text)) { + const snapshot = await captureWaitSnapshot({ + device, + logPath, + req, + session, + sessionName: session?.name ?? req.session ?? 
'default', + sessionStore, + }); + if (findNodeByLabel(snapshot.nodes, text)) { recordIfSession(sessionStore, session, req, { text, waitedMs: Date.now() - start }); return { ok: true, data: { text, waitedMs: Date.now() - start } }; } diff --git a/src/daemon/handlers/snapshot.ts b/src/daemon/handlers/snapshot.ts index 18cf67f5b..a33043c73 100644 --- a/src/daemon/handlers/snapshot.ts +++ b/src/daemon/handlers/snapshot.ts @@ -13,6 +13,11 @@ import { handleWaitCommand, parseWaitArgs, waitNeedsRunnerCleanup } from './snap import { handleAlertCommand } from './snapshot-alert.ts'; import { handleSettingsCommand, parseSettingsArgs } from './snapshot-settings.ts'; import { uniqueStrings } from '../action-utils.ts'; +import { + buildSnapshotSignatures, + isLikelySnapshotStuckOnPreviousRoute, + isLikelyStaleSnapshotDrop, +} from '../android-snapshot-freshness.ts'; const SNAPSHOT_COMMANDS = new Set(['snapshot', 'diff', 'wait', 'alert', 'settings']); @@ -185,6 +190,7 @@ function buildSnapshotWarnings(params: { const previousSnapshot = session?.snapshot; if ( + !capture.freshness && previousSnapshot && Date.now() - previousSnapshot.createdAt <= 2_000 && isLikelyStaleSnapshotDrop(previousSnapshot.nodes.length, capture.snapshot.nodes.length) @@ -194,78 +200,23 @@ function buildSnapshotWarnings(params: { ); } - const recentAction = session?.actions.at(-1); if ( + capture.freshness?.staleAfterRetries && capture.snapshot.backend === 'android' && - interactiveOnly && previousSnapshot && - recentAction && - Date.now() - recentAction.ts <= 2_000 && - isLikelySnapshotStuckOnPreviousRoute(previousSnapshot.nodes, capture.snapshot.nodes) && - isNavigationSensitiveAction(recentAction.command) + isLikelySnapshotStuckOnPreviousRoute( + buildSnapshotSignatures(previousSnapshot.nodes), + capture.snapshot.nodes, + ) ) { warnings.push( - `Recent ${recentAction.command} was followed by a nearly identical snapshot. If you expected navigation or submit, the tree may still be stale. 
Use screenshot as visual truth, wait briefly, then re-snapshot once.`, + `Recent ${capture.freshness.action} was followed by a nearly identical snapshot after ${capture.freshness.retryCount} automatic retr${capture.freshness.retryCount === 1 ? 'y' : 'ies'}. If you expected navigation or submit, the tree may still be stale. Use screenshot as visual truth, wait briefly, then re-snapshot once.`, ); } return uniqueStrings(warnings); } -function isLikelyStaleSnapshotDrop(previousCount: number, currentCount: number): boolean { - if (previousCount < 12) { - return false; - } - return currentCount <= Math.floor(previousCount * 0.2); -} - -function isNavigationSensitiveAction(command: string): boolean { - return command === 'press' || command === 'click' || command === 'back' || command === 'open'; -} - -function isLikelySnapshotStuckOnPreviousRoute( - previousNodes: NonNullable['nodes'], - currentNodes: NonNullable['nodes'], -): boolean { - const total = Math.max(previousNodes.length, currentNodes.length); - if (total < 12) { - return false; - } - const comparableLength = Math.min(previousNodes.length, currentNodes.length); - let unchanged = 0; - for (let index = 0; index < comparableLength; index += 1) { - if ( - snapshotNodeSignature(previousNodes[index]) === snapshotNodeSignature(currentNodes[index]) - ) { - unchanged += 1; - } - } - const additions = Math.max(0, currentNodes.length - previousNodes.length); - const removals = Math.max(0, previousNodes.length - currentNodes.length); - const toleratedDelta = Math.max(3, Math.floor(total * 0.15)); - return ( - unchanged >= Math.floor(total * 0.9) && - additions <= toleratedDelta && - removals <= toleratedDelta - ); -} - -function snapshotNodeSignature( - node: NonNullable['nodes'][number], -): string { - return [ - node.depth ?? 0, - node.type ?? '', - node.role ?? '', - node.label ?? '', - node.value ?? '', - node.identifier ?? '', - node.enabled === false ? 'disabled' : 'enabled', - node.selected === true ? 
'selected' : 'unselected', - node.hittable === true ? 'hittable' : 'not-hittable', - ].join('|'); -} - async function handleSnapshotDiffRequest(params: { req: DaemonRequest; sessionName: string; diff --git a/src/daemon/request-router.ts b/src/daemon/request-router.ts index cd6797c5c..430f084bc 100644 --- a/src/daemon/request-router.ts +++ b/src/daemon/request-router.ts @@ -41,6 +41,10 @@ import { import { recoverAndroidBlockingSystemDialog } from './android-system-dialog.ts'; import { getRunnerSessionSnapshot } from '../platforms/ios/runner-client.ts'; import { annotateScreenshotWithRefs } from './screenshot-overlay.ts'; +import { + isNavigationSensitiveAction, + markAndroidSnapshotFreshness, +} from './android-snapshot-freshness.ts'; const selectorValidationExemptCommands = new Set([ 'session_list', @@ -484,6 +488,9 @@ export function createRequestHandler( flags: recordedFlags, result: data ?? {}, }); + if (isNavigationSensitiveAction(command)) { + markAndroidSnapshotFreshness(session, command); + } return finalize({ ok: true, data: data ?? 
{} }); } catch (error) { emitDiagnostic({ diff --git a/src/daemon/types.ts b/src/daemon/types.ts index c8bb04765..63b980599 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -4,6 +4,7 @@ import type { SessionSurface } from '../core/session-surface.ts'; import type { DeviceInfo, Platform, PlatformSelector } from '../utils/device.ts'; import type { ExecResult } from '../utils/exec.ts'; import type { SnapshotState } from '../utils/snapshot.ts'; +import type { AndroidSnapshotFreshness } from './android-snapshot-freshness.ts'; export type DaemonInstallSource = MaterializeInstallSource; export type DaemonLockPolicy = 'reject' | 'strip'; @@ -189,6 +190,7 @@ export type SessionState = { appBundleId?: string; appName?: string; snapshot?: SnapshotState; + androidSnapshotFreshness?: AndroidSnapshotFreshness; trace?: { outPath: string; startedAt: number; diff --git a/src/platforms/android/snapshot.ts b/src/platforms/android/snapshot.ts index 79c7f8b7a..18bf81b5a 100644 --- a/src/platforms/android/snapshot.ts +++ b/src/platforms/android/snapshot.ts @@ -14,32 +14,10 @@ export async function snapshotAndroid( truncated?: boolean; analysis: AndroidSnapshotAnalysis; }> { - const xml = options.waitStableMs - ? await dumpUiHierarchyStable(device, options.waitStableMs) - : await dumpUiHierarchy(device); + const xml = await dumpUiHierarchy(device); return parseUiHierarchy(xml, 800, options); } -/** - * Poll until the AX tree stabilizes: two consecutive dumps produce identical XML, - * or the timeout is reached. Returns the last captured XML. 
- */ -async function dumpUiHierarchyStable(device: DeviceInfo, timeoutMs: number): Promise { - const POLL_INTERVAL = 200; - const start = Date.now(); - let previousXml = await dumpUiHierarchy(device); - while (Date.now() - start < timeoutMs) { - await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL)); - const currentXml = await dumpUiHierarchy(device); - if (currentXml === previousXml) { - return currentXml; - } - previousXml = currentXml; - } - // Timeout reached — return the last dump even though it may still be changing. - return previousXml; -} - export async function dumpUiHierarchy(device: DeviceInfo): Promise { return withRetry(() => dumpUiHierarchyOnce(device), { shouldRetry: isRetryableAdbError, diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index d15c9b68d..376f8c73e 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -53,7 +53,6 @@ export type CliFlags = { snapshotDepth?: number; snapshotScope?: string; snapshotRaw?: boolean; - snapshotWaitStableMs?: number; networkInclude?: 'summary' | 'headers' | 'body' | 'all'; overlayRefs?: boolean; screenshotFullscreen?: boolean; @@ -140,7 +139,6 @@ const SNAPSHOT_FLAGS = [ 'snapshotDepth', 'snapshotScope', 'snapshotRaw', - 'snapshotWaitStableMs', ] as const satisfies readonly FlagKey[]; const SELECTOR_SNAPSHOT_FLAGS = [ @@ -871,15 +869,6 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageLabel: '--raw', usageDescription: 'Snapshot: raw node output', }, - { - key: 'snapshotWaitStableMs', - names: ['--wait-stable'], - type: 'int', - min: 0, - max: 10000, - usageLabel: '--wait-stable ', - usageDescription: 'Snapshot: wait for AX tree to stabilize (Android)', - }, { key: 'findFirst', names: ['--first'], @@ -1007,7 +996,7 @@ const COMMAND_SCHEMAS: Record = { allowedFlags: [], }, snapshot: { - usageOverride: 'snapshot [--diff] [-i] [-c] [-d ] [-s ] [--raw] [--wait-stable ]', + usageOverride: 'snapshot [--diff] [-i] [-c] [-d ] [-s ] [--raw]', 
helpDescription: 'Capture accessibility tree or diff against the previous session baseline', positionalArgs: [], allowedFlags: ['snapshotDiff', ...SNAPSHOT_FLAGS], diff --git a/src/utils/snapshot.ts b/src/utils/snapshot.ts index 2f78c8208..40ed70732 100644 --- a/src/utils/snapshot.ts +++ b/src/utils/snapshot.ts @@ -16,7 +16,6 @@ export type SnapshotOptions = { depth?: number; scope?: string; raw?: boolean; - waitStableMs?: number; }; export type RawSnapshotNode = { From 89a907cf795b8968d30608bf32b7f78db8caacd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 1 Apr 2026 11:37:15 +0200 Subject: [PATCH 6/8] fix: tighten Android freshness baselines --- src/daemon/android-snapshot-freshness.ts | 23 ++++++-- src/daemon/handlers/__tests__/find.test.ts | 2 + .../handlers/__tests__/interaction.test.ts | 2 +- src/daemon/handlers/__tests__/session.test.ts | 58 +++++++++++++++++++ .../__tests__/snapshot-handler.test.ts | 8 +++ src/daemon/handlers/find.ts | 6 +- src/daemon/handlers/session-open.ts | 4 +- src/daemon/handlers/snapshot-capture.ts | 47 ++++++++++----- src/daemon/handlers/snapshot.ts | 28 ++++----- src/daemon/request-router.ts | 2 +- src/utils/snapshot.ts | 1 + 11 files changed, 136 insertions(+), 45 deletions(-) diff --git a/src/daemon/android-snapshot-freshness.ts b/src/daemon/android-snapshot-freshness.ts index ef7f732f3..8ce1a19a6 100644 --- a/src/daemon/android-snapshot-freshness.ts +++ b/src/daemon/android-snapshot-freshness.ts @@ -9,22 +9,32 @@ export type AndroidSnapshotFreshness = { action: string; markedAt: number; baselineCount: number; - baselineSignatures: string[]; + baselineSignatures?: string[]; + routeComparable: boolean; }; export type AndroidFreshnessCaptureMeta = { action: string; retryCount: number; staleAfterRetries: boolean; + reason?: 'empty-interactive' | 'sharp-drop' | 'stuck-route'; }; -export function markAndroidSnapshotFreshness(session: SessionState, action: string): void { +export function 
markAndroidSnapshotFreshness( + session: SessionState, + action: string, + baseline = session.snapshot, +): void { if (session.device.platform !== 'android') return; + const routeComparable = baseline?.comparisonSafe === true; session.androidSnapshotFreshness = { action, markedAt: Date.now(), - baselineCount: session.snapshot?.nodes.length ?? 0, - baselineSignatures: buildSnapshotSignatures(session.snapshot?.nodes ?? []), + baselineCount: baseline?.nodes.length ?? 0, + baselineSignatures: routeComparable + ? buildSnapshotSignatures(baseline?.nodes ?? []) + : undefined, + routeComparable, }; } @@ -74,9 +84,12 @@ export function isLikelyStaleSnapshotDrop(previousCount: number, currentCount: n } export function isLikelySnapshotStuckOnPreviousRoute( - previousSignatures: string[], + previousSignatures: string[] | undefined, currentNodes: SnapshotState['nodes'], ): boolean { + if (!previousSignatures || previousSignatures.length === 0) { + return false; + } const total = Math.max(previousSignatures.length, currentNodes.length); if (total < 12) { return false; diff --git a/src/daemon/handlers/__tests__/find.test.ts b/src/daemon/handlers/__tests__/find.test.ts index c370479b2..9891d7dd1 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -319,12 +319,14 @@ test('handleFindCommands wait bypasses snapshot cache while Android freshness re nodes: baselineNodes, createdAt: Date.now(), backend: 'android', + comparisonSafe: true, }; session.androidSnapshotFreshness = { action: 'press', markedAt: Date.now(), baselineCount: baselineNodes.length, baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, }; mockDispatch diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 1354da708..b3483a1d5 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -69,7 +69,7 @@ async 
function emulateCaptureSnapshotForSession( effectiveFlags.out, contextFromFlags(effectiveFlags, session.appBundleId, session.trace?.outPath), )) as { nodes?: never[]; truncated?: boolean; backend?: 'xctest' | 'android' | 'macos-helper' }; - const snapshot = buildSnapshotState(snapshotData ?? {}, effectiveFlags.snapshotRaw); + const snapshot = buildSnapshotState(snapshotData ?? {}, effectiveFlags); session.snapshot = snapshot; sessionStore.set(session.name, session); return snapshot; diff --git a/src/daemon/handlers/__tests__/session.test.ts b/src/daemon/handlers/__tests__/session.test.ts index 9f9b800e9..adb77ba88 100644 --- a/src/daemon/handlers/__tests__/session.test.ts +++ b/src/daemon/handlers/__tests__/session.test.ts @@ -78,6 +78,7 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { handleSessionCommands } from '../session.ts'; +import { buildSnapshotSignatures } from '../../android-snapshot-freshness.ts'; import { retainMaterializedPaths, cleanupRetainedMaterializedPathsForSession, @@ -2624,6 +2625,63 @@ test('open intent target on existing Android session clears stale package contex expect(dispatchedContext?.appBundleId).toBe(undefined); }); +test('open on existing Android session preserves a comparable freshness baseline', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'android-open-freshness'; + const baselineNodes = Array.from({ length: 14 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); + sessionStore.set(sessionName, { + ...makeSession(sessionName, { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel Emulator', + kind: 'emulator', + booted: true, + }), + appBundleId: 'com.example.old', + appName: 'Old App', + snapshot: { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'android', + comparisonSafe: true, + }, + }); + + mockDispatch.mockResolvedValue({}); + 
mockResolveAndroidPackage.mockResolvedValue('com.android.settings'); + + const response = await handleSessionCommands({ + req: { + token: 't', + session: sessionName, + command: 'open', + positionals: ['settings'], + flags: {}, + }, + sessionName, + logPath: path.join(os.tmpdir(), 'daemon.log'), + sessionStore, + invoke: noopInvoke, + }); + + expect(response?.ok).toBe(true); + const updated = sessionStore.get(sessionName); + expect(updated?.snapshot).toBeUndefined(); + expect(updated?.androidSnapshotFreshness).toEqual({ + action: 'open', + markedAt: expect.any(Number), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, + }); +}); + test('open --relaunch closes and reopens active session app', async () => { const sessionStore = makeSessionStore(); const sessionName = 'android-session'; diff --git a/src/daemon/handlers/__tests__/snapshot-handler.test.ts b/src/daemon/handlers/__tests__/snapshot-handler.test.ts index 24090a286..72236383a 100644 --- a/src/daemon/handlers/__tests__/snapshot-handler.test.ts +++ b/src/daemon/handlers/__tests__/snapshot-handler.test.ts @@ -213,12 +213,14 @@ test('snapshot automatically retries stale Android trees after recent navigation nodes: baselineNodes, createdAt: Date.now(), backend: 'android', + comparisonSafe: true, }; session.androidSnapshotFreshness = { action: 'press', markedAt: Date.now(), baselineCount: baselineNodes.length, baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, }; sessionStore.set(sessionName, session); @@ -283,12 +285,14 @@ test('snapshot warns when Android freshness retries still return the previous ro nodes: baselineNodes, createdAt: Date.now(), backend: 'android', + comparisonSafe: true, }; session.androidSnapshotFreshness = { action: 'press', markedAt: Date.now(), baselineCount: baselineNodes.length, baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, }; 
sessionStore.set(sessionName, session); @@ -343,12 +347,14 @@ test('diff snapshot carries stale-tree warnings for recent Android presses', asy nodes: baselineNodes, createdAt: Date.now(), backend: 'android', + comparisonSafe: true, }; session.androidSnapshotFreshness = { action: 'press', markedAt: Date.now(), baselineCount: baselineNodes.length, baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, }; sessionStore.set(sessionName, session); @@ -403,12 +409,14 @@ test('wait text on Android uses freshness-aware capture instead of one-shot snap nodes: baselineNodes, createdAt: Date.now(), backend: 'android', + comparisonSafe: true, }; session.androidSnapshotFreshness = { action: 'press', markedAt: Date.now(), baselineCount: baselineNodes.length, baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, }; sessionStore.set(sessionName, session); diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index c9383ef3e..a33772833 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -11,10 +11,7 @@ import { parseTimeout } from './parse-utils.ts'; import { readTextForNode } from './interaction-read.ts'; import { captureSnapshot } from './snapshot-capture.ts'; import { errorResponse } from './response.ts'; -import { - getActiveAndroidSnapshotFreshness, - markAndroidSnapshotFreshness, -} from '../android-snapshot-freshness.ts'; +import { getActiveAndroidSnapshotFreshness } from '../android-snapshot-freshness.ts'; type FindContext = { req: DaemonRequest; @@ -263,7 +260,6 @@ async function handleFindClick(ctx: FindContext, match: ResolvedMatch): Promise< matchData.y = matchCoords.y; } if (session) { - markAndroidSnapshotFreshness(session, 'click'); sessionStore.recordAction(session, { command, positionals: req.positionals ?? 
[], diff --git a/src/daemon/handlers/session-open.ts b/src/daemon/handlers/session-open.ts index 7c8053dbb..c2f1bccb5 100644 --- a/src/daemon/handlers/session-open.ts +++ b/src/daemon/handlers/session-open.ts @@ -157,6 +157,9 @@ async function completeOpenCommand(params: { }; } + if (existingSession) { + markAndroidSnapshotFreshness(existingSession, 'open', existingSession.snapshot); + } const nextSession = buildNextOpenSession({ existingSession, sessionName, @@ -186,7 +189,6 @@ async function completeOpenCommand(params: { runtime: req.runtime !== undefined ? runtime : undefined, result: openResult, }); - markAndroidSnapshotFreshness(nextSession, 'open'); sessionStore.set(sessionName, nextSession); return { ok: true, data: openResult }; } diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index a16713969..519d5ebd6 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -37,6 +37,8 @@ type SnapshotData = { analysis?: AndroidSnapshotAnalysis; }; +type AndroidFreshnessReason = 'empty-interactive' | 'sharp-drop' | 'stuck-route'; + export async function captureSnapshot(params: CaptureSnapshotParams): Promise<{ snapshot: SnapshotState; analysis?: AndroidSnapshotAnalysis; @@ -49,7 +51,7 @@ export async function captureSnapshot(params: CaptureSnapshotParams): Promise<{ const data = await captureSnapshotData(params); clearAndroidSnapshotFreshness(params.session); return { - snapshot: buildSnapshotState(data, params.flags?.snapshotRaw), + snapshot: buildSnapshotState(data, params.flags), analysis: data.analysis, }; } @@ -85,18 +87,18 @@ async function captureAndroidFreshnessAwareSnapshot( freshness?: AndroidFreshnessCaptureMeta; }> { let latest = await captureSnapshotAttempt(params); - let suspicious = isSuspiciousAndroidFreshnessCapture(latest, freshness, params.flags); + let suspiciousReason = getAndroidFreshnessReason(latest, freshness, params.flags); let retryCount = 0; for (const 
delayMs of ANDROID_FRESHNESS_RETRY_DELAYS_MS) { - if (!suspicious) break; + if (!suspiciousReason) break; await new Promise((resolve) => setTimeout(resolve, delayMs)); latest = await captureSnapshotAttempt(params); retryCount += 1; - suspicious = isSuspiciousAndroidFreshnessCapture(latest, freshness, params.flags); + suspiciousReason = getAndroidFreshnessReason(latest, freshness, params.flags); } - if (!suspicious) { + if (!suspiciousReason) { clearAndroidSnapshotFreshness(params.session); } @@ -104,11 +106,12 @@ async function captureAndroidFreshnessAwareSnapshot( snapshot: latest.snapshot, analysis: latest.data.analysis, freshness: - retryCount > 0 || suspicious + retryCount > 0 || Boolean(suspiciousReason) ? { action: freshness.action, retryCount, - staleAfterRetries: suspicious, + staleAfterRetries: Boolean(suspiciousReason), + reason: suspiciousReason ?? undefined, } : undefined, }; @@ -120,15 +123,15 @@ async function captureSnapshotAttempt( const data = await captureSnapshotData(params); return { data, - snapshot: buildSnapshotState(data, params.flags?.snapshotRaw), + snapshot: buildSnapshotState(data, params.flags), }; } -function isSuspiciousAndroidFreshnessCapture( +function getAndroidFreshnessReason( attempt: { data: SnapshotData; snapshot: SnapshotState }, freshness: NonNullable, flags: CommandFlags | undefined, -): boolean { +): AndroidFreshnessReason | null { const interactiveOnly = flags?.snapshotInteractiveOnly === true; const analysis = attempt.data.analysis; @@ -138,17 +141,18 @@ function isSuspiciousAndroidFreshnessCapture( analysis && analysis.rawNodeCount >= 12 ) { - return true; + return 'empty-interactive'; } if (isLikelyStaleSnapshotDrop(freshness.baselineCount, attempt.snapshot.nodes.length)) { - return !hasMeaningfulSnapshotContent(attempt.snapshot); + return !hasMeaningfulSnapshotContent(attempt.snapshot) ? 
'sharp-drop' : null; } - return ( + return freshness.routeComparable && isNavigationSensitiveAction(freshness.action) && isLikelySnapshotStuckOnPreviousRoute(freshness.baselineSignatures, attempt.snapshot.nodes) - ); + ? 'stuck-route' + : null; } function hasMeaningfulSnapshotContent(snapshot: SnapshotState): boolean { @@ -167,15 +171,28 @@ export function buildSnapshotState( truncated?: boolean; backend?: 'xctest' | 'android' | 'macos-helper'; }, - snapshotRaw: boolean | undefined, + flags: + | (Pick< + CommandFlags, + 'snapshotCompact' | 'snapshotDepth' | 'snapshotInteractiveOnly' | 'snapshotRaw' + > & + Partial>) + | undefined, ): SnapshotState { const rawNodes = data?.nodes ?? []; + const snapshotRaw = flags?.snapshotRaw; const nodes = attachRefs(snapshotRaw ? rawNodes : pruneGroupNodes(rawNodes)); return { nodes, truncated: data?.truncated, createdAt: Date.now(), backend: data?.backend, + comparisonSafe: + data?.backend === 'android' && + flags?.snapshotInteractiveOnly !== true && + flags?.snapshotCompact !== true && + typeof flags?.snapshotDepth !== 'number' && + !flags?.snapshotScope, }; } diff --git a/src/daemon/handlers/snapshot.ts b/src/daemon/handlers/snapshot.ts index a33043c73..b33ad47a7 100644 --- a/src/daemon/handlers/snapshot.ts +++ b/src/daemon/handlers/snapshot.ts @@ -13,11 +13,7 @@ import { handleWaitCommand, parseWaitArgs, waitNeedsRunnerCleanup } from './snap import { handleAlertCommand } from './snapshot-alert.ts'; import { handleSettingsCommand, parseSettingsArgs } from './snapshot-settings.ts'; import { uniqueStrings } from '../action-utils.ts'; -import { - buildSnapshotSignatures, - isLikelySnapshotStuckOnPreviousRoute, - isLikelyStaleSnapshotDrop, -} from '../android-snapshot-freshness.ts'; +import { isLikelyStaleSnapshotDrop } from '../android-snapshot-freshness.ts'; const SNAPSHOT_COMMANDS = new Set(['snapshot', 'diff', 'wait', 'alert', 'settings']); @@ -200,18 +196,16 @@ function buildSnapshotWarnings(params: { ); } - if ( - 
capture.freshness?.staleAfterRetries && - capture.snapshot.backend === 'android' && - previousSnapshot && - isLikelySnapshotStuckOnPreviousRoute( - buildSnapshotSignatures(previousSnapshot.nodes), - capture.snapshot.nodes, - ) - ) { - warnings.push( - `Recent ${capture.freshness.action} was followed by a nearly identical snapshot after ${capture.freshness.retryCount} automatic retr${capture.freshness.retryCount === 1 ? 'y' : 'ies'}. If you expected navigation or submit, the tree may still be stale. Use screenshot as visual truth, wait briefly, then re-snapshot once.`, - ); + if (capture.freshness?.staleAfterRetries && capture.snapshot.backend === 'android') { + if (capture.freshness.reason === 'stuck-route') { + warnings.push( + `Recent ${capture.freshness.action} was followed by a nearly identical snapshot after ${capture.freshness.retryCount} automatic retr${capture.freshness.retryCount === 1 ? 'y' : 'ies'}. If you expected navigation or submit, the tree may still be stale. Use screenshot as visual truth, wait briefly, then re-snapshot once.`, + ); + } else if (capture.freshness.reason === 'sharp-drop') { + warnings.push( + 'Recent snapshots dropped sharply in node count, which suggests stale or mid-transition UI. 
Use screenshot as visual truth, wait briefly, then re-snapshot once.', + ); + } } return uniqueStrings(warnings); diff --git a/src/daemon/request-router.ts b/src/daemon/request-router.ts index 430f084bc..73de4d183 100644 --- a/src/daemon/request-router.ts +++ b/src/daemon/request-router.ts @@ -458,7 +458,7 @@ export function createRequestHandler( logPath, snapshotScope: undefined, }); - const overlaySnapshot = buildSnapshotState(overlaySnapshotData, false); + const overlaySnapshot = buildSnapshotState(overlaySnapshotData, undefined); session.snapshot = overlaySnapshot; const overlayRefs = await annotateScreenshotWithRefs({ screenshotPath: data.path, diff --git a/src/utils/snapshot.ts b/src/utils/snapshot.ts index 40ed70732..6f296abd9 100644 --- a/src/utils/snapshot.ts +++ b/src/utils/snapshot.ts @@ -48,6 +48,7 @@ export type SnapshotState = { createdAt: number; truncated?: boolean; backend?: 'xctest' | 'android' | 'macos-helper'; + comparisonSafe?: boolean; }; export type ScreenshotOverlayRef = { From 3e3a990abc8d09177d75400fd9d21ef68f4220ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 1 Apr 2026 11:42:26 +0200 Subject: [PATCH 7/8] docs: clarify Android freshness tradeoffs --- src/daemon/android-snapshot-freshness.ts | 6 ++++++ src/daemon/handlers/session-open.ts | 2 ++ src/daemon/handlers/snapshot-capture.ts | 3 +++ 3 files changed, 11 insertions(+) diff --git a/src/daemon/android-snapshot-freshness.ts b/src/daemon/android-snapshot-freshness.ts index 8ce1a19a6..4e7c22449 100644 --- a/src/daemon/android-snapshot-freshness.ts +++ b/src/daemon/android-snapshot-freshness.ts @@ -26,6 +26,9 @@ export function markAndroidSnapshotFreshness( baseline = session.snapshot, ): void { if (session.device.platform !== 'android') return; + // Route-stuck recovery only makes sense against a baseline captured in a broad, comparable + // shape. 
Interactive/scoped/depth-limited snapshots are still useful for users, but they are + // too pruned to serve as a reliable "same route vs new route" baseline. const routeComparable = baseline?.comparisonSafe === true; session.androidSnapshotFreshness = { action, @@ -57,6 +60,9 @@ export function clearAndroidSnapshotFreshness(session: SessionState | undefined) } export function isNavigationSensitiveAction(command: string): boolean { + // Keep this set intentionally narrow. `type`, `fill`, and generic `swipe` happen far more + // often than real route changes, so marking freshness for them would add retry latency to + // common steady-state loops. We only opt in commands that regularly move to a new screen. return command === 'press' || command === 'click' || command === 'back' || command === 'open'; } diff --git a/src/daemon/handlers/session-open.ts b/src/daemon/handlers/session-open.ts index c2f1bccb5..487f703ce 100644 --- a/src/daemon/handlers/session-open.ts +++ b/src/daemon/handlers/session-open.ts @@ -158,6 +158,8 @@ async function completeOpenCommand(params: { } if (existingSession) { + // Mark freshness before buildNextOpenSession clears the stored snapshot. `open` is one of + // the few nav-sensitive commands that would otherwise lose its pre-action baseline. markAndroidSnapshotFreshness(existingSession, 'open', existingSession.snapshot); } const nextSession = buildNextOpenSession({ diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index 519d5ebd6..be54956c1 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -187,6 +187,9 @@ export function buildSnapshotState( truncated: data?.truncated, createdAt: Date.now(), backend: data?.backend, + // Only broad Android snapshots become freshness baselines. If the user asked for a scoped + // or filtered view, preserve that output contract but avoid pretending it is safe for + // route-level comparisons on the next capture. 
comparisonSafe: data?.backend === 'android' && flags?.snapshotInteractiveOnly !== true && From 9e8c5609b1f9512d9aba9cc7a22aba71b31a257b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 1 Apr 2026 12:22:04 +0200 Subject: [PATCH 8/8] docs: clarify empty interactive freshness warning --- src/daemon/handlers/snapshot.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/daemon/handlers/snapshot.ts b/src/daemon/handlers/snapshot.ts index b33ad47a7..b0df84ae5 100644 --- a/src/daemon/handlers/snapshot.ts +++ b/src/daemon/handlers/snapshot.ts @@ -197,6 +197,9 @@ function buildSnapshotWarnings(params: { } if (capture.freshness?.staleAfterRetries && capture.snapshot.backend === 'android') { + // `empty-interactive` intentionally relies on the generic empty-interactive warning above. + // Freshness recovery may resolve a transient filtered-zero tree, but if retries still end + // empty we want one final warning, not a second freshness-specific variant of the same issue. if (capture.freshness.reason === 'stuck-route') { warnings.push( `Recent ${capture.freshness.action} was followed by a nearly identical snapshot after ${capture.freshness.retryCount} automatic retr${capture.freshness.retryCount === 1 ? 'y' : 'ies'}. If you expected navigation or submit, the tree may still be stale. Use screenshot as visual truth, wait briefly, then re-snapshot once.`,