diff --git a/src/daemon/handlers/__tests__/interaction-read.test.ts b/src/daemon/handlers/__tests__/interaction-read.test.ts
new file mode 100644
index 000000000..07e8ce3f5
--- /dev/null
+++ b/src/daemon/handlers/__tests__/interaction-read.test.ts
@@ -0,0 +1,73 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import type { SnapshotNode } from '../../../utils/snapshot.ts';
+
+// Replace only dispatchCommand with a stub; every other export is spread from the original module.
+vi.mock('../../../core/dispatch.ts', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../../../core/dispatch.ts')>();
+  return {
+    ...actual,
+    dispatchCommand: vi.fn(async () => ({ text: 'backend-text' })),
+  };
+});
+
+import { dispatchCommand } from '../../../core/dispatch.ts';
+import { readTextForNode } from '../interaction-read.ts';
+
+const mockDispatch = vi.mocked(dispatchCommand);
+
+/** Builds a minimal SnapshotNode with a non-empty rect; `overrides` replaces any default field. */
+function node(overrides: Partial<SnapshotNode>): SnapshotNode {
+  return {
+    ref: 'e1',
+    index: 0,
+    rect: { x: 0, y: 0, width: 100, height: 40 },
+    ...overrides,
+  } as SnapshotNode;
+}
+
+// Shared call params: an iOS device, no flags, and a context factory returning an empty object.
+const baseParams = {
+  device: { platform: 'ios' } as never,
+  flags: undefined,
+  contextFromFlags: () => ({}) as never,
+};
+
+describe('readTextForNode', () => {
+  beforeEach(() => mockDispatch.mockClear());
+
+  it('returns snapshot text without a backend read for non-editable nodes', async () => {
+    const text = await readTextForNode({ ...baseParams, node: node({ type: 'button', label: 'General' }) });
+    expect(text).toBe('General');
+    expect(mockDispatch).not.toHaveBeenCalled();
+  });
+
+  it('still re-reads via the backend for editable text inputs (live value may exceed snapshot)', async () => {
+    const text = await readTextForNode({ ...baseParams, node: node({ type: 'textfield', value: 'snap' }) });
+    expect(mockDispatch).toHaveBeenCalledOnce();
+    expect(text).toBe('backend-text');
+  });
+
+  it('re-reads when the snapshot node has no readable text', async () => {
+    await readTextForNode({ ...baseParams, node: node({ type: 'other' }) });
+    expect(mockDispatch).toHaveBeenCalledOnce();
+  });
+
+  it('returns snapshot text without a backend read when the node has no resolvable center', async () => {
+    const text = await readTextForNode({ ...baseParams, node: node({ type: 'button', label: 'General', rect: undefined }) });
+    expect(text).toBe('General');
+    expect(mockDispatch).not.toHaveBeenCalled();
+  });
+
+  it('does NOT skip the backend read on non-iOS platforms (value-first read semantics differ)', async () => {
+    for (const platform of ['android', 'macos', 'linux'] as const) {
+      mockDispatch.mockClear();
+      const text = await readTextForNode({
+        ...baseParams,
+        device: { platform } as never,
+        node: node({ type: 'button', label: 'General' }),
+      });
+      expect(mockDispatch).toHaveBeenCalledOnce();
+      expect(text).toBe('backend-text');
+    }
+  });
+});
diff --git a/src/daemon/handlers/interaction-read.ts b/src/daemon/handlers/interaction-read.ts
index 409ec887d..cc416757f 100644
--- a/src/daemon/handlers/interaction-read.ts
+++ b/src/daemon/handlers/interaction-read.ts
@@ -3,6 +3,7 @@ import { emitDiagnostic } from '../../utils/diagnostics.ts';
 import { extractNodeReadText } from '../snapshot-processing.ts';
 import type { SessionState } from '../types.ts';
 import type { SnapshotNode } from '../../utils/snapshot.ts';
+import { prefersValueForReadableText } from '../../utils/text-surface.ts';
 import type { ContextFromFlags } from './interaction-common.ts';
 import { resolveRectCenter } from './interaction-targeting.ts';
 
@@ -22,6 +23,22 @@ export async function readTextForNode(params: {
     return fallbackText;
   }
 
+  // iOS only: the XCUITest backend `read` re-resolves the element at a point by enumerating
+  // the full element tree (allElementsBoundByIndex), which is ~20x slower than the snapshot we
+  // already captured to resolve this node. That re-read only recovers fuller text for
+  // editable/expandable inputs (textField/searchField/textView/…), where the live value can
+  // exceed the snapshot; for every other element type the snapshot node text is authoritative.
+  // Restricted to iOS because other backends read differently — macOS helper and Linux reads
+  // are value-first (AXValue/title/description), unlike the label-first snapshot readable text,
+  // so skipping their backend read would change the returned text.
+  if (
+    device.platform === 'ios' &&
+    fallbackText &&
+    !prefersValueForReadableText(node.type ?? '')
+  ) {
+    return fallbackText;
+  }
+
   try {
     const rawData = await dispatchCommand(
       device,
diff --git a/src/utils/text-surface.ts b/src/utils/text-surface.ts
index 6ea12d0e3..03208c784 100644
--- a/src/utils/text-surface.ts
+++ b/src/utils/text-surface.ts
@@ -85,7 +85,12 @@ export function normalizeType(type: string): string {
   return normalized;
 }
 
-function prefersValueForReadableText(type: string): boolean {
+/**
+ * Editable / expandable text-bearing element types whose live on-screen value can exceed
+ * the captured snapshot text. For these the readable text prefers `value`, and a backend
+ * (e.g. iOS XCUITest) re-read at the element can recover fuller text than the snapshot node.
+ */
+export function prefersValueForReadableText(type: string): boolean {
   const normalized = normalizeType(type);
   return (
     normalized.includes('textfield') ||