diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index a91175782..210c20cb6 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -3,6 +3,16 @@ import XCTest extension RunnerTests { // MARK: - Main Thread Dispatch + private func currentUptimeMs() -> Double { + ProcessInfo.processInfo.systemUptime * 1000 + } + + private func measureGesture(_ action: () -> Void) -> (gestureStartUptimeMs: Double, gestureEndUptimeMs: Double) { + let gestureStartUptimeMs = currentUptimeMs() + action() + return (gestureStartUptimeMs, currentUptimeMs()) + } + func execute(command: Command) throws -> Response { if Thread.isMainThread { return try executeOnMainSafely(command: command) @@ -175,7 +185,7 @@ extension RunnerTests { } do { let resolvedOutPath = resolveRecordingOutPath(requestedOutPath) - let fpsLabel = command.fps.map(String.init) ?? "max" + let fpsLabel = command.fps.map(String.init) ?? String(RunnerTests.defaultRecordingFps) NSLog( "AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@", requestedOutPath, @@ -204,26 +214,80 @@ extension RunnerTests { activeRecording = nil return Response(ok: false, error: ErrorPayload(message: "failed to stop recording: \(error.localizedDescription)")) } + case .uptime: + return Response( + ok: true, + data: DataPayload(currentUptimeMs: currentUptimeMs()) + ) case .tap: if let text = command.text { if let element = findElement(app: activeApp, text: text) { - element.tap() - return Response(ok: true, data: DataPayload(message: "tapped")) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + element.tap() + } + } + return Response( + ok: true, + data: DataPayload( + message: "tapped", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs + ) + ) } return Response(ok: false, error: ErrorPayload(message: "element not found")) } if let x = command.x, let y = command.y { - tapAt(app: activeApp, x: x, y: y) - return Response(ok: true, data: DataPayload(message: "tapped")) + let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + tapAt(app: activeApp, x: x, y: y) + } + } + return Response( + ok: true, + data: DataPayload( + message: "tapped", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: touchFrame.x, + y: touchFrame.y, + referenceWidth: touchFrame.referenceWidth, + referenceHeight: touchFrame.referenceHeight + ) + ) } return Response(ok: false, error: ErrorPayload(message: "tap requires text or x/y")) case .mouseClick: guard let x = command.x, let y = command.y else { return Response(ok: false, error: ErrorPayload(message: "mouseClick requires x and y")) } + let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) do { - try mouseClickAt(app: activeApp, x: x, y: y, button: command.button ?? "primary") - return Response(ok: true, data: DataPayload(message: "clicked")) + var clickError: Error? + let timing = measureGesture { + do { + try mouseClickAt(app: activeApp, x: x, y: y, button: command.button ?? "primary") + } catch { + clickError = error + } + } + if let clickError { + throw clickError + } + return Response( + ok: true, + data: DataPayload( + message: "clicked", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: touchFrame.x, + y: touchFrame.y, + referenceWidth: touchFrame.referenceWidth, + referenceHeight: touchFrame.referenceHeight + ) + ) } catch { return Response(ok: false, error: ErrorPayload(message: error.localizedDescription)) } @@ -234,32 +298,95 @@ extension RunnerTests { let count = max(Int(command.count ?? 1), 1) let intervalMs = max(command.intervalMs ?? 0, 0) let doubleTap = command.doubleTap ?? false + let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) if doubleTap { - runSeries(count: count, pauseMs: intervalMs) { _ in - doubleTapAt(app: activeApp, x: x, y: y) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + runSeries(count: count, pauseMs: intervalMs) { _ in + doubleTapAt(app: activeApp, x: x, y: y) + } + } } - return Response(ok: true, data: DataPayload(message: "tap series")) + return Response( + ok: true, + data: DataPayload( + message: "tap series", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: touchFrame.x, + y: touchFrame.y, + referenceWidth: touchFrame.referenceWidth, + referenceHeight: touchFrame.referenceHeight + ) + ) } - runSeries(count: count, pauseMs: intervalMs) { _ in - tapAt(app: activeApp, x: x, y: y) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + runSeries(count: count, pauseMs: intervalMs) { _ in + tapAt(app: activeApp, x: x, y: y) + } + } } - return Response(ok: true, data: DataPayload(message: "tap series")) + return Response( + ok: true, + data: DataPayload( + message: "tap series", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: touchFrame.x, + y: touchFrame.y, + referenceWidth: touchFrame.referenceWidth, + referenceHeight: touchFrame.referenceHeight + ) + ) case .longPress: guard let x = command.x, let y = command.y else { return Response(ok: false, error: ErrorPayload(message: "longPress requires x and y")) } let duration = (command.durationMs ?? 800) / 1000.0 - longPressAt(app: activeApp, x: x, y: y, duration: duration) - return Response(ok: true, data: DataPayload(message: "long pressed")) + let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + longPressAt(app: activeApp, x: x, y: y, duration: duration) + } + } + return Response( + ok: true, + data: DataPayload( + message: "long pressed", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: touchFrame.x, + y: touchFrame.y, + referenceWidth: touchFrame.referenceWidth, + referenceHeight: touchFrame.referenceHeight + ) + ) case .drag: guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { return Response(ok: false, error: ErrorPayload(message: "drag requires x, y, x2, and y2")) } let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) - withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + let dragFrame = resolvedDragVisualizationFrame(app: activeApp, x: x, y: y, x2: x2, y2: y2) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + } } - return Response(ok: true, data: DataPayload(message: "dragged")) + return Response( + ok: true, + data: DataPayload( + message: "dragged", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + x: dragFrame.x, + y: dragFrame.y, + x2: dragFrame.x2, + y2: dragFrame.y2, + referenceWidth: dragFrame.referenceWidth, + referenceHeight: dragFrame.referenceHeight + ) + ) case .dragSeries: guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { return Response(ok: false, error: ErrorPayload(message: "dragSeries requires x, y, x2, and y2")) @@ -271,17 +398,26 @@ extension RunnerTests { return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong")) } let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) - withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - runSeries(count: count, pauseMs: pauseMs) { idx in - let reverse = pattern == "ping-pong" && (idx % 2 == 1) - if reverse { - dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration) - } else { - dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + runSeries(count: count, pauseMs: pauseMs) { idx in + let reverse = pattern == "ping-pong" && (idx % 2 == 1) + if reverse { + dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration) + } else { + dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + } } } } - return Response(ok: true, data: DataPayload(message: "drag series")) + return Response( + ok: true, + data: DataPayload( + message: "drag series", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs + ) + ) case .type: guard let text = command.text else { return Response(ok: false, error: ErrorPayload(message: "type requires text")) @@ -304,10 +440,22 @@ extension RunnerTests { guard let direction = command.direction else { return Response(ok: false, error: ErrorPayload(message: "swipe requires direction")) } - withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - swipe(app: activeApp, direction: direction) + let referenceFrame = resolvedGestureReferenceFrame(app: activeApp) + let timing = measureGesture { + withTemporaryScrollIdleTimeoutIfSupported(activeApp) { + swipe(app: activeApp, direction: direction) + } } - return Response(ok: true, data: DataPayload(message: "swiped")) + return Response( + ok: true, + data: DataPayload( + message: "swiped", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs, + referenceWidth: referenceFrame.referenceWidth, + referenceHeight: referenceFrame.referenceHeight + ) + ) case .findText: guard let text = command.text else { return Response(ok: false, error: ErrorPayload(message: "findText requires text")) @@ -358,30 +506,15 @@ extension RunnerTests { if tapNavigationBack(app: activeApp) { return Response(ok: true, data: DataPayload(message: "back")) } -#if os(macOS) - return Response(ok: false, error: ErrorPayload(message: "back button is not available on macOS")) -#else performBackGesture(app: activeApp) return Response(ok: true, data: DataPayload(message: "back")) -#endif case .home: -#if os(macOS) - return Response(ok: false, error: ErrorPayload(message: "home is not supported on macOS")) -#else pressHomeButton() return Response(ok: true, data: DataPayload(message: "home")) -#endif case .appSwitcher: -#if os(macOS) - return Response(ok: false, error: ErrorPayload(message: "appSwitcher is not supported on macOS")) -#else performAppSwitcherGesture(app: activeApp) return Response(ok: true, data: DataPayload(message: "appSwitcher")) -#endif case .alert: -#if os(macOS) - return Response(ok: false, error: ErrorPayload(message: "alert is not supported on macOS")) -#else let action = (command.action ?? "get").lowercased() let alert = activeApp.alerts.firstMatch if !alert.exists { @@ -399,17 +532,21 @@ extension RunnerTests { } let buttonLabels = alert.buttons.allElementsBoundByIndex.map { $0.label } return Response(ok: true, data: DataPayload(message: alert.label, items: buttonLabels)) -#endif case .pinch: -#if os(macOS) - return Response(ok: false, error: ErrorPayload(message: "pinch is not supported on macOS")) -#else guard let scale = command.scale, scale > 0 else { return Response(ok: false, error: ErrorPayload(message: "pinch requires scale > 0")) } - pinch(app: activeApp, scale: scale, x: command.x, y: command.y) - return Response(ok: true, data: DataPayload(message: "pinched")) -#endif + let timing = measureGesture { + pinch(app: activeApp, scale: scale, x: command.x, y: command.y) + } + return Response( + ok: true, + data: DataPayload( + message: "pinched", + gestureStartUptimeMs: timing.gestureStartUptimeMs, + gestureEndUptimeMs: timing.gestureEndUptimeMs + ) + ) } } } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift index 7c6ad4a1d..5563f5ae7 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift @@ -1,6 +1,27 @@ import XCTest extension RunnerTests { + struct TouchVisualizationFrame { + let x: Double + let y: Double + let referenceWidth: Double + let referenceHeight: Double + } + + struct DragVisualizationFrame { + let x: Double + let y: Double + let x2: Double + let y2: Double + let referenceWidth: Double + let referenceHeight: Double + } + + struct GestureReferenceFrame { + let referenceWidth: Double + let referenceHeight: Double + } + // MARK: - Navigation Gestures func tapNavigationBack(app: XCUIApplication) -> Bool { @@ -209,6 +230,58 @@ extension RunnerTests { start.press(forDuration: holdDuration, thenDragTo: end) } + func resolvedTouchVisualizationFrame(app: XCUIApplication, x: Double, y: Double) -> TouchVisualizationFrame { + let appFrame = app.frame + let referenceFrame = resolvedTouchReferenceFrame(app: app, appFrame: appFrame) + let originX = appFrame.isEmpty ? referenceFrame.minX : appFrame.minX + let originY = appFrame.isEmpty ? referenceFrame.minY : appFrame.minY + return TouchVisualizationFrame( + x: originX + x, + y: originY + y, + referenceWidth: referenceFrame.width, + referenceHeight: referenceFrame.height + ) + } + + func resolvedDragVisualizationFrame( + app: XCUIApplication, + x: Double, + y: Double, + x2: Double, + y2: Double + ) -> DragVisualizationFrame { + let start = resolvedTouchVisualizationFrame(app: app, x: x, y: y) + let end = resolvedTouchVisualizationFrame(app: app, x: x2, y: y2) + return DragVisualizationFrame( + x: start.x, + y: start.y, + x2: end.x, + y2: end.y, + referenceWidth: start.referenceWidth, + referenceHeight: start.referenceHeight + ) + } + + private func resolvedTouchReferenceFrame(app: XCUIApplication, appFrame: CGRect) -> CGRect { + let window = app.windows.firstMatch + let windowFrame = window.frame + if window.exists && !windowFrame.isEmpty { + return windowFrame + } + if !appFrame.isEmpty { + return appFrame + } + return CGRect(x: 0, y: 0, width: 0, height: 0) + } + + func resolvedGestureReferenceFrame(app: XCUIApplication) -> GestureReferenceFrame { + let frame = resolvedTouchReferenceFrame(app: app, appFrame: app.frame) + return GestureReferenceFrame( + referenceWidth: frame.width, + referenceHeight: frame.height + ) + } + func runSeries(count: Int, pauseMs: Double, operation: (Int) -> Void) { let total = max(count, 1) let pause = max(pauseMs, 0) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift index b60afc3ae..036812e31 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift @@ -19,6 +19,7 @@ enum CommandType: String, Codable { case pinch case recordStart case recordStop + case uptime case shutdown } @@ -75,19 +76,46 @@ struct DataPayload: Codable { let items: [String]? let nodes: [SnapshotNode]? let truncated: Bool? + let gestureStartUptimeMs: Double? + let gestureEndUptimeMs: Double? + let x: Double? + let y: Double? + let x2: Double? + let y2: Double? + let referenceWidth: Double? + let referenceHeight: Double? + let currentUptimeMs: Double? init( message: String? = nil, found: Bool? = nil, items: [String]? = nil, nodes: [SnapshotNode]? = nil, - truncated: Bool? = nil + truncated: Bool? = nil, + gestureStartUptimeMs: Double? = nil, + gestureEndUptimeMs: Double? = nil, + x: Double? = nil, + y: Double? = nil, + x2: Double? = nil, + y2: Double? = nil, + referenceWidth: Double? = nil, + referenceHeight: Double? = nil, + currentUptimeMs: Double? = nil ) { self.message = message self.found = found self.items = items self.nodes = nodes self.truncated = truncated + self.gestureStartUptimeMs = gestureStartUptimeMs + self.gestureEndUptimeMs = gestureEndUptimeMs + self.x = x + self.y = y + self.x2 = x2 + self.y2 = y2 + self.referenceWidth = referenceWidth + self.referenceHeight = referenceHeight + self.currentUptimeMs = currentUptimeMs } } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift index 6e0b4780f..9c9db51f3 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift @@ -7,13 +7,11 @@ extension RunnerTests { final class ScreenRecorder { private let outputPath: String private let fps: Int32? - private let uncappedFrameInterval: TimeInterval = 0.001 - private var uncappedTimestampTimescale: Int32 { - Int32(max(1, Int((1.0 / uncappedFrameInterval).rounded()))) + private var effectiveFps: Int32 { + max(1, fps ?? RunnerTests.defaultRecordingFps) } private var frameInterval: TimeInterval { - guard let fps else { return uncappedFrameInterval } - return 1.0 / Double(fps) + 1.0 / Double(effectiveFps) } private let queue = DispatchQueue(label: "agent-device.runner.recorder") private let lock = NSLock() @@ -206,7 +204,7 @@ extension RunnerTests { recordingStartUptime = nowUptime } let elapsed = max(0, nowUptime - (recordingStartUptime ?? nowUptime)) - let timescale = fps ?? uncappedTimestampTimescale + let timescale = effectiveFps var timestampValue = Int64((elapsed * Double(timescale)).rounded(.down)) if timestampValue <= lastTimestampValue { timestampValue = lastTimestampValue + 1 diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift index cf1049736..df3c983be 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift @@ -29,6 +29,7 @@ final class RunnerTests: XCTestCase { } static let springboardBundleId = "com.apple.springboard" + static let defaultRecordingFps: Int32 = 15 var listener: NWListener? var doneExpectation: XCTestExpectation? let app = XCUIApplication() diff --git a/ios-runner/AgentDeviceRunner/RecordingScripts/recording-overlay.swift b/ios-runner/AgentDeviceRunner/RecordingScripts/recording-overlay.swift new file mode 100644 index 000000000..984bbdb91 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/RecordingScripts/recording-overlay.swift @@ -0,0 +1,571 @@ +import AppKit +import AVFoundation +import Foundation +import QuartzCore + +let touchDotColor = NSColor(calibratedRed: 0.20, green: 0.63, blue: 0.98, alpha: 0.48).cgColor +let touchDotBorderColor = NSColor(calibratedRed: 0.94, green: 0.98, blue: 1.0, alpha: 0.68).cgColor +let minimumTapVisibility: CFTimeInterval = 0.45 +let minimumSwipeVisibility: CFTimeInterval = 0.5 +let minimumPinchVisibility: CFTimeInterval = 0.5 +let swipeVisibilityTail: CFTimeInterval = 0.16 +let trailOpacityKeyTimes: [NSNumber] = [0.0, 0.08, 0.62, 1.0] + +struct GestureEnvelope: Decodable { + let events: [GestureEvent] +} + +struct GestureEvent: Decodable { + let kind: String + let tMs: Double + let x: Double + let y: Double + let x2: Double? + let y2: Double? + let referenceWidth: Double? + let referenceHeight: Double? + let durationMs: Double? + let scale: Double? + let contentDirection: String? + let edge: String? +} + +enum OverlayError: Error, CustomStringConvertible { + case invalidArgs(String) + case missingVideoTrack + case exportFailed(String) + + var description: String { + switch self { + case .invalidArgs(let message): + return message + case .missingVideoTrack: + return "Input video does not contain a video track." + case .exportFailed(let message): + return message + } + } +} + +do { + try run() +} catch { + fputs("recording-overlay: \(error)\n", stderr) + exit(1) +} + +func run() throws { + let arguments = Array(CommandLine.arguments.dropFirst()) + let parsedArgs = try parseArguments(arguments) + let inputURL = URL(fileURLWithPath: parsedArgs.inputPath) + let outputURL = URL(fileURLWithPath: parsedArgs.outputPath) + let eventsURL = URL(fileURLWithPath: parsedArgs.eventsPath) + + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + + let payload = try Data(contentsOf: eventsURL) + let envelope = try JSONDecoder().decode(GestureEnvelope.self, from: payload) + + if envelope.events.isEmpty { + try FileManager.default.copyItem(at: inputURL, to: outputURL) + return + } + + let asset = AVURLAsset(url: inputURL) + guard let sourceVideoTrack = asset.tracks(withMediaType: .video).first else { + throw OverlayError.missingVideoTrack + } + + let composition = AVMutableComposition() + guard let compositionVideoTrack = composition.addMutableTrack( + withMediaType: .video, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw OverlayError.exportFailed("Failed to create composition video track.") + } + + let fullRange = CMTimeRange(start: .zero, duration: asset.duration) + try compositionVideoTrack.insertTimeRange(fullRange, of: sourceVideoTrack, at: .zero) + + if let sourceAudioTrack = asset.tracks(withMediaType: .audio).first, + let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) { + try? compositionAudioTrack.insertTimeRange(fullRange, of: sourceAudioTrack, at: .zero) + } + + let renderSize = resolvedRenderSize(for: sourceVideoTrack) + let videoComposition = AVMutableVideoComposition() + videoComposition.renderSize = renderSize + videoComposition.frameDuration = resolvedFrameDuration(for: sourceVideoTrack) + + let instruction = AVMutableVideoCompositionInstruction() + instruction.timeRange = fullRange + let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: compositionVideoTrack) + layerInstruction.setTransform(sourceVideoTrack.preferredTransform, at: .zero) + instruction.layerInstructions = [layerInstruction] + videoComposition.instructions = [instruction] + + let parentLayer = CALayer() + parentLayer.frame = CGRect(origin: .zero, size: renderSize) + parentLayer.masksToBounds = true + + let videoLayer = CALayer() + videoLayer.frame = parentLayer.frame + parentLayer.addSublayer(videoLayer) + + let overlayLayer = CALayer() + overlayLayer.frame = parentLayer.frame + parentLayer.addSublayer(overlayLayer) + + for event in envelope.events { + switch event.kind { + case "tap": + addTapLayer(event: event, renderSize: renderSize, to: overlayLayer) + case "longpress": + addLongPressLayer(event: event, renderSize: renderSize, to: overlayLayer) + case "swipe": + addSwipeLayers(event: event, renderSize: renderSize, to: overlayLayer) + case "scroll": + addScrollLayers(event: event, renderSize: renderSize, to: overlayLayer) + case "back-swipe": + addBackSwipeLayers(event: event, renderSize: renderSize, to: overlayLayer) + case "pinch": + addPinchLayers(event: event, renderSize: renderSize, to: overlayLayer) + default: + continue + } + } + + videoComposition.animationTool = AVVideoCompositionCoreAnimationTool( + postProcessingAsVideoLayer: videoLayer, + in: parentLayer + ) + + guard let exporter = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else { + throw OverlayError.exportFailed("Failed to create export session.") + } + + exporter.outputURL = outputURL + exporter.outputFileType = .mp4 + exporter.videoComposition = videoComposition + exporter.shouldOptimizeForNetworkUse = true + + let semaphore = DispatchSemaphore(value: 0) + exporter.exportAsynchronously { + semaphore.signal() + } + if semaphore.wait(timeout: .now() + 120) == .timedOut { + exporter.cancelExport() + throw OverlayError.exportFailed("Touch overlay export timed out.") + } + + if exporter.status != .completed { + throw OverlayError.exportFailed(exporter.error?.localizedDescription ?? "Touch overlay export failed.") + } +} + +func parseArguments(_ arguments: [String]) throws -> (inputPath: String, outputPath: String, eventsPath: String) { + var inputPath: String? + var outputPath: String? + var eventsPath: String? + var index = 0 + + while index < arguments.count { + let argument = arguments[index] + let nextIndex = index + 1 + switch argument { + case "--input": + guard nextIndex < arguments.count else { throw OverlayError.invalidArgs("--input requires a value") } + inputPath = arguments[nextIndex] + index += 2 + case "--output": + guard nextIndex < arguments.count else { throw OverlayError.invalidArgs("--output requires a value") } + outputPath = arguments[nextIndex] + index += 2 + case "--events": + guard nextIndex < arguments.count else { throw OverlayError.invalidArgs("--events requires a value") } + eventsPath = arguments[nextIndex] + index += 2 + default: + throw OverlayError.invalidArgs("Unknown argument: \(argument)") + } + } + + guard let inputPath, let outputPath, let eventsPath else { + throw OverlayError.invalidArgs("Usage: recording-overlay.swift --input