diff --git a/extension/src/background/index.ts b/extension/src/background/index.ts
index b35132c..3b0f24b 100644
--- a/extension/src/background/index.ts
+++ b/extension/src/background/index.ts
@@ -583,7 +583,11 @@ async function captureHighlightedPageState(
         : '';
     const detectedViewport = detectionResult.result.value.viewport || {};
     const layoutStability = detectionResult.result.value.layoutStability;
+    const inPagePerf = detectionResult.result.value._perf || {};
     const highlightTraceStart = Date.now();
+    let paginationMs = 0;
+    let screenshotMs = 0;
+    let consistencyMs = 0;
     const detectedViewportWidth =
       typeof detectedViewport.width === 'number' ? detectedViewport.width : 0;
     const detectedViewportHeight =
@@ -656,8 +660,9 @@ async function captureHighlightedPageState(
       console.log(
         `📄 [${logLabel}] Page ${page}/${totalPages}, showing ${paginatedElements.length} of ${filteredElements.length} elements`,
       );
+      paginationMs = Date.now() - paginationBuildStart;
       console.log(
-        `⏱️ [HighlightTrace] background pagination build-pages=${Date.now() - paginationBuildStart}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
+        `⏱️ [HighlightTrace] background pagination build-pages=${paginationMs}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
       );
     }
 
@@ -702,9 +707,8 @@ async function captureHighlightedPageState(
     console.log(
       `📸 [${logLabel}] Screenshot captured (with in-page highlights), size: ${screenshotResult.imageData.length} bytes`,
     );
-    console.log(
-      `⏱️ [HighlightTrace] background screenshot ${Date.now() - screenshotStart}ms`,
-    );
+    screenshotMs = Date.now() - screenshotStart;
+    console.log(`⏱️ [HighlightTrace] background screenshot ${screenshotMs}ms`);
 
     // Apply bboxes returned from the highlight injection script
     const preCaptureData = screenshotResult.preCaptureResult;
@@ -766,8 +770,9 @@ async function captureHighlightedPageState(
         })),
       currentConsistencySamples,
     );
+    consistencyMs = Date.now() - consistencyCheckStart;
     console.log(
-      `⏱️ [HighlightTrace] background consistency-check ${Date.now() - consistencyCheckStart}ms (checked=${highlightConsistency.checkedCount}, matched=${highlightConsistency.matchedCount}, missing=${highlightConsistency.missingCount}, shifted=${highlightConsistency.shiftedCount}, maxCenterShift=${highlightConsistency.maxCenterShift}, maxSizeDelta=${highlightConsistency.maxSizeDelta}, retry=${highlightConsistency.shouldRetry})`,
+      `⏱️ [HighlightTrace] background consistency-check ${consistencyMs}ms (checked=${highlightConsistency.checkedCount}, matched=${highlightConsistency.matchedCount}, missing=${highlightConsistency.missingCount}, shifted=${highlightConsistency.shiftedCount}, maxCenterShift=${highlightConsistency.maxCenterShift}, maxSizeDelta=${highlightConsistency.maxSizeDelta}, retry=${highlightConsistency.shouldRetry})`,
     );
     const repeatedDrift = isRepeatedHighlightDrift(
       highlightConsistency,
@@ -841,6 +846,15 @@ async function captureHighlightedPageState(
       page: currentPage,
       pageState,
       readinessReasons,
+      _perf: {
+        scan_ms:
+          typeof inPagePerf.scan_ms === 'number' ? inPagePerf.scan_ms : 0,
+        scan_stats: inPagePerf.scan_stats || {},
+        scan_times: inPagePerf.scan_times || {},
+        pagination_ms: paginationMs,
+        screenshot_ms: screenshotMs,
+        consistency_ms: consistencyMs,
+      },
       ...buildScreenshotPayload(compressedScreenshotResult),
     };
   }
diff --git a/extension/src/commands/highlight-detection.injected.js b/extension/src/commands/highlight-detection.injected.js
index 72f3e8e..4140016 100644
--- a/extension/src/commands/highlight-detection.injected.js
+++ b/extension/src/commands/highlight-detection.injected.js
@@ -77,6 +77,130 @@ function hasCallableMethod(value, methodNames) {
   );
 }
 
+// Layout reads (getBoundingClientRect, getComputedStyle) and elementsFromPoint
+// dominate collectHighlightCandidates: every visibility predicate re-reads
+// them per element. No page JS runs during the synchronous Runtime.evaluate
+// scan, so values cannot change mid-scan; withScanLayoutCache (below) patches
+// the prototypes, memoizes per element, and restores the originals afterwards.
+// Separately, elements with the tags listed here are skipped before any read.
+const SCAN_NON_INTERACTIVE_TAGS = new Set([
+  'script',
+  'style',
+  'link',
+  'meta',
+  'head',
+  'title',
+  'noscript',
+  'br',
+  'hr',
+  'source',
+  'track',
+  'template',
+  'param',
+  'col',
+  'colgroup',
+]);
+
+function isScanSkippableTag(el) {
+  const tagName = el ? el.tagName : null; // no node / no tag => never skip
+  return !!tagName && SCAN_NON_INTERACTIVE_TAGS.has(tagName.toLowerCase());
+}
+
+// Per-scan memoization caches for pure-function classifiers that get hit many
+// times for the same element during the resolve phase (each candidate walks
+// up to 5 ancestors, each ancestor calls hasExplicitClickableAncestor which
+// walks ALL ancestors, etc.). Reset at the start of each scan, leak nothing
+// outside it. WeakMap so any GC'd nodes drop out automatically.
+let _scanSemanticSignalCache = null;
+let _scanClickableCandidateCache = null;
+let _scanBaseClickableSignalCache = null;
+let _scanTextContentCache = null;
+let _scanSearchTextCache = null;
+let _scanExplicitAncestorCache = null;
+
+// Runs `fn` with getBoundingClientRect, getComputedStyle(el) and
+// elementsFromPoint memoized and with fresh per-scan classifier caches,
+// then returns whatever `fn` returns. The originals are restored and the
+// caches dropped in `finally`, even when `fn` throws.
+// Rects and element stacks are snapshots taken on first read, so `fn` must
+// not change layout. Not reentrant: a nested call would clear the outer
+// call's caches when it finishes.
+function withScanLayoutCache(fn) {
+  const rectCache = new WeakMap();
+  const styleCache = new WeakMap();
+  // elementsFromPoint dedup keyed by rounded "x:y"
+  const efpCache = new Map();
+  _scanSemanticSignalCache = new WeakMap();
+  _scanClickableCandidateCache = new WeakMap();
+  _scanBaseClickableSignalCache = new WeakMap();
+  _scanTextContentCache = new WeakMap();
+  _scanSearchTextCache = new WeakMap();
+  _scanExplicitAncestorCache = new WeakMap();
+
+  // Only Element.prototype is patched for rects. SVGGraphicsElement inherits
+  // getBoundingClientRect from Element, so assigning to its prototype would
+  // add an own property that "restoring" the old value can never remove.
+  const origElementRect = Element.prototype.getBoundingClientRect;
+  const origGetComputedStyle = window.getComputedStyle;
+  // Patch Document.prototype rather than the document instance so we don't
+  // leave an own-property shadowing the prototype after the scan finishes.
+  const DocumentProto =
+    typeof Document !== 'undefined' ? Document.prototype : null;
+  const origElementsFromPoint =
+    DocumentProto && DocumentProto.elementsFromPoint;
+
+  function patchedRect() {
+    let r = rectCache.get(this);
+    if (r === undefined) {
+      r = origElementRect.call(this);
+      rectCache.set(this, r);
+    }
+    return r;
+  }
+
+  Element.prototype.getBoundingClientRect = patchedRect;
+
+  window.getComputedStyle = function (el, pseudo) {
+    // Pseudo-element lookups are passed straight through, uncached.
+    if (pseudo) return origGetComputedStyle.call(window, el, pseudo);
+    let s = styleCache.get(el);
+    if (s === undefined) {
+      s = origGetComputedStyle.call(window, el);
+      styleCache.set(el, s);
+    }
+    return s;
+  };
+
+  if (DocumentProto && origElementsFromPoint) {
+    DocumentProto.elementsFromPoint = function (x, y) {
+      const key = Math.round(x) + ':' + Math.round(y);
+      let stack = efpCache.get(key);
+      if (stack === undefined) {
+        stack = origElementsFromPoint.call(this, x, y);
+        efpCache.set(key, stack);
+      }
+      return stack;
+    };
+  }
+
+  try {
+    return fn();
+  } finally {
+    // Undo every patch so page code never runs against the cached versions.
+    Element.prototype.getBoundingClientRect = origElementRect;
+    window.getComputedStyle = origGetComputedStyle;
+    if (DocumentProto && origElementsFromPoint) {
+      DocumentProto.elementsFromPoint = origElementsFromPoint;
+    }
+    _scanSemanticSignalCache = null;
+    _scanClickableCandidateCache = null;
+    _scanBaseClickableSignalCache = null;
+    _scanTextContentCache = null;
+    _scanSearchTextCache = null;
+    _scanExplicitAncestorCache = null;
+  }
+}
+
 function createHighlightTrace() {
   const traceStart = performance.now();
 
@@ -305,6 +429,15 @@ function getSwipeMarkerText(el) {
 }
 
 function getElementTextForDetection(el) {
+  // Memoized per scan; outside a scan the cache is null and we just compute.
+  const cache = _scanTextContentCache;
+  if (cache && cache.has(el)) return cache.get(el);
+  const text = getElementTextForDetectionImpl(el);
+  if (cache) cache.set(el, text);
+  return text;
+}
+
+function getElementTextForDetectionImpl(el) {
   if (el instanceof HTMLInputElement) {
     const inputType = (el.type || '').toLowerCase();
     if (
@@ -316,10 +449,22 @@ function getElementTextForDetection(el) {
     }
   }
 
+  // textContent on a deep node walks the entire subtree of text nodes — for
+  // a table row with hundreds of descendants this can dominate the resolve
+  // phase, so getElementTextForDetection caches this result once per scan.
   return normalizeWhitespace(el.textContent || '', 240);
 }
 
 function getElementSearchText(el) {
+  // Search text is rebuilt at most once per element while a scan is active.
+  const cache = _scanSearchTextCache;
+  if (cache && cache.has(el)) return cache.get(el);
+  const searchText = getElementSearchTextImpl(el);
+  if (cache) cache.set(el, searchText);
+  return searchText;
+}
+
+function getElementSearchTextImpl(el) {
   const tokens = [
     el.tagName.toLowerCase(),
     ...getAttributeTextTokens(el, [
@@ -480,6 +625,15 @@ function hasPointerCursor(el) {
 }
 
 function getBaseClickableSignal(el) {
+  // Per-scan memo around getBaseClickableSignalImpl; no cache => plain call.
+  const memo = _scanBaseClickableSignalCache;
+  if (memo && memo.has(el)) return memo.get(el);
+  const baseSignal = getBaseClickableSignalImpl(el);
+  if (memo) memo.set(el, baseSignal);
+  return baseSignal;
+}
+
+function getBaseClickableSignalImpl(el) {
   const semanticSignal = getSemanticClickableSignal(el);
   if (semanticSignal) {
     return semanticSignal;
@@ -573,6 +727,15 @@ function getControlAffinityScore(el) {
 }
 
 function getSemanticClickableSignal(el) {
+  // Per-scan memo; has() is checked so falsy results also count as hits.
+  const memo = _scanSemanticSignalCache;
+  if (memo && memo.has(el)) return memo.get(el);
+  const signal = getSemanticClickableSignalImpl(el);
+  if (memo) memo.set(el, signal);
+  return signal;
+}
+
+function getSemanticClickableSignalImpl(el) {
   const tag = el.tagName.toLowerCase();
   const role = (el.getAttribute('role') || '').toLowerCase();
 
@@ -769,18 +932,30 @@ function countDirectClickableChildren(el) {
 }
 
 function hasExplicitClickableAncestor(el) {
+  if (_scanExplicitAncestorCache && _scanExplicitAncestorCache.has(el)) {
+    return _scanExplicitAncestorCache.get(el);
+  }
+  // Only the queried element is memoized. Intermediate ancestors are not:
+  // the answer for `el` says whether something ABOVE el is explicitly
+  // clickable, and an ancestor visited during this walk may itself be the
+  // explicit one, so the result for `el` cannot be reused as that ancestor's
+  // own answer (a previous version did that and was incorrect). Repeating
+  // the walk per unique element is cheap because getSemanticClickableSignal
+  // is memoized for the duration of the scan.
   let current = el.parentElement;
-
+  let answer = false;
   while (current && current !== document.body) {
     const signal = getSemanticClickableSignal(current);
     if (signal === 'semantic' || signal === 'attribute') {
-      return true;
+      answer = true;
+      break;
     }
-
     current = current.parentElement;
   }
-
-  return false;
+  if (_scanExplicitAncestorCache) {
+    _scanExplicitAncestorCache.set(el, answer);
+  }
+  return answer;
 }
 
 function isInputableCandidate(el) {
@@ -911,6 +1086,15 @@ function hasStructuredInteractiveDescendant(el) {
 }
 
 function isClickableCandidate(el) {
+  // Per-scan memo; has() is required because a cached result may be null.
+  const memo = _scanClickableCandidateCache;
+  if (memo && memo.has(el)) return memo.get(el);
+  const verdict = isClickableCandidateImpl(el);
+  if (memo) memo.set(el, verdict);
+  return verdict;
+}
+
+function isClickableCandidateImpl(el) {
   if (isDisabledForDetection(el)) {
     return null;
   }
@@ -2473,6 +2657,12 @@ function collectUploadableCandidates(trace) {
 }
 
 function collectHighlightCandidates(config, trace, layoutStability) {
+  const runScan = () =>
+    collectHighlightCandidatesImpl(config, trace, layoutStability);
+  return withScanLayoutCache(runScan); // caches live only for this call
+}
+
+function collectHighlightCandidatesImpl(config, trace, layoutStability) {
   const activeTopLayerRoot = getActiveTopLayerRoot();
   const registry = new Map();
 
@@ -2519,6 +2709,27 @@ function collectHighlightCandidates(config, trace, layoutStability) {
   );
 
   let scannedCount = 0;
+  // Per-phase reject counters and timings, collected on every scan (not gated)
+  // and reported once at scan:done — shows where time goes without log spam.
+  const phaseStats = {
+    tagSkip: 0,
+    notInViewport: 0,
+    notVisible: 0,
+    scrollParentClipped: 0,
+    notInActiveTopLayer: 0,
+    hitTestOccluded: 0,
+    notResolvable: 0,
+    matched: 0,
+  };
+  const phaseTimes = {
+    tag: 0,
+    viewport: 0,
+    visible: 0,
+    scrollParent: 0,
+    topLayer: 0,
+    hitTest: 0,
+    resolve: 0,
+  };
   for (const element of allElements) {
     scannedCount += 1;
 
@@ -2529,34 +2740,65 @@ function collectHighlightCandidates(config, trace, layoutStability) {
       );
     }
 
-    if (!isElementInViewportForDetection(element)) {
+    let t = performance.now();
+    if (isScanSkippableTag(element)) {
+      phaseStats.tagSkip += 1;
+      phaseTimes.tag += performance.now() - t;
       continue;
     }
+    phaseTimes.tag += performance.now() - t;
 
-    if (!isElementVisibleForDetection(element)) {
+    t = performance.now();
+    const inViewport = isElementInViewportForDetection(element);
+    phaseTimes.viewport += performance.now() - t;
+    if (!inViewport) {
+      phaseStats.notInViewport += 1;
       continue;
     }
 
-    if (!isElementVisibleInScrollParent(element)) {
+    t = performance.now();
+    const visible = isElementVisibleForDetection(element);
+    phaseTimes.visible += performance.now() - t;
+    if (!visible) {
+      phaseStats.notVisible += 1;
+      continue;
+    }
+
+    t = performance.now();
+    const scrollOk = isElementVisibleInScrollParent(element);
+    phaseTimes.scrollParent += performance.now() - t;
+    if (!scrollOk) {
+      phaseStats.scrollParentClipped += 1;
       continue;
     }
 
-    if (!isElementInActiveTopLayer(element, activeTopLayerRoot)) {
+    t = performance.now();
+    const topLayerOk = isElementInActiveTopLayer(element, activeTopLayerRoot);
+    phaseTimes.topLayer += performance.now() - t;
+    if (!topLayerOk) {
+      phaseStats.notInActiveTopLayer += 1;
       continue;
     }
 
+    t = performance.now();
     const hitTestVisibility = getElementHitTestVisibility(element);
+    phaseTimes.hitTest += performance.now() - t;
     if (!hitTestVisibility.visible) {
+      phaseStats.hitTestOccluded += 1;
       continue;
     }
 
+    t = performance.now();
     const resolvedCandidate = resolveElementCandidate(
       element,
       config.elementType,
     );
+    phaseTimes.resolve += performance.now() - t;
     if (!resolvedCandidate) {
+      phaseStats.notResolvable += 1;
       continue;
     }
+    phaseStats.matched += 1;
 
     const candidate = {
       element: resolvedCandidate.element,
@@ -2605,14 +2847,20 @@ function collectHighlightCandidates(config, trace, layoutStability) {
     return element;
   });
 
+  const roundedTimes = {};
+  for (const k of Object.keys(phaseTimes)) {
+    roundedTimes[k] = Math.round(phaseTimes[k]);
+  }
   trace(
     'scan:done',
-    `processed=${scannedCount} matched=${elements.length} counts=${JSON.stringify(counts)}`,
+    `processed=${scannedCount} matched=${elements.length} counts=${JSON.stringify(counts)} reject=${JSON.stringify(phaseStats)} ms=${JSON.stringify(roundedTimes)}`,
   );
 
   return {
     elements,
     counts,
+    _scan_stats: phaseStats,
+    _scan_times: roundedTimes,
   };
 }
 
@@ -2625,11 +2873,10 @@ async function runOpenBrowserHighlightDetection(config) {
 
   const layoutStability = evaluateReadinessSnapshot(trace);
 
-  const { elements, counts } = collectHighlightCandidates(
-    config,
-    trace,
-    layoutStability,
-  );
+  const scanStart = performance.now();
+  const scanResult = collectHighlightCandidates(config, trace, layoutStability);
+  const { elements, counts } = scanResult;
+  const scanMs = Math.round(performance.now() - scanStart);
 
   trace('return', `elements=${elements.length}`);
   return {
@@ -2641,5 +2888,10 @@ async function runOpenBrowserHighlightDetection(config) {
       width: window.innerWidth,
       height: window.innerHeight,
     },
+    _perf: {
+      scan_ms: scanMs,
+      scan_stats: scanResult._scan_stats || {},
+      scan_times: scanResult._scan_times || {},
+    },
   };
 }
diff --git a/extension/src/utils/collision-detection.ts b/extension/src/utils/collision-detection.ts
index 054abc2..a409c64 100644
--- a/extension/src/utils/collision-detection.ts
+++ b/extension/src/utils/collision-detection.ts
@@ -36,6 +36,108 @@ interface RemainingCandidate {
   element: InteractiveElement;
 }
 
+// Coarse spatial grid used to skip O(N) scans of `selected` and `remaining`
+// when checking collisions. Cell size is a heuristic — large enough that most
+// label rects touch only a couple of cells, small enough that a typical
+// query returns far fewer than the full set.
+const SPATIAL_INDEX_CELL_PX = 96;
+
+class SelectedSpatialIndex {
+  private cells = new Map<number, InteractiveElement[]>();
+  // Every registered element, returned wholesale for non-finite queries.
+  private everything: InteractiveElement[] = [];
+  // Elements whose rect is not finite: they cannot be bucketed, so every
+  // query includes them and the caller's exact check makes the decision.
+  private unbucketed: InteractiveElement[] = [];
+
+  // Registers `element` under every cell touched by bbox ∪ label rect.
+  add(element: InteractiveElement): void {
+    const labelBBox = getLabelBBox(
+      element.bbox,
+      element.labelPosition ?? 'above',
+      element.id,
+    );
+    const union = unionBBox(element.bbox, labelBBox);
+    this.everything.push(element);
+    if (!SelectedSpatialIndex.isFiniteRect(union)) {
+      this.unbucketed.push(element);
+      return;
+    }
+    this.forEachCell(union, (key) => {
+      const bucket = this.cells.get(key);
+      if (bucket) {
+        bucket.push(element);
+      } else {
+        this.cells.set(key, [element]);
+      }
+    });
+  }
+
+  // Pre-filter: returns a superset of the registered elements whose union
+  // rect overlaps `query` (callers pass it already inflated by clearance).
+  // It may include far-away elements but must never omit a nearby one.
+  queryNear(query: BBox): InteractiveElement[] {
+    if (!SelectedSpatialIndex.isFiniteRect(query)) {
+      // Cannot map the query to cells; behave like the unindexed full list.
+      return this.everything.slice();
+    }
+    const seen = new Set<InteractiveElement>(this.unbucketed);
+    const out: InteractiveElement[] = [...this.unbucketed];
+    this.forEachCell(query, (key) => {
+      const bucket = this.cells.get(key);
+      if (!bucket) return;
+      for (const el of bucket) {
+        if (!seen.has(el)) {
+          seen.add(el);
+          out.push(el);
+        }
+      }
+    });
+    return out;
+  }
+
+  private static isFiniteRect({ x, y, width, height }: BBox): boolean {
+    return [x, y, width, height].every((v) => Number.isFinite(v));
+  }
+
+  // Calls `fn` once per grid cell overlapped by `rect`, which must be finite
+  // (checked by the callers). Negative width/height are treated as zero.
+  private forEachCell(rect: BBox, fn: (key: number) => void): void {
+    const minCx = Math.floor(rect.x / SPATIAL_INDEX_CELL_PX);
+    const maxCx = Math.floor(
+      (rect.x + Math.max(0, rect.width)) / SPATIAL_INDEX_CELL_PX,
+    );
+    const minCy = Math.floor(rect.y / SPATIAL_INDEX_CELL_PX);
+    const maxCy = Math.floor(
+      (rect.y + Math.max(0, rect.height)) / SPATIAL_INDEX_CELL_PX,
+    );
+    for (let cy = minCy; cy <= maxCy; cy++) {
+      for (let cx = minCx; cx <= maxCx; cx++) {
+        // Row-major key. Distinct cells collide only when their columns are
+        // a multiple of 100000 apart, which merely widens a query result.
+        fn(cy * 100000 + cx);
+      }
+    }
+  }
+}
+
+function unionBBox(a: BBox, b: BBox): BBox {
+  const left = Math.min(a.x, b.x); // smallest rect covering both a and b
+  const top = Math.min(a.y, b.y);
+  const right = Math.max(a.x + a.width, b.x + b.width);
+  const bottom = Math.max(a.y + a.height, b.y + b.height);
+  return { x: left, y: top, width: right - left, height: bottom - top };
+}
+
+function inflateBBox(rect: BBox, padding: number): BBox {
+  // Grow the rect by `padding` on every side: the origin moves out by
+  // `padding` and each extent gains 2 * padding.
+  const { x, y, width, height } = rect;
+  const extra = 2 * padding;
+  const origin = { x: x - padding, y: y - padding };
+  return { ...origin, width: width + extra, height: height + extra };
+}
+
 interface PlacementEvaluation {
   position: LabelPosition;
   blockedCandidateCount: number;
@@ -302,12 +404,14 @@ function buildCollisionFreePages(
 
   while (remaining.length > 0) {
     const selected: InteractiveElement[] = [];
+    const selectedIndex = new SelectedSpatialIndex();
     let pageRemaining = remaining;
 
     while (pageRemaining.length > 0) {
       const nextSelection = chooseNextCandidate(
         pageRemaining,
         selected,
+        selectedIndex,
         viewportWidth,
         viewportHeight,
       );
@@ -316,10 +420,12 @@ function buildCollisionFreePages(
         break;
       }
 
-      selected.push({
+      const placed: InteractiveElement = {
         ...nextSelection.candidate.element,
         labelPosition: nextSelection.position,
-      });
+      };
+      selected.push(placed);
+      selectedIndex.add(placed);
       pageRemaining = pageRemaining.filter(
         (candidate) =>
           candidate.sourceIndex !== nextSelection.candidate.sourceIndex,
@@ -347,14 +453,16 @@ function tryBuildUniformPositionPage(
   viewportHeight?: number,
 ): InteractiveElement[] | null {
   const selected: InteractiveElement[] = [];
+  const index = new SelectedSpatialIndex();
 
   for (const element of elements) {
+    const nearby = nearbySelectedFor(element, position, element.id, index);
     if (
       !isPlacementFeasible(
         element,
         element.id,
         position,
-        selected,
+        nearby,
         viewportWidth,
         viewportHeight,
       )
@@ -362,10 +470,12 @@ function tryBuildUniformPositionPage(
       return null;
     }
 
-    selected.push({
+    const placed: InteractiveElement = {
       ...element,
       labelPosition: position,
-    });
+    };
+    selected.push(placed);
+    index.add(placed);
   }
 
   return selected;
@@ -374,6 +484,7 @@ function tryBuildUniformPositionPage(
 function chooseNextCandidate(
   remaining: RemainingCandidate[],
   selected: InteractiveElement[],
+  selectedIndex: SelectedSpatialIndex,
   viewportWidth?: number,
   viewportHeight?: number,
 ): (PlacementEvaluation & { candidate: RemainingCandidate }) | null {
@@ -388,6 +499,7 @@ function chooseNextCandidate(
       candidate.element,
       candidate.element.id,
       selected,
+      selectedIndex,
       viewportWidth,
       viewportHeight,
     );
@@ -415,6 +527,7 @@ function chooseNextCandidate(
       constrainedCandidate.feasiblePositions,
       remaining,
       selected,
+      selectedIndex,
       viewportWidth,
       viewportHeight,
     ),
@@ -426,6 +539,7 @@ function chooseLeastBlockingPlacement(
   feasiblePositions: LabelPosition[],
   remaining: RemainingCandidate[],
   selected: InteractiveElement[],
+  selectedIndex: SelectedSpatialIndex,
   viewportWidth?: number,
   viewportHeight?: number,
 ): PlacementEvaluation {
@@ -435,31 +549,109 @@ function chooseLeastBlockingPlacement(
   );
   let bestPlacement: PlacementEvaluation | null = null;
 
-  for (const position of feasiblePositions) {
-    const hypotheticalSelected = [
-      ...selected,
-      {
-        ...candidate.element,
-        labelPosition: position,
-      },
-    ];
-    let blockedCandidateCount = 0;
-    let totalFutureOptions = 0;
-
-    futureCandidates.forEach((candidate) => {
-      const futureOptions = getFeasiblePositions(
-        candidate.element,
-        candidate.element.id,
-        hypotheticalSelected,
-        viewportWidth,
-        viewportHeight,
+  // Pre-compute each future candidate's baseline feasible positions against
+  // the current `selected` set. When we test a hypothetical placement of
+  // `candidate@position`, only future candidates whose bbox/label is
+  // geometrically near that placement can have their feasibility change. The
+  // rest keep their baseline feasibility — saving the O(|future|×4×|selected|)
+  // recomputation per position.
+  interface FutureBaseline {
+    candidate: RemainingCandidate;
+    elementUnion: BBox; // bbox ∪ all four label rects
+    feasibleCount: number;
+    totalLength: number;
+  }
+  const futureBaselines: FutureBaseline[] = futureCandidates.map((fc) => {
+    const baseline = getFeasiblePositions(
+      fc.element,
+      fc.element.id,
+      selected,
+      selectedIndex,
+      viewportWidth,
+      viewportHeight,
+    );
+    let union = fc.element.bbox;
+    for (const pos of POSITION_PRIORITY) {
+      union = unionBBox(
+        union,
+        getLabelBBox(fc.element.bbox, pos, fc.element.id),
       );
+    }
+    return {
+      candidate: fc,
+      elementUnion: union,
+      feasibleCount: baseline.length,
+      totalLength: baseline.length,
+    };
+  });
+
+  const baselineBlockedCount = futureBaselines.reduce(
+    (acc, fb) => (fb.feasibleCount === 0 ? acc + 1 : acc),
+    0,
+  );
+  const baselineTotalOptions = futureBaselines.reduce(
+    (acc, fb) => acc + fb.totalLength,
+    0,
+  );
+
+  for (const position of feasiblePositions) {
+    const hypotheticalElement: InteractiveElement = {
+      ...candidate.element,
+      labelPosition: position,
+    };
+    const hypotheticalLabelBBox = getLabelBBox(
+      candidate.element.bbox,
+      position,
+      candidate.element.id,
+    );
+    // Influence rect: anything whose elementUnion does NOT intersect this
+    // (inflated by clearance) cannot be affected by adding the hypothetical
+    // candidate. We only need to recompute for future candidates inside it.
+    const influenceRect = inflateBBox(
+      unionBBox(candidate.element.bbox, hypotheticalLabelBBox),
+      VISUAL_LABEL_CLEARANCE_PX,
+    );
 
-      if (futureOptions.length === 0) {
+    let blockedCandidateCount = baselineBlockedCount;
+    let totalFutureOptions = baselineTotalOptions;
+
+    for (const fb of futureBaselines) {
+      if (!bboxesIntersect(fb.elementUnion, influenceRect)) {
+        continue;
+      }
+      // Feasibility can change for this future candidate. Re-test against
+      // the spatially-near selected set plus the hypothetical candidate.
+      let updatedFeasibleLen = 0;
+      for (const pos of POSITION_PRIORITY) {
+        const nearby = nearbySelectedFor(
+          fb.candidate.element,
+          pos,
+          fb.candidate.element.id,
+          selectedIndex,
+          [hypotheticalElement],
+        );
+        if (
+          isPlacementFeasible(
+            fb.candidate.element,
+            fb.candidate.element.id,
+            pos,
+            nearby,
+            viewportWidth,
+            viewportHeight,
+          )
+        ) {
+          updatedFeasibleLen++;
+        }
+      }
+
+      // Adjust baseline aggregates for the delta on this single future.
+      if (fb.feasibleCount === 0 && updatedFeasibleLen > 0) {
+        blockedCandidateCount--;
+      } else if (fb.feasibleCount > 0 && updatedFeasibleLen === 0) {
         blockedCandidateCount++;
       }
-      totalFutureOptions += futureOptions.length;
-    });
+      totalFutureOptions += updatedFeasibleLen - fb.totalLength;
+    }
 
     if (
       !bestPlacement ||
@@ -492,18 +684,22 @@ function getFeasiblePositions(
   element: InteractiveElement,
   labelText: string,
   selected: InteractiveElement[],
+  selectedIndex: SelectedSpatialIndex | null,
   viewportWidth?: number,
   viewportHeight?: number,
 ): LabelPosition[] {
   const feasiblePositions: LabelPosition[] = [];
 
   for (const position of POSITION_PRIORITY) {
+    const nearby = selectedIndex
+      ? nearbySelectedFor(element, position, labelText, selectedIndex)
+      : selected;
     if (
       isPlacementFeasible(
         element,
         labelText,
         position,
-        selected,
+        nearby,
         viewportWidth,
         viewportHeight,
       )
@@ -515,6 +711,28 @@ function getFeasiblePositions(
   return feasiblePositions;
 }
 
+// Returns the subset of `selected` that could plausibly collide with the
+// candidate placement. The query rect is the union of the candidate's bbox
+// and its label rect for the requested position, inflated by the visible
+// clearance threshold. Optional `extras` are appended (e.g. a hypothetical
+// candidate not yet inserted into the index).
+function nearbySelectedFor(
+  element: InteractiveElement,
+  position: LabelPosition,
+  labelText: string,
+  index: SelectedSpatialIndex,
+  extras: InteractiveElement[] = [],
+): InteractiveElement[] {
+  // Footprint of this placement: element box plus its label, with clearance.
+  const footprint = unionBBox(
+    element.bbox,
+    getLabelBBox(element.bbox, position, labelText),
+  );
+  const query = inflateBBox(footprint, VISUAL_LABEL_CLEARANCE_PX);
+  const hits = index.queryNear(query);
+  return extras.length > 0 ? hits.concat(extras) : hits;
+}
+
 function isPlacementFeasible(
   element: InteractiveElement,
   labelText: string,
diff --git a/extension/vite.config.ts b/extension/vite.config.ts
index bf660fc..3ebf0bd 100644
--- a/extension/vite.config.ts
+++ b/extension/vite.config.ts
@@ -122,16 +122,18 @@ const devReloadPlugin = () => {
         return;
       }
 
-      // Otherwise wait for the extension to connect (up to 10s)
+      // Otherwise wait for the extension to connect (up to 40s — covers a
+      // full chrome.alarms keepalive cycle when the MV3 service worker has
+      // been terminated by Chrome).
       console.log(
         '🔄 [DevReload] Build complete — waiting for extension to connect...',
       );
       const timeout = setTimeout(() => {
         console.warn(
-          '🔄 [DevReload] No extension connected within 10s. Reload the extension manually once, then future `npm run dev` runs will auto-reload.',
+          '🔄 [DevReload] No extension connected within 40s. Reload the extension manually once, then future `npm run dev` runs will auto-reload.',
         );
         process.exit(0);
-      }, 10_000);
+      }, 40_000);
 
       // Check periodically if a client has connected
       const poll = setInterval(() => {
diff --git a/pyproject.toml b/pyproject.toml
index dd933be..69ae578 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,5 +76,5 @@ override-dependencies = [
 ]
 
 [tool.uv.sources]
-openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "764fb87256d7bc20b3eccf82c8a4d241e6740d63" }
-openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "764fb87256d7bc20b3eccf82c8a4d241e6740d63" }
+openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" }
+openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" }
diff --git a/server/agent/context_image_window.py b/server/agent/context_image_window.py
index c2da913..39f09cc 100644
--- a/server/agent/context_image_window.py
+++ b/server/agent/context_image_window.py
@@ -12,9 +12,16 @@
 DEFAULT_CONTEXT_IMAGE_WINDOW = 3
 
 
-def get_context_image_window() -> int | None:
+ROUTINE_REPLAY_CONTEXT_IMAGE_WINDOW = 1
+
+
+def get_context_image_window(routine_replay: bool = False) -> int | None:
     """Return the tool-image window passed to the SDK Agent.
 
+    Routine-replay conversations use a fixed window of 1: the SOP already
+    spells out each step, so a single most-recent screenshot is enough to
+    ground the next action and three-frame history would only pad context.
+
     The default is to keep only the latest screenshot-bearing tool message.
     Environment variable semantics:
     - `-1`: disable SDK filtering entirely (`None`)
@@ -22,6 +29,9 @@ def get_context_image_window() -> int | None:
     - `N >= 1`: keep the latest N screenshot-bearing tool messages
     """
 
+    if routine_replay:
+        return ROUTINE_REPLAY_CONTEXT_IMAGE_WINDOW
+
     raw_value = os.getenv(ENV_CONTEXT_IMAGE_WINDOW)
     if raw_value is None or raw_value.strip() == "":
         return DEFAULT_CONTEXT_IMAGE_WINDOW
diff --git a/server/agent/manager.py b/server/agent/manager.py
index cef99c0..7e87026 100644
--- a/server/agent/manager.py
+++ b/server/agent/manager.py
@@ -329,7 +329,9 @@ def _create_conversation_in_process(
         agent_context = self._build_agent_context()
         llm_instance = self._create_llm_from_config(model, base_url, model_alias)
         tools = self._get_tools_for_model(model, model_alias)
-        tool_image_window = get_context_image_window()
+        tool_image_window = get_context_image_window(
+            routine_replay=self._is_routine_replay_mode(mode)
+        )
         condenser_llm = llm_instance.model_copy(update={"usage_id": "condenser"})
         agent = Agent(
             llm=llm_instance,
@@ -576,7 +578,9 @@ def get_or_create_conversation(
         agent_context = self._build_agent_context()
         llm_instance = self._create_llm_from_config(model, base_url, model_alias)
         tools = self._get_tools_for_model(model, model_alias)
-        tool_image_window = get_context_image_window()
+        tool_image_window = get_context_image_window(
+            routine_replay=self._is_routine_replay_mode(mode)
+        )
         condenser_llm = llm_instance.model_copy(update={"usage_id": "condenser"})
         agent = Agent(
             llm=llm_instance,
diff --git a/server/agent/tools/browser_executor.py b/server/agent/tools/browser_executor.py
index 26b3f35..81feb97 100644
--- a/server/agent/tools/browser_executor.py
+++ b/server/agent/tools/browser_executor.py
@@ -105,6 +105,11 @@ def __init__(self):
         self.conversation_id = None
         # Pending confirmations per conversation for 2PC actions.
         self.pending_confirmations: Dict[str, Dict[str, Any]] = {}
+        # Most recent highlight result per conversation. Keyed by conversation_id,
+        # value is the list of element dicts returned by the last highlight call.
+        # Used in routine-replay mode to auto-confirm clicks/selects/keyboard_input
+        # when the target was just uniquely highlighted.
+        self.last_highlight_elements: Dict[str, List[Dict[str, Any]]] = {}
 
     def _uses_small_model(self) -> bool:
         """Whether the active conversation uses the small-model profile."""
@@ -132,6 +137,38 @@ def _uses_small_model(self) -> bool:
 
         return is_small_model(model_name)
 
+    def _is_routine_replay_mode(self) -> bool:
+        """Whether the active conversation is running in routine-replay mode."""
+        if not self.conversation_id:
+            return False
+
+        session = session_manager.get_session(str(self.conversation_id))
+        if session is None:
+            return False
+
+        return session.metadata.get("mode") == "routine_replay"
+
+    def _auto_confirm_target_id(self, requested_element_id: str) -> str | None:
+        """Return the resolved element id if auto-confirm applies, else None.
+
+        In routine-replay mode, when the element list cached from this
+        conversation's latest highlight call holds exactly one entry whose "id"
+        equals the requested id, the two-phase confirmation round trip is
+        skipped to save latency. NOTE(review): the cache stores the returned
+        `elements` list, not `totalElements` — confirm one entry means unique.
+        """
+        if not self._is_routine_replay_mode():
+            return None
+        if not self.conversation_id or not requested_element_id:
+            return None
+        recent = self.last_highlight_elements.get(self.conversation_id)
+        if not recent or len(recent) != 1:
+            return None
+        only_id = recent[0].get("id")
+        if not only_id or only_id != requested_element_id:
+            return None
+        return only_id
+
     def __call__(
         self, action: OpenBrowserAction, conversation
     ) -> OpenBrowserObservation:
@@ -333,6 +370,8 @@ def _execute_highlight_action(
         # Extract elements and pagination info
         elements = result_dict.get("data", {}).get("elements", [])
         total_elements = result_dict.get("data", {}).get("totalElements", 0)
+        if self.conversation_id:
+            self.last_highlight_elements[self.conversation_id] = list(elements)
         element_label = self._format_highlight_element_label(
             element_type=element_type, count=len(elements)
         )
@@ -366,6 +405,22 @@ def _execute_element_interaction_action(
         if action_type == "click":
             if not action.element_id:
                 raise ValueError("click requires element_id parameter")
+            auto_id = self._auto_confirm_target_id(action.element_id)
+            if auto_id:
+                command = ClickElementCommand(
+                    element_id=auto_id,
+                    conversation_id=self.conversation_id,
+                    tab_id=action.tab_id,
+                )
+                result_dict = self._execute_command_sync(command)
+                if not result_dict or not result_dict.get("success"):
+                    ext_error = self._extract_result_error(result_dict)
+                    raise RuntimeError(f"Failed to click element: {ext_error}")
+                return self._build_observation_from_result(
+                    result_dict,
+                    f"Auto-confirmed and clicked element: {auto_id}",
+                    element_id=auto_id,
+                )
             element_preview = self._get_element_full_html(action.element_id, "click")
             full_html = element_preview[0]
             screenshot = element_preview[1]
@@ -572,6 +627,23 @@ def _execute_element_interaction_action(
                 raise ValueError("keyboard_input requires element_id parameter")
             if not action.text:
                 raise ValueError("keyboard_input requires text parameter")
+            auto_id = self._auto_confirm_target_id(action.element_id)
+            if auto_id:
+                command = KeyboardInputCommand(
+                    element_id=auto_id,
+                    text=action.text,
+                    conversation_id=self.conversation_id,
+                    tab_id=action.tab_id,
+                )
+                result_dict = self._execute_command_sync(command)
+                if not result_dict or not result_dict.get("success"):
+                    ext_error = self._extract_result_error(result_dict)
+                    raise RuntimeError(f"Failed to input text: {ext_error}")
+                return self._build_observation_from_result(
+                    result_dict,
+                    f"Auto-confirmed and input text to element: {auto_id}",
+                    element_id=auto_id,
+                )
             element_preview = self._get_element_full_html(
                 action.element_id, "keyboard_input"
             )
@@ -622,6 +694,25 @@ def _execute_element_interaction_action(
                 raise ValueError("select requires element_id parameter")
             if action.value is None:
                 raise ValueError("select requires value parameter")
+            auto_id = self._auto_confirm_target_id(action.element_id)
+            if auto_id:
+                command = SelectElementCommand(
+                    element_id=auto_id,
+                    value=action.value,
+                    conversation_id=self.conversation_id,
+                    tab_id=action.tab_id,
+                )
+                result_dict = self._execute_command_sync(command)
+                if not result_dict or not result_dict.get("success"):
+                    ext_error = self._extract_result_error(result_dict)
+                    raise RuntimeError(f"Failed to select option: {ext_error}")
+                value_preview = self._format_select_value_preview(action.value)
+                return self._build_observation_from_result(
+                    result_dict,
+                    f"Auto-confirmed and selected option {value_preview} in element: "
+                    f"{auto_id}",
+                    element_id=auto_id,
+                )
             element_preview = self._get_element_full_html(action.element_id, "select")
             full_html = element_preview[0]
             screenshot = element_preview[1]
diff --git a/skill/claude/ob-routines/SKILL.md b/skill/claude/ob-routines/SKILL.md
new file mode 100644
index 0000000..589bd0e
--- /dev/null
+++ b/skill/claude/ob-routines/SKILL.md
@@ -0,0 +1,250 @@
+---
+name: ob-routines
+description: Record, compile, and replay Browser Routines — saved, named browser workflows. (Alias for openbrowser-routines.) Supports subcommands: "list [query]" to list/search routines, "new" to record a new routine, "execute <name>" to replay a saved routine. Use when the user says "list routines", "record a routine", "replay X", "execute X", or "/ob-routines <subcommand>".
+---
+
+# Browser Routines
+
+Browser Routines are named, compiled workflows captured from real Chrome sessions.
+The pipeline has four stages: **record → compile → name → replay**.
+
+## Subcommand dispatch
+
+When invoked with arguments, act immediately — do not ask the user what they want:
+
+| Invocation | Action |
+|---|---|
+| `/ob-routines` | Show available routines and ask what to do |
+| `/ob-routines list [query]` | Run `list_routines.py [query]` and display results |
+| `/ob-routines new` | Ask **only** for the one-line goal/intention, then start recording immediately (see "Before recording" below) |
+| `/ob-routines execute <name>` | Run `replay.py <name>` immediately |
+
+---
+
+## Your role during compilation
+
+You are a **bridge and quality gate**, not the compiler. The Compiler Agent does
+the reasoning; you ensure it did its job correctly before finalizing.
+
+### Bridge duties
+1. Run `compile.py` in a tmux pane (mandatory — see below).
+2. Watch for `[compiler:question]` — relay it to the user, send their answer back.
+3. Watch for `[compiler:stalled]` — show the agent's message, optionally prompt a follow-up.
+4. At `[compiler:name_prompt]` — help the user pick a short slug.
+
+### Quality gate (run before every finalize)
+
+After the compiler reports `status=review`, read the compiled routine markdown
+and check **both** of the following before calling `/compile/finalize`:
+
+#### Gate 1 — Intent clarity
+Did the compiler understand *why* the user performed each action, not just *what*
+they clicked? Red flags:
+- Steps that say "click X" with no explanation of goal or condition
+- A position-based selection from a sorted/filtered list without asking whether
+  to replay by position or by identity (e.g. "upvote the top 3 posts" — top 3
+  today vs. the same 3 posts always?)
+- A value (date, search query, ticker, ID) that will obviously change between
+  runs, not parameterized
+
+If any red flag is present and the compiler did NOT ask about it: relay the
+ambiguity to the user yourself, get their answer, then send it via
+`POST /recordings/{id}/compile/answer` so the compiler can revise.
+
+#### Gate 2 — Delivery goal for read-only workflows
+
+A workflow is **read-only** if it has no form submission, no purchase, no
+send/post/create/delete action — the user only navigated, read, filtered, or
+inspected. For read-only workflows, ask: does the compiled routine end with a
+delivery step (a `file_editor` write, a `terminal` command, or an explicit
+instruction to report results in chat)?
+
+**If the routine is read-only AND has no delivery step, the compiler made an
+error.** Do not finalize. Instead:
+
+1. Tell the user: "This routine reads data but doesn't capture results anywhere.
+   How do you want results delivered on replay?"
+   - (a) Summary shown in chat (brief / structured table / full details?)
+   - (b) Written to a local file (path + format: plain text, Markdown, CSV, JSON?)
+   - (c) Both
+2. Get their answer.
+3. Send it to the compiler via `POST /recordings/{id}/compile/answer` — the
+   compiler will revise the routine to include the delivery step.
+4. Wait for the next `status=review`, then re-run both gates.
+
+> **Why this matters:** A routine that just clicks through pages is useless on
+> replay — OpenBrowser will navigate and stop with no output. The delivery step
+> is what makes the routine meaningful.
+
+---
+
+## Preconditions
+
+**First time?** Complete the full setup in `skill/claude/open-browser/references/setup.md`
+before using this skill. That guide covers: loading the Chrome extension, connecting
+it to the server, and obtaining a valid `OPENBROWSER_CHROME_UUID`. Without that,
+recording and replay will fail immediately.
+
+For subsequent uses, confirm:
+- OpenBrowser server at `http://127.0.0.1:8765`
+- Chrome extension connected
+- `OPENBROWSER_CHROME_UUID` set (or passed via `--chrome-uuid`)
+
+Quick check:
+```bash
+python3 skill/claude/open-browser/scripts/check_status.py --chrome-uuid "$OPENBROWSER_CHROME_UUID"
+```
+
+Start the server if needed:
+```bash
+cd /Users/yangxiao/git/OpenBrowser && uv run local-chrome-server serve
+```
+
+Scripts path: `skill/claude/ob-routines/scripts/` (run from repo root).
+
+---
+
+## List & search routines
+
+```bash
+python3 skill/claude/ob-routines/scripts/list_routines.py
+python3 skill/claude/ob-routines/scripts/list_routines.py "login"
+python3 skill/claude/ob-routines/scripts/list_routines.py --recordings
+```
+
+---
+
+## Record a routine
+
+### Before recording — DO NOT interrogate the user
+
+The whole point of record → compile is that the browser actions are **observed**,
+and the Compiler Agent asks clarifying questions *after* it has seen them.
+
+Ask the user **only** for a short goal/intention (one line). Do **NOT** ask:
+- which site or URL to start from
+- which tool/screener to use
+- how to define filter terms ("what's high-value?", "what's significant?")
+- which parameters should vary between runs
+
+All of that is the compiler's job during Gate 1. Pre-record interrogation
+defeats the pipeline and wastes the user's time. If the user's goal is vague
+("find good stocks"), that's fine — start recording. The compiler will ask.
+
+### Step 1 — start recording
+```bash
+python3 skill/claude/ob-routines/scripts/start_recording.py \
+  --chrome-uuid "$OPENBROWSER_CHROME_UUID" \
+  --name "xiaohongshu-messages" \
+  --intent "check messages on Xiaohongshu"
+```
+
+Prints `[recording:started] <recording_id>`. **Save this ID.**
+
+Tell the user: **"Perform your actions in the browser window, then come back and say done."**
+Do NOT proceed until the user confirms.
+
+### Step 2 — stop recording
+```bash
+python3 skill/claude/ob-routines/scripts/stop_recording.py <recording_id>
+```
+
+---
+
+## Compile to a routine — MANDATORY: tmux interactive session
+
+**compile.py uses `input()` for Q&A and the name prompt. It MUST run in an
+interactive shell. Never invoke it directly via the Bash tool — it will block
+and then be killed, losing the compiler session.**
+
+### Launch in tmux
+```bash
+tmux new-window -n "compile" \
+  "cd /Users/yangxiao/git/OpenBrowser && python3 skill/claude/ob-routines/scripts/compile.py <recording_id>; echo '[compile-done]'"
+```
+
+### Monitor output
+```bash
+tmux capture-pane -t "compile" -p
+```
+
+### Send an answer
+```bash
+tmux send-keys -t "compile" "the answer" Enter
+```
+
+### Markers to watch for
+
+| Marker | Your action |
+|---|---|
+| `[compiler:thought]` / `[compiler:action]` | Relay as progress to user |
+| `[compiler:question] <text>` | Relay to user, wait for answer, send via `tmux send-keys` |
+| `[compiler:stalled] <text>` | Show message, ask user for follow-up |
+| `[compiler:complete] goal=… steps=N` | Compilation reached review state |
+| `[compiler:routine_draft]` | Full routine markdown printed for inspection |
+| `[compiler:gate_check]` | **Run both quality gates here.** Send feedback or press Enter |
+| `[compiler:name_prompt]` | Gates passed — help user pick slug |
+| `[compiler:saved]` | Done — report name and id |
+
+### Quality gate checkpoint
+When `[compiler:gate_check]` appears in the pane, compile.py is explicitly
+paused waiting for your review of `[compiler:routine_draft]`. Run Gate 1 and Gate 2:
+
+- **Gates pass** → send an empty Enter: `tmux send-keys -t "compile" "" Enter`
+- **Gate fails** → send corrective feedback:
+  `tmux send-keys -t "compile" "Please add a delivery step: summarise results in chat as a structured list of tickers with metrics." Enter`
+
+compile.py forwards non-empty input back to the compiler, streams the revision,
+and loops back to another `[compiler:gate_check]`. Only an empty Enter advances
+to `[compiler:name_prompt]`.
+
+**Never send gate feedback at the `[compiler:name_prompt]` stage** — that input
+goes directly to the routine name field, not the compiler.
+
+---
+
+## Replay a routine
+
+```bash
+python3 skill/claude/ob-routines/scripts/replay.py "routine-name" \
+  --chrome-uuid "$OPENBROWSER_CHROME_UUID"
+
+# List without replaying
+python3 skill/claude/ob-routines/scripts/replay.py --list
+```
+
+Name matching: exact → ID → prefix → substring.
+
+---
+
+## Full example workflow
+
+```
+1. /ob-routines new  →  ask user only for a one-line goal
+2. start_recording  →  [recording:started] abc123
+3. (user records in browser, says "done")
+4. stop_recording abc123  →  [recording:events] 21 events
+5. tmux new-window "compile.py abc123"
+6. monitor pane → relay questions → send answers
+7. [compiler:gate_check]  →  run Gate 1 + Gate 2
+   Gate 2 fails: routine is read-only, no delivery step
+   → ask user: chat summary, file, or both?
+   → send answer via tmux send-keys
+   → wait for next [compiler:gate_check]
+8. Gates pass → send empty Enter → [compiler:name_prompt] → user picks slug
+9. [compiler:saved] name='…' id=…
+10. /ob-routines execute <name>  →  streams [action] … [complete]
+```
+
+---
+
+## Failure handling
+
+- **Server unreachable**: `uv run local-chrome-server serve`
+- **Browser UUID invalid**: reconnect Chrome extension, get fresh UUID
+- **0 events captured**: browser disconnected; re-record
+- **tmux not found**: `brew install tmux`
+- **tmux window conflict**: check `tmux list-windows`, use a unique `-n` name
+- **Compiler session expired** (pane exited before finalize): call
+  `POST /recordings/{id}/compile` again to restart — session is fresh
+- **Relay stuck**: `[observation:error]` lines in SSE stream; relay to user
diff --git a/skill/claude/ob-routines/scripts/compile.py b/skill/claude/ob-routines/scripts/compile.py
new file mode 100644
index 0000000..5f56b6e
--- /dev/null
+++ b/skill/claude/ob-routines/scripts/compile.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python3
+"""Compile a stopped recording into a named Browser Routine.
+
+Starts the Compiler Agent, streams its SSE output, and acts as a bridge
+between the agent and the user:
+  - Agent reasoning and tool calls are printed to stdout as they arrive.
+  - When the compiler agent asks a clarification question (status=asking),
+    this script prints the question and reads the user's answer from stdin,
+    then resumes compilation via /compile/answer.
+  - When the agent stalls (status=stalled), the agent's last message is
+    shown and the user can send a follow-up.
+  - When compilation completes (status=review), the script shows the draft and
+    waits at a gate check; an empty reply leads to naming and /compile/finalize.
+
+The outer agent (Claude Code / Codex) should relay the printed questions to
+the user and feed their responses back via stdin — it should NOT try to
+re-implement compiler logic.
+
+Example:
+  python3 compile.py abc123-recording-id
+  python3 compile.py abc123-recording-id --model-alias fast
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+
+# ---------------------------------------------------------------------------
+# HTTP helpers
+# ---------------------------------------------------------------------------
+
+
+def request_json(
+    url: str,
+    *,
+    method: str = "GET",
+    body: dict | None = None,
+    timeout: int = 15,
+) -> dict:
+    headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    data = None if body is None else json.dumps(body).encode("utf-8")
+    req = Request(url, data=data, headers=headers, method=method)
+    with urlopen(req, timeout=timeout) as r:
+        return json.loads(r.read().decode("utf-8"))
+
+
+# ---------------------------------------------------------------------------
+# SSE event formatting  (same conventions as send_task.py)
+# ---------------------------------------------------------------------------
+
+
+def _format_compiler_event(event_type: str, data: dict) -> None:
+    """Print one SSE event from the compiler agent stream."""
+    if event_type == "error":
+        print(f"[compiler:error] {data.get('error', data)}", flush=True)
+        return
+
+    if event_type != "agent_event":
+        # Pass-through for unknown top-level event types
+        print(f"[{event_type}] {json.dumps(data, ensure_ascii=False)}", flush=True)
+        return
+
+    data_type = data.get("type", "unknown")
+
+    if data_type == "SystemPromptEvent":
+        text_len = len(data.get("text", ""))
+        print(
+            f"[compiler:system_prompt] suppressed ({text_len} chars)",
+            flush=True,
+        )
+        return
+
+    if data_type == "ThoughtEvent":
+        thought = data.get("thought", data.get("content", ""))
+        print(f"[compiler:thought] {thought}", flush=True)
+        return
+
+    if data_type == "ActionEvent":
+        action = data.get("action", {})
+        if isinstance(action, dict):
+            action_name = action.get("action", "unknown")
+            if action_name == "ask_user":
+                question = action.get("question", "")
+                print(f"[compiler:ask_user] {question}", flush=True)
+            else:
+                # FileEditorTool, TraceViewerTool, SubmitWorkflowTool, etc.
+                extras = {
+                    k: v for k, v in action.items() if k != "action" and v is not None
+                }
+                suffix = (
+                    (" " + json.dumps(extras, ensure_ascii=False)) if extras else ""
+                )
+                print(f"[compiler:action] {action_name}{suffix}", flush=True)
+        else:
+            print(f"[compiler:action] {action}", flush=True)
+        return
+
+    if data_type == "ObservationEvent":
+        success = data.get("success", False)
+        message = data.get("message", "")
+        state = "ok" if success else "error"
+        print(f"[compiler:observation:{state}] {message}", flush=True)
+        return
+
+    if data_type == "MessageEvent":
+        role = data.get("role", "unknown")
+        text = data.get("text", "")
+        print(f"[compiler:message:{role}] {text}", flush=True)
+        return
+
+    if data_type == "ErrorEvent":
+        print(f"[compiler:error] {data.get('error', 'unknown error')}", flush=True)
+        return
+
+    print(
+        f"[compiler:agent_event:{data_type}] {json.dumps(data, ensure_ascii=False)}",
+        flush=True,
+    )
+
+
+# ---------------------------------------------------------------------------
+# SSE streaming
+# ---------------------------------------------------------------------------
+
+
+def _stream_sse(url: str, body: dict) -> dict | None:
+    """POST body to url, print streamed SSE events, return the complete ``result``.
+
+    Returns None on an HTTP error or when no ``complete`` event arrives.
+    """
+    req = Request(
+        url,
+        data=json.dumps(body).encode("utf-8"),
+        headers={
+            "Content-Type": "application/json",
+            "Accept": "text/event-stream",
+        },
+        method="POST",
+    )
+
+    complete_result: dict | None = None
+    sse_event: str | None = None
+    sse_data: str | None = None
+
+    try:
+        with urlopen(req, timeout=None) as response:
+            for raw_line in response:
+                line = raw_line.decode("utf-8").rstrip("\r\n")  # SSE allows CRLF
+                if not line:
+                    if sse_event and sse_data is not None:
+                        try:
+                            parsed = json.loads(sse_data)
+                        except json.JSONDecodeError:
+                            parsed = {"raw": sse_data}
+
+                        if sse_event == "complete":
+                            complete_result = parsed.get("result", parsed)
+                        else:
+                            _format_compiler_event(sse_event, parsed)
+
+                    sse_event = None
+                    sse_data = None
+                    continue
+
+                if line.startswith("event:"):
+                    sse_event = line[6:].strip()
+                elif line.startswith("data:"):
+                    sse_data = line[5:].lstrip()
+
+    except HTTPError as exc:
+        body_text = exc.read().decode("utf-8", errors="replace")
+        print(
+            f"[compiler:http_error] {exc.code} {exc.reason}: {body_text}",
+            file=sys.stderr,
+        )
+        return None
+
+    return complete_result
+
+
+# ---------------------------------------------------------------------------
+# Compile loop
+# ---------------------------------------------------------------------------
+
+
+def compile_recording(base_url: str, recording_id: str, model_alias: str | None) -> int:
+    """Run the compile → Q&A → finalize flow. Returns exit code."""
+    print(f"[compiler:start] recording={recording_id}", flush=True)
+
+    # ── Phase 1: initial compile ──────────────────────────────────────────
+    compile_body: dict = {}
+    if model_alias:
+        compile_body["model_alias"] = model_alias
+
+    result = _stream_sse(
+        f"{base_url}/recordings/{recording_id}/compile",
+        body=compile_body,
+    )
+    if result is None:
+        return 1
+
+    # ── Phase 2: Q&A loop ─────────────────────────────────────────────────
+    while True:
+        status = result.get("status")
+
+        if status == "asking":
+            question = result.get("question", "")
+            print(f"\n[compiler:question] {question}", flush=True)
+            print(
+                "[compiler:waiting_for_answer] Type your answer and press Enter:",
+                flush=True,
+            )
+            try:
+                answer = input().strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\n[compiler:interrupted] Compilation cancelled.", flush=True)
+                return 130
+
+            result = _stream_sse(
+                f"{base_url}/recordings/{recording_id}/compile/answer",
+                body={"answer": answer},
+            )
+            if result is None:
+                return 1
+
+        elif status == "stalled":
+            # Agent replied in prose instead of calling ask_user.
+            # Show the message and let the user send a follow-up.
+            message = result.get("message", "")
+            if message:
+                print(f"\n[compiler:stalled] {message}", flush=True)
+            print(
+                "[compiler:waiting_for_follow_up] Agent stalled — send a follow-up "
+                "(or press Enter to continue without one):",
+                flush=True,
+            )
+            try:
+                follow_up = input().strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\n[compiler:interrupted] Compilation cancelled.", flush=True)
+                return 130
+
+            if not follow_up:
+                follow_up = "Please continue."
+
+            result = _stream_sse(
+                f"{base_url}/recordings/{recording_id}/compile/answer",
+                body={"answer": follow_up},
+            )
+            if result is None:
+                return 1
+
+        elif status == "review":
+            # Compilation done — show the draft and pause for quality gate
+            # before proceeding to the name prompt. The outer agent (Claude
+            # Code / Codex) reads the routine here and may send corrective
+            # feedback (e.g. missing delivery step) via the gate prompt.
+            # Only an empty Enter moves forward to naming.
+            goal = result.get("goal", "")
+            step_count = result.get("step_count", "?")
+            routine_markdown = result.get("routine_markdown", "")
+            print(
+                f"\n[compiler:complete] goal={goal!r}  steps={step_count}", flush=True
+            )
+            if routine_markdown:
+                print(f"[compiler:routine_draft]\n{routine_markdown}", flush=True)
+            print(
+                "\n[compiler:gate_check] Review the routine above.\n"
+                "Press Enter to proceed to naming, or type feedback to send back to the compiler:",
+                flush=True,
+            )
+            try:
+                gate_input = input().strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\n[compiler:interrupted] Compilation cancelled.", flush=True)
+                return 130
+
+            if gate_input:
+                # Outer agent has feedback — send it back to the compiler
+                result = _stream_sse(
+                    f"{base_url}/recordings/{recording_id}/compile/answer",
+                    body={"answer": gate_input},
+                )
+                if result is None:
+                    return 1
+                # Loop back to handle the next status
+                continue
+
+            # Gate passed — proceed to naming
+            break
+
+        else:
+            print(
+                f"[compiler:unexpected_status] {status} — result: {result}",
+                file=sys.stderr,
+            )
+            return 1
+
+    # ── Phase 3: name the routine and finalize ────────────────────────────
+    goal = result.get("goal", "")
+    step_count = result.get("step_count", "?")
+
+    # Suggest a slug derived from the goal
+    suggested = _slugify(goal) if goal else "my-routine"
+    print(
+        f"\n[compiler:name_prompt] Suggested name: {suggested!r}\n"
+        f"Accept (press Enter) or type a new name:",
+        flush=True,
+    )
+    try:
+        chosen_name = input().strip()
+    except (EOFError, KeyboardInterrupt):
+        print("\n[compiler:interrupted] Finalization cancelled.", flush=True)
+        return 130
+
+    if not chosen_name:
+        chosen_name = suggested
+
+    # ── Phase 4: finalize ─────────────────────────────────────────────────
+    try:
+        finalize_result = request_json(
+            f"{base_url}/recordings/{recording_id}/compile/finalize",
+            method="POST",
+            body={"name": chosen_name},
+        )
+    except HTTPError as exc:
+        body_text = exc.read().decode("utf-8", errors="replace")
+        print(f"[compiler:finalize_error] {exc.code}: {body_text}", file=sys.stderr)
+        return 1
+    except Exception as exc:
+        print(f"[compiler:finalize_error] {exc}", file=sys.stderr)
+        return 1
+
+    routine = finalize_result.get("routine", {})
+    routine_id = routine.get("routine_id", "?")
+    name = routine.get("name", chosen_name)
+    steps = routine.get("step_count", "?")
+
+    print(f"[compiler:saved] name={name!r}  id={routine_id}  steps={steps}", flush=True)
+    print(
+        f"\nRoutine saved. To replay it, run:\n\n" f"  python3 replay.py {name!r}\n",
+        flush=True,
+    )
+    return 0
+
+
+def _slugify(text: str) -> str:
+    """Slugify a goal: lowercase, hyphenated, max 40 chars; "routine" if empty."""
+    import re
+
+    # Lowercase, drop all but \w chars (incl. underscore) and whitespace, hyphenate
+    slug = re.sub(r"[^\w\s]", "", text.lower())
+    slug = re.sub(r"\s+", "-", slug.strip())
+    # Truncate to 40 chars, trim trailing hyphens
+    slug = slug[:40].rstrip("-")
+    return slug or "routine"
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description="Compile a stopped recording into a named Browser Routine",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument("recording_id", help="Recording ID from stop_recording.py")
+    parser.add_argument(
+        "--model-alias",
+        help="LLM model alias to use for compilation (uses server default if omitted)",
+    )
+    parser.add_argument(
+        "--url",
+        default="http://127.0.0.1:8765",
+        help="OpenBrowser server URL",
+    )
+    args = parser.parse_args()
+
+    try:
+        return compile_recording(args.url, args.recording_id, args.model_alias)
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+    except KeyboardInterrupt:
+        print("Interrupted.", file=sys.stderr)
+        return 130
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/skill/claude/ob-routines/scripts/list_routines.py b/skill/claude/ob-routines/scripts/list_routines.py
new file mode 100644
index 0000000..a1ab1e7
--- /dev/null
+++ b/skill/claude/ob-routines/scripts/list_routines.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""List saved routines and/or stopped recordings.
+
+Routines are named, compiled browser workflows ready to replay.
+Recordings are raw captured traces that may not yet be compiled.
+
+Examples:
+  python3 list_routines.py                    # list all routines
+  python3 list_routines.py login              # filter by name/goal substring
+  python3 list_routines.py --recordings       # list stopped recordings instead
+  python3 list_routines.py --recordings login # filter recordings by name
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from urllib.error import URLError
+from urllib.request import Request, urlopen
+
+
+def request_json(url: str, *, timeout: int = 10) -> dict:  # GET url, return parsed JSON
+    req = Request(url, headers={"Accept": "application/json"})
+    with urlopen(req, timeout=timeout) as r:  # response is closed when the block exits
+        return json.loads(r.read().decode("utf-8"))  # body is decoded as UTF-8 before parsing
+
+
+def list_routines(base_url: str, query: str | None) -> int:
+    """Print saved routines, optionally filtered by ``query``; return an exit code."""
+    try:
+        data = request_json(f"{base_url}/routines")
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+
+    items = data.get("routines", [])
+    if query:
+        q = query.lower()
+        items = [  # a missing or null "goal" counts as empty text
+            r for r in items
+            if q in r["name"].lower() or q in (r.get("goal") or "").lower()
+        ]
+
+    if not items:
+        suffix = f" matching {query!r}" if query else ""
+        print(f"No routines found{suffix}.")
+        return 0
+
+    print(f"{'NAME':<30}  {'STEPS':>5}  {'GOAL'}")
+    print("-" * 72)
+    for r in items:
+        steps = r.get("step_count")
+        shown = "?" if steps is None else steps  # None cannot take the ">5" format spec
+        print(f"{r['name']:<30}  {shown:>5}  {r.get('goal') or ''}")
+        print(f"  id={r['routine_id']}")
+    return 0
+
+
+def list_recordings(base_url: str, query: str | None) -> int:
+    """Print stopped recordings (name-filtered by ``query``); return an exit code."""
+    try:
+        payload = request_json(f"{base_url}/recordings?status=stopped")
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+    recordings = payload.get("recordings", [])
+
+    needle = (query or "").lower()
+    if needle:
+        recordings = [x for x in recordings if needle in (x.get("name") or "").lower()]
+    if not recordings:
+        suffix = f" matching {query!r}" if query else ""
+        print(f"No stopped recordings found{suffix}.")
+        return 0
+
+    print(f"{'NAME':<30}  {'EVENTS':>6}  {'RECORDING ID'}")
+    print("-" * 72)
+    for rec in recordings:
+        label = rec.get("name") or "(unnamed)"
+        count = rec.get("event_count", "?")
+        rec_id = rec["recording_id"]
+        marker = "(compiled)" if (rec.get("metadata") or {}).get("routine_id") else ""
+        print(f"{label:<30}  {count:>6}  {rec_id}  {marker}")
+    return 0
+
+
+def main() -> int:
+    """Parse CLI options and list either stopped recordings or saved routines."""
+    cli = argparse.ArgumentParser(
+        description="List saved routines or stopped recordings",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    cli.add_argument(
+        "query", nargs="?", help="Filter by name or goal substring (case-insensitive)"
+    )
+    cli.add_argument(
+        "--recordings",
+        action="store_true",
+        help="List stopped recordings instead of compiled routines",
+    )
+    cli.add_argument(
+        "--url",
+        default="http://127.0.0.1:8765",
+        help="OpenBrowser server URL",
+    )
+    opts = cli.parse_args()
+
+    # Both listers share a signature, so choose one and call it once.
+    lister = list_recordings if opts.recordings else list_routines
+    # The lister's return value is the process exit status.
+    return lister(opts.url, opts.query)
+
+
+if __name__ == "__main__":  # executed as a script, not imported
+    raise SystemExit(main())  # main()'s return value becomes the exit status
diff --git a/skill/claude/ob-routines/scripts/replay.py b/skill/claude/ob-routines/scripts/replay.py
new file mode 100644
index 0000000..8b61d7b
--- /dev/null
+++ b/skill/claude/ob-routines/scripts/replay.py
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+"""Execute a saved Browser Routine in Chrome.
+
+Looks up the routine by name (exact or prefix match, case-insensitive),
+creates an agent conversation in routine_replay mode, sends the routine
+markdown as the task, and streams execution output.
+
+Examples:
+  python3 replay.py "techforum-upvote" --chrome-uuid "$OPENBROWSER_CHROME_UUID"
+  python3 replay.py login               # prefix match
+  python3 replay.py --list              # list all available routines
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from urllib.error import URLError
+from urllib.request import Request, urlopen
+
+# ---------------------------------------------------------------------------
+# HTTP helpers
+# ---------------------------------------------------------------------------
+
+
+def request_json(
+    url: str, *, method: str = "GET", body: dict | None = None, timeout: int = 10
+) -> dict:
+    """Send a JSON request to ``url`` and return the decoded JSON response.
+
+    ``body``, when given, is serialised as the UTF-8 JSON request payload.
+    """
+    payload = json.dumps(body).encode("utf-8") if body is not None else None
+    json_headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    req = Request(url, data=payload, headers=json_headers, method=method)
+    with urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+# ---------------------------------------------------------------------------
+# Routine lookup
+# ---------------------------------------------------------------------------
+
+
+def find_routine(base_url: str, query: str) -> dict | None:
+    """Return the single routine that best matches ``query``, or ``None``.
+
+    Matching is case-insensitive and tried in order of decreasing strictness:
+    exact name, exact routine_id, name prefix, then substring of name or goal.
+    The prefix and substring stages succeed only when exactly one routine
+    matches; when several do, the candidates are printed and ``None`` is
+    returned. Errors raised by ``request_json`` propagate to the caller.
+    """
+    data = request_json(f"{base_url}/routines")
+    routines = data.get("routines", [])
+    if not routines:
+        return None
+
+    q = query.lower()
+
+    # 1. Exact name match
+    for r in routines:
+        if r["name"].lower() == q:
+            return r
+
+    # 2. Exact routine_id match
+    for r in routines:
+        if r["routine_id"].lower() == q:
+            return r
+
+    # 3. Prefix match on name, then 4. substring match on name or goal.
+    # A missing or null "goal" is treated as empty text rather than crashing.
+    prefix = [r for r in routines if r["name"].lower().startswith(q)]
+    sub = [
+        r
+        for r in routines
+        if q in r["name"].lower() or q in (r.get("goal") or "").lower()
+    ]
+    for how, found in (("prefix", prefix), ("substring", sub)):
+        if len(found) == 1:
+            return found[0]
+        if len(found) > 1:
+            print(f"[replay:ambiguous] Multiple routines match that {how}:", flush=True)
+            for r in found:
+                print(f"  {r['name']}  (id={r['routine_id']})", flush=True)
+            print("Provide a more specific name or the full routine_id.", flush=True)
+            return None
+    return None
+
+
+# ---------------------------------------------------------------------------
+# SSE streaming  (same conventions as send_task.py)
+# ---------------------------------------------------------------------------
+
+
+def _format_event(event_type: str, data: dict) -> None:  # print one SSE event as a tagged line
+    if event_type == "complete":
+        print(f"[complete] {data.get('message', '')}", flush=True)
+        return
+
+    if event_type == "usage_metrics":
+        metrics = data.get("metrics", {})
+        model_name = metrics.get("model_name", "unknown")
+        cost = metrics.get("accumulated_cost", 0)  # NOTE(review): printed as "cost_rmb"; currency unverified here
+        token_usage = metrics.get("accumulated_token_usage", {})
+        total_tokens = token_usage.get("total_tokens", 0)
+        if total_tokens == 0:  # no total reported (or zero): sum the individual counters
+            total_tokens = (
+                token_usage.get("prompt_tokens", 0)
+                + token_usage.get("completion_tokens", 0)
+                + token_usage.get("reasoning_tokens", 0)
+            )
+        print(
+            f"[usage] model={model_name} cost_rmb={cost:.6f} tokens={total_tokens}",
+            flush=True,
+        )
+        return
+
+    if event_type != "agent_event":  # any other event type is dumped verbatim as JSON
+        print(f"[{event_type}] {json.dumps(data, ensure_ascii=False)}", flush=True)
+        return
+
+    data_type = data.get("type", "unknown")  # agent events are dispatched on their "type" field
+
+    if data_type == "SystemPromptEvent":  # only the prompt's length is shown, not its text
+        text_len = len(data.get("text", ""))
+        print(
+            f"[system_prompt] suppressed ({text_len} chars)",
+            flush=True,
+        )
+        return
+
+    if data_type == "MessageEvent":
+        role = data.get("role", "unknown")
+        text = data.get("text", "")
+        print(f"[message:{role}] {text}", flush=True)
+        return
+
+    if data_type == "ThoughtEvent":
+        thought = data.get("thought", data.get("content", ""))  # falls back to "content"
+        print(f"[thought] {thought}", flush=True)
+        return
+
+    if data_type == "ActionEvent":
+        action = data.get("action", {})
+        if isinstance(action, dict):  # structured action: print its name plus selected fields
+            action_name = action.get("action", "unknown")
+            element_id = action.get("element_id")
+            url = action.get("url")
+            text = action.get("text")
+            extras = []
+            if element_id:  # each extra is included only when its value is truthy
+                extras.append(f"element_id={element_id}")
+            if url:
+                extras.append(f"url={url}")
+            if text:
+                extras.append(f"text={text!r}")
+            suffix = (" " + " ".join(extras)) if extras else ""
+            print(f"[action] {action_name}{suffix}", flush=True)
+        else:  # non-dict action payloads are printed as-is
+            print(f"[action] {action}", flush=True)
+        return
+
+    if data_type == "ObservationEvent":
+        success = data.get("success", False)  # a missing flag is reported as "error"
+        message = data.get("message", "")
+        state = "ok" if success else "error"
+        print(f"[observation:{state}] {message}", flush=True)
+        return
+
+    if data_type == "ErrorEvent":
+        print(f"[error] {data.get('error', 'unknown error')}", flush=True)
+        return
+
+    print(  # unrecognised agent event types are dumped as raw JSON
+        f"[agent_event:{data_type}] {json.dumps(data, ensure_ascii=False)}",
+        flush=True,
+    )
+
+
+def stream_replay(
+    base_url: str,
+    conversation_id: str,
+    task: str,
+    cwd: str,
+    chrome_uuid: str,
+) -> None:
+    """POST ``task`` to the conversation and print each SSE event as it arrives.
+
+    An event is dispatched at the blank line that ends it. Lines may end in LF
+    or CRLF, and repeated ``data:`` lines of one event are joined with newlines.
+    A payload that is not valid JSON is printed raw.
+    """
+    payload = {"text": task, "cwd": cwd, "browser_id": chrome_uuid}
+    req = Request(
+        f"{base_url}/agent/conversations/{conversation_id}/messages",
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json", "Accept": "text/event-stream"},
+        method="POST",
+    )
+
+    with urlopen(req, timeout=None) as response:
+        sse_event: str | None = None
+        sse_data: str | None = None
+        for raw_line in response:
+            # Strip CR as well as LF so CRLF-framed streams still yield blank lines.
+            line = raw_line.decode("utf-8").rstrip("\r\n")
+            if not line:
+                if sse_event and sse_data is not None:
+                    try:
+                        _format_event(sse_event, json.loads(sse_data))
+                    except json.JSONDecodeError:
+                        print(f"[{sse_event}] {sse_data}", flush=True)
+                sse_event = None
+                sse_data = None
+                continue
+
+            if line.startswith("event:"):
+                sse_event = line[6:].strip()
+            elif line.startswith("data:"):
+                chunk = line[5:].lstrip()
+                sse_data = chunk if sse_data is None else f"{sse_data}\n{chunk}"
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+
+def main() -> int:
+    """Replay the requested routine and stream the agent's output.
+
+    Returns 0 on success or listing, 2 if no browser UUID, 130 on Ctrl-C, else 1.
+    """
+    parser = argparse.ArgumentParser(
+        description="Replay a saved Browser Routine in Chrome",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument(
+        "routine", nargs="?", help="Routine name, ID, or prefix to replay"
+    )
+    parser.add_argument(
+        "--chrome-uuid",
+        default=os.environ.get("OPENBROWSER_CHROME_UUID"),
+        help="Browser UUID capability token (or set OPENBROWSER_CHROME_UUID)",
+    )
+    parser.add_argument(
+        "--cwd", default=".", help="Working directory passed to the agent"
+    )
+    parser.add_argument(
+        "--list", action="store_true", help="List available routines and exit"
+    )
+    parser.add_argument(
+        "--url", default="http://127.0.0.1:8765", help="OpenBrowser server URL"
+    )
+    args = parser.parse_args()
+
+    try:
+        if args.list or not args.routine:
+            data = request_json(f"{args.url}/routines")
+            routines = data.get("routines", [])
+            if not routines:
+                print("No routines saved yet.")
+                return 0
+            print(f"{'NAME':<30}  {'STEPS':>5}  GOAL")
+            print("-" * 72)
+            for r in routines:
+                print(
+                    f"{r['name']:<30}  {r.get('step_count', '?'):>5}  {r.get('goal', '')}"
+                )
+            return 0
+
+        if not args.chrome_uuid:
+            print(
+                "Browser UUID is required. Set OPENBROWSER_CHROME_UUID or pass --chrome-uuid.",
+                file=sys.stderr,
+            )
+            return 2
+
+        # ── Find the routine ──────────────────────────────────────────────
+        routine = find_routine(args.url, args.routine)
+        if routine is None:
+            print(
+                f"[replay:not_found] No routine found matching {args.routine!r}. "
+                "Run with --list to see available routines.",
+                file=sys.stderr,
+            )
+            return 1
+
+        name = routine["name"]
+        routine_id = routine["routine_id"]
+        goal = routine.get("goal", "")
+        routine_markdown = routine.get("routine_markdown") or ""
+
+        print(f"[replay:routine] {name}  id={routine_id}", flush=True)
+        if goal:
+            print(f"[replay:goal] {goal}", flush=True)
+
+        # ── Refuse to send an empty task (missing, null, or blank markdown) ──
+        if not routine_markdown.strip():
+            print(
+                f"[replay:empty] Routine {name!r} has no routine_markdown to replay.",
+                file=sys.stderr,
+            )
+            return 1
+
+        # ── Validate browser UUID ─────────────────────────────────────────
+        browser_status = request_json(f"{args.url}/browsers/{args.chrome_uuid}/valid")
+        if not browser_status.get("valid", False):
+            msg = browser_status.get("message", "browser UUID is not valid")
+            print(f"Browser UUID validation failed: {msg}", file=sys.stderr)
+            return 1
+
+        # ── Create conversation in routine_replay mode ────────────────────
+        conv_result = request_json(
+            f"{args.url}/agent/conversations",
+            method="POST",
+            body={
+                "cwd": args.cwd,
+                "browser_id": args.chrome_uuid,
+                "mode": "routine_replay",
+            },
+        )
+        conversation_id = conv_result["conversation_id"]
+        print(f"[replay:conversation] {conversation_id}", flush=True)
+
+        # ── Send routine markdown as the task ────────────────────────────
+        stream_replay(
+            args.url, conversation_id, routine_markdown, args.cwd, args.chrome_uuid
+        )
+        return 0
+
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+    except KeyboardInterrupt:
+        print("Interrupted.", file=sys.stderr)
+        return 130
+    except Exception as exc:
+        print(f"Replay failed: {exc}", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":  # executed as a script, not imported
+    raise SystemExit(main())  # main()'s return value becomes the exit status
diff --git a/skill/claude/ob-routines/scripts/start_recording.py b/skill/claude/ob-routines/scripts/start_recording.py
new file mode 100644
index 0000000..a34fad2
--- /dev/null
+++ b/skill/claude/ob-routines/scripts/start_recording.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""Start a new browser recording session.
+
+The server sends a command to the Chrome extension which opens a dedicated
+recording window. After this script exits, the user performs their actions
+in that browser window. When done, they return to the terminal and run
+stop_recording.py with the printed recording_id.
+
+Example:
+  python3 start_recording.py \\
+    --chrome-uuid "$OPENBROWSER_CHROME_UUID" \\
+    --name "Gmail compose flow" \\
+    --intent "draft a new email to a contact and send it"
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from urllib.error import URLError
+from urllib.request import Request, urlopen
+
+
+def request_json(
+    url: str, *, method: str = "GET", body: dict | None = None, timeout: int = 10
+) -> dict:
+    """Send a JSON request to ``url`` and return the decoded JSON response.
+
+    ``body``, when given, is serialised as the UTF-8 JSON request payload.
+    """
+    payload = json.dumps(body).encode("utf-8") if body is not None else None
+    json_headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    req = Request(url, data=payload, headers=json_headers, method=method)
+    with urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def main() -> int:
+    """Start a recording session and print its recording_id; return an exit code."""
+    parser = argparse.ArgumentParser(
+        description="Start a new recording session in Chrome",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument(
+        "--chrome-uuid",
+        default=os.environ.get("OPENBROWSER_CHROME_UUID"),
+        help="Browser UUID capability token (or set OPENBROWSER_CHROME_UUID)",
+    )
+    parser.add_argument("--name", help="Human-readable name for this recording session")
+    parser.add_argument(
+        "--intent",
+        help="Short description of what you intend to record (guides compilation later)",
+    )
+    parser.add_argument(
+        "--url", default="http://127.0.0.1:8765", help="OpenBrowser server URL"
+    )
+    args = parser.parse_args()
+
+    if not args.chrome_uuid:
+        print(
+            "Browser UUID is required. Set OPENBROWSER_CHROME_UUID or pass --chrome-uuid.",
+            file=sys.stderr,
+        )
+        return 2
+
+    try:
+        # Validate browser connectivity first
+        browser_status = request_json(f"{args.url}/browsers/{args.chrome_uuid}/valid")
+        if not browser_status.get("valid", False):
+            msg = browser_status.get("message", "browser UUID is not valid")
+            print(f"Browser UUID validation failed: {msg}", file=sys.stderr)
+            return 1
+
+        # Create and start recording
+        payload: dict = {"browser_id": args.chrome_uuid}
+        if args.name:
+            payload["name"] = args.name
+
+        result = request_json(f"{args.url}/recordings", method="POST", body=payload)
+        if not result.get("success"):
+            print(f"Failed to create recording: {result}", file=sys.stderr)
+            return 1
+
+        recording_id = result["recording"]["recording_id"]
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+    except Exception as exc:
+        print(f"Failed to start recording: {exc}", file=sys.stderr)
+        return 1
+
+    # The recording is live from here on, so a failed intent note only warns:
+    # aborting would hide the recording_id the user needs to stop the session.
+    intent_saved = False
+    if args.intent:
+        try:
+            request_json(
+                f"{args.url}/recordings/{recording_id}/intent-note",
+                method="POST",
+                body={"intent_note": args.intent},
+            )
+            intent_saved = True
+        except Exception as exc:
+            print(f"[recording:warning] Intent note not saved: {exc}", file=sys.stderr)
+
+    name_display = f" ({args.name})" if args.name else ""
+    print(f"[recording:started] {recording_id}{name_display}", flush=True)
+    if intent_saved:
+        print(f"[recording:intent] {args.intent}", flush=True)
+    print(
+        "\nA recording window has opened in Chrome.\n"
+        "Perform your actions in the browser, then return here and run:\n\n"
+        f"  python3 stop_recording.py {recording_id}\n",
+        flush=True,
+    )
+    return 0
+
+
+if __name__ == "__main__":  # executed as a script, not imported
+    raise SystemExit(main())  # main()'s return value becomes the exit status
diff --git a/skill/claude/ob-routines/scripts/stop_recording.py b/skill/claude/ob-routines/scripts/stop_recording.py
new file mode 100644
index 0000000..6d91656
--- /dev/null
+++ b/skill/claude/ob-routines/scripts/stop_recording.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""Stop an active recording session.
+
+Sends a stop command to the Chrome extension, which closes the recording
+window and flushes the event buffer. Prints the final event count so the
+agent knows how much was captured before kicking off compilation.
+
+Example:
+  python3 stop_recording.py abc123-recording-id
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from urllib.error import URLError
+from urllib.request import Request, urlopen
+
+
+def request_json(
+    url: str, *, method: str = "GET", body: dict | None = None, timeout: int = 15
+) -> dict:
+    """Send a JSON request to ``url`` and return the decoded JSON response.
+
+    ``body``, when given, is serialised as the UTF-8 JSON request payload.
+    """
+    payload = json.dumps(body).encode("utf-8") if body is not None else None
+    json_headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    req = Request(url, data=payload, headers=json_headers, method=method)
+    with urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def main() -> int:
+    """Stop the recording named on the command line and report what was captured.
+
+    Returns 1 when the request fails or the server reports no success, else 0.
+    """
+    cli = argparse.ArgumentParser(
+        description="Stop an active recording session",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    cli.add_argument("recording_id", help="Recording ID from start_recording.py")
+    cli.add_argument(
+        "--url", default="http://127.0.0.1:8765", help="OpenBrowser server URL"
+    )
+    opts = cli.parse_args()
+
+    stop_url = f"{opts.url}/recordings/{opts.recording_id}/stop"
+    try:
+        reply = request_json(stop_url, method="POST", body={})
+    except URLError as exc:
+        print(f"Cannot reach OpenBrowser server: {exc}", file=sys.stderr)
+        return 1
+    except Exception as exc:
+        print(f"Failed to stop recording: {exc}", file=sys.stderr)
+        return 1
+
+    if not reply.get("success"):
+        print(f"Stop failed: {reply}", file=sys.stderr)
+        return 1
+
+    info = reply.get("recording") or {}
+    captured = info.get("event_count", "?")
+    label = info.get("name") or ""
+    reason = reply.get("stop_reason", "")
+
+    # The name is appended in parentheses only when the server reported one.
+    suffix = f" ({label})" if label else ""
+    print(f"[recording:stopped] {opts.recording_id}{suffix}", flush=True)
+    print(f"[recording:events] {captured} events captured", flush=True)
+    if reason == "browser_disconnected":
+        print(
+            "[recording:warning] Browser was disconnected — recording marked stopped "
+            "locally. Event capture may be incomplete.",
+            flush=True,
+        )
+
+    print(
+        "\nRecording stopped. To compile this recording into a routine, run:\n\n"
+        f"  python3 compile.py {opts.recording_id}\n",
+        flush=True,
+    )
+    return 0
+
+
+if __name__ == "__main__":  # executed as a script, not imported
+    raise SystemExit(main())  # main()'s return value becomes the exit status
diff --git a/skill/claude/open-browser/SKILL.md b/skill/claude/open-browser/SKILL.md
index 1130574..3b7478c 100644
--- a/skill/claude/open-browser/SKILL.md
+++ b/skill/claude/open-browser/SKILL.md
@@ -37,7 +37,7 @@ Before sending a browser task, confirm all of the following:
 Run this first:
 
 ```bash
-python3 skill/claude/open-browser/scripts/check_status.py --chrome-uuid "$OPENBROWSER_CHROME_UUID"
+python3 ~/.claude/skills/open-browser/scripts/check_status.py --chrome-uuid "$OPENBROWSER_CHROME_UUID"
 ```
 
 If readiness fails, read [references/setup.md](references/setup.md) or
@@ -72,7 +72,7 @@ Code, because the SSE stream becomes part of your conversation context
 without any extra plumbing:
 
 ```bash
-python3 skill/claude/open-browser/scripts/send_task.py \
+python3 ~/.claude/skills/open-browser/scripts/send_task.py \
   "Open https://example.com and report the page title" \
   --chrome-uuid "$OPENBROWSER_CHROME_UUID"
 ```
@@ -115,7 +115,7 @@ encoded, and sent as data URIs — no upload endpoint or static server is
 required. Limit: 10 MB per image, up to 8 images per message.
 
 ```bash
-python3 skill/claude/open-browser/scripts/send_task.py \
+python3 ~/.claude/skills/open-browser/scripts/send_task.py \
   "Open the local dashboard and tell me which section looks different from this screenshot." \
   --image /tmp/reference.png \
   --chrome-uuid "$OPENBROWSER_CHROME_UUID"
@@ -141,7 +141,7 @@ keeps its prior screenshots and observations), reuse the conversation
 ID from the previous run:
 
 ```bash
-python3 skill/claude/open-browser/scripts/send_task.py \
+python3 ~/.claude/skills/open-browser/scripts/send_task.py \
   "Now click the 'Sign in' button you just identified" \
   --chrome-uuid "$OPENBROWSER_CHROME_UUID" \
   --conversation-id 1b32b26a-1a7e-4b6c-9599-139fc6b9c89b
@@ -153,14 +153,16 @@ report a value it already saw.
 
 ## Working Directory
 
-Run commands from the OpenBrowser repo root so the relative script
-paths resolve cleanly.
+The skill's scripts live under `~/.claude/skills/open-browser/scripts/`,
+so they work from any project's current working directory. The OpenBrowser
+server itself must still be started from the root of your OpenBrowser
+checkout (e.g. `uv run local-chrome-server serve` in `~/git/OpenBrowser`).
 
 Use `--cwd` when the browser task should operate with context from
 another workspace:
 
 ```bash
-python3 skill/claude/open-browser/scripts/send_task.py \
+python3 ~/.claude/skills/open-browser/scripts/send_task.py \
   "Open the local app and verify the login flow" \
   --cwd /absolute/path/to/project \
   --chrome-uuid "$OPENBROWSER_CHROME_UUID"
diff --git a/skill/claude/open-browser/references/setup.md b/skill/claude/open-browser/references/setup.md
index 5abbc1c..477596e 100644
--- a/skill/claude/open-browser/references/setup.md
+++ b/skill/claude/open-browser/references/setup.md
@@ -45,7 +45,7 @@ drive the browser that registered it.
 ## Quick verification
 
 ```bash
-python3 skill/claude/open-browser/scripts/check_status.py --chrome-uuid "$OPENBROWSER_CHROME_UUID"
+python3 ~/.claude/skills/open-browser/scripts/check_status.py --chrome-uuid "$OPENBROWSER_CHROME_UUID"
 ```
 
 Expected outcome:
diff --git a/skill/claude/open-browser/scripts/check_status.py b/skill/claude/open-browser/scripts/check_status.py
index c218162..8752bf7 100644
--- a/skill/claude/open-browser/scripts/check_status.py
+++ b/skill/claude/open-browser/scripts/check_status.py
@@ -136,7 +136,7 @@ def main() -> int:
         print("Ready for browser automation.")
         return 0
 
-    print("Not ready. See skill/claude/open-browser/references/setup.md if needed.")
+    print("Not ready. See ~/.claude/skills/open-browser/references/setup.md if needed.")
     return 1
 
 
diff --git a/uv.lock b/uv.lock
index 418acbe..36f3fc5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1678,8 +1678,8 @@ requires-dist = [
     { name = "litellm", git = "https://github.com/softpudding/litellm.git?rev=2eb7db59461e9117b1e3e0519616b39f1497c0f9" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.7.0" },
     { name = "numpy", specifier = ">=1.24.0" },
-    { name = "openhands-sdk", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=764fb87256d7bc20b3eccf82c8a4d241e6740d63" },
-    { name = "openhands-tools", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=764fb87256d7bc20b3eccf82c8a4d241e6740d63" },
+    { name = "openhands-sdk", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4" },
+    { name = "openhands-tools", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4" },
     { name = "pillow", specifier = ">=10.0.0" },
     { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.0" },
     { name = "pydantic", specifier = ">=2.5.0" },
@@ -2224,7 +2224,7 @@ wheels = [
 [[package]]
 name = "openhands-sdk"
 version = "1.12.0"
-source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=764fb87256d7bc20b3eccf82c8a4d241e6740d63#764fb87256d7bc20b3eccf82c8a4d241e6740d63" }
+source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4#bd4cb296355c3d03dd411883e78527b1915fa8c4" }
 dependencies = [
     { name = "agent-client-protocol" },
     { name = "deprecation" },
@@ -2244,7 +2244,7 @@ dependencies = [
 [[package]]
 name = "openhands-tools"
 version = "1.12.0"
-source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=764fb87256d7bc20b3eccf82c8a4d241e6740d63#764fb87256d7bc20b3eccf82c8a4d241e6740d63" }
+source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4#bd4cb296355c3d03dd411883e78527b1915fa8c4" }
 dependencies = [
     { name = "bashlex" },
     { name = "binaryornot" },