EtanHey · EtanHey · Mar 27, 2026 · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# backfill_orchestrate.sh — Full pipeline orchestrator for Gemini batch enrichment
+# Handles: resume submitted → fix broken-completed → resubmit failed+unsubmitted → resume again
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PYTHON="$SCRIPT_DIR/../.venv/bin/python3"
+BACKFILL="$SCRIPT_DIR/cloud_backfill.py"
-PYTHON="$SCRIPT_DIR/../.venv/bin/python3"
-BACKFILL="$SCRIPT_DIR/cloud_backfill.py"
+# Interpreter from the virtualenv one directory above this script.
+PYTHON="$SCRIPT_DIR/../.venv/bin/python3"
+if [[ ! -x "$PYTHON" ]]; then
+    # Diagnostics go to stderr so they are not mixed into captured stdout.
+    echo "ERROR: Python not found at $PYTHON" >&2
+    echo "Ensure virtual environment is set up: python3 -m venv .venv" >&2
+    exit 1
+fi
+# Backfill driver script located next to this orchestrator.
+BACKFILL="$SCRIPT_DIR/cloud_backfill.py"
-PYTHON="$SCRIPT_DIR/../.venv/bin/python3"
-BACKFILL="$SCRIPT_DIR/cloud_backfill.py"
+# Interpreter from the virtualenv one directory above this script.
+PYTHON="$SCRIPT_DIR/../.venv/bin/python3"
+if [[ ! -x "$PYTHON" ]]; then
+    # Diagnostics go to stderr so they are not mixed into captured stdout.
+    echo "ERROR: Python not found at $PYTHON" >&2
+    echo "Ensure virtual environment is set up: python3 -m venv .venv" >&2
+    exit 1
+fi
+# Backfill driver script located next to this orchestrator.
+BACKFILL="$SCRIPT_DIR/cloud_backfill.py"
+# Log file that receives a copy of the orchestrator's output.
+LOG="/tmp/backfill_orchestrate.log"
+# Snapshot the key once; expands to empty when unset, which is safe under `set -u`.
+API_KEY="${GOOGLE_API_KEY:-}"
+
+# Refuse to start without an API key.
+if [[ -z "$API_KEY" ]]; then
+    echo "ERROR: GOOGLE_API_KEY not set" >&2
+    exit 1
+fi
+
+# log MESSAGE... — print the arguments prefixed with an [HH:MM:SS] timestamp
+# to stdout and append the same line to $LOG.
+log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; }
+
+log "=== ORCHESTRATOR START ==="
+# Work from the parent of the script directory.
+cd "$SCRIPT_DIR/.."
+
+# PHASE 1: Resume all submitted batches (may already be running)
+log "Phase 1: Resume submitted batches..."
+# stderr is merged into stdout so both streams are shown and appended to $LOG.
+GOOGLE_API_KEY="$API_KEY" "$PYTHON" -u "$BACKFILL" --resume 2>&1 | tee -a "$LOG"
+
+# PHASE 2: Fix "completed" batches that had 0 imports (broken download bug)
+# The broken run completed all its batches at 2026-03-14T12:21 UTC.
+# Current (correct) run completes batches after 2026-03-14T12:22 UTC.
+log ""
+log "Phase 2: Checking for incorrectly-completed batches (0 imports due to broken download)..."
+# The Python program is read from stdin ("-"); the quoted 'PYEOF' delimiter
+# keeps the shell from expanding anything inside the here-document.
+"$PYTHON" -u - <<'PYEOF' 2>&1 | tee -a "$LOG"
+import apsw
+from pathlib import Path
+
+# Checkpoint database under the current user's home directory.
+cp_db = Path.home() / '.local/share/brainlayer/enrichment_checkpoints.db'
+conn = apsw.Connection(str(cp_db))
+# Wait up to 10000 ms on a locked database before giving up.
+conn.setbusytimeout(10000)
+
+# Find completed batches from the broken 3rd run (completed at 12:21 UTC)
+# These had 0 imports due to Files.download(name=) bug that has since been fixed
+# NOTE(review): the cutoff is compared against completed_at as a string; this
+# assumes the column holds ISO-8601 text in the same timezone — TODO confirm.
+rows = list(conn.cursor().execute("""
+    SELECT batch_id, chunk_count, completed_at
+    FROM enrichment_checkpoints
+    WHERE status = 'completed'
+    AND completed_at < '2026-03-14T12:22:00'
+"""))
+
+print(f"Found {len(rows)} broken 'completed' batches (before fix at 12:22 UTC)")
+if rows:
+    # Reset them to 'submitted' so --resume will re-process them
+    # The UPDATE repeats the SELECT's WHERE clause; the count printed below
+    # comes from the earlier SELECT, not from the UPDATE itself.
+    conn.cursor().execute("""
+        UPDATE enrichment_checkpoints
+        SET status = 'submitted', completed_at = NULL
+        WHERE status = 'completed'
+        AND completed_at < '2026-03-14T12:22:00'
+    """)
+    print(f"Reset {len(rows)} batches to 'submitted'")
+else:
+    print("No broken batches found - all completed batches imported correctly")
+
+conn.close()
+PYEOF
+
+# PHASE 3: Resume the reset batches
+log ""
+log "Phase 3: Resume reset batches..."
+GOOGLE_API_KEY="$API_KEY" "$PYTHON" -u "$BACKFILL" --resume 2>&1 | tee -a "$LOG"
+
+# PHASE 4: Resubmit failed batches + submit unsubmitted JSONL files
+log ""
+log "Phase 4: Resubmit failed/unsubmitted batches..."
+GOOGLE_API_KEY="$API_KEY" "$PYTHON" -u "$BACKFILL" --submit-only 2>&1 | tee -a "$LOG"
+
+log ""
+log "Phase 4 done. Waiting 60s for jobs to register before polling..."
+sleep 60
+
+# PHASE 5: Resume all newly submitted batches
+log ""
+log "Phase 5: Resume newly submitted batches (poll + import)..."
+GOOGLE_API_KEY="$API_KEY" "$PYTHON" -u "$BACKFILL" --resume 2>&1 | tee -a "$LOG"
+
+# Final stats
+log ""
+log "=== FINAL STATUS ==="
+GOOGLE_API_KEY="$API_KEY" "$PYTHON" -u "$BACKFILL" --status 2>&1 | tee -a "$LOG"
+
+log "=== ORCHESTRATOR COMPLETE ==="
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""Paced batch submission — submits one batch at a time with delays to avoid 429s.
+Bypasses VectorStore entirely to avoid DB lock issues with BrainBar.
+
+Usage: GOOGLE_API_KEY=... python3 scripts/batch_submit_paced.py [--delay 45] [--max-retries 5]
+"""
+import glob, json, os, sys, time
+from pathlib import Path
+
+try:
+    import google.generativeai as genai
+except ImportError:
+    print("pip install google-generativeai"); sys.exit(1)
+
+API_KEY = os.environ.get("GOOGLE_API_KEY")
+if not API_KEY:
+    print("ERROR: GOOGLE_API_KEY required"); sys.exit(1)
+
+genai.configure(api_key=API_KEY)
+
+# Minimal flag parsing: the token following "--delay" / "--max-retries" is
+# converted to int; when a flag is absent the defaults are 45 and 10.
+DELAY = int(sys.argv[sys.argv.index("--delay") + 1]) if "--delay" in sys.argv else 45
+MAX_RETRIES = int(sys.argv[sys.argv.index("--max-retries") + 1]) if "--max-retries" in sys.argv else 10
+MODEL = "gemini-2.5-flash-lite"
+
+# Track submitted files in a JSON state file next to this script. Existing
+# entries are loaded at startup, so they carry over from earlier invocations.
+STATE_FILE = Path(__file__).parent / "backfill_data" / ".paced_state.json"
+submitted = {}
+if STATE_FILE.exists():
+    submitted = json.loads(STATE_FILE.read_text())
+
+# Find all batch JSONL files (sorted by path for a stable processing order)
+batch_files = sorted(glob.glob(str(Path(__file__).parent / "backfill_data" / "batch_*.jsonl")))
+# Startup banner summarising the run configuration.
+print(f"Total batch files: {len(batch_files)}")
+print(f"Already submitted (this run): {len(submitted)}")
+print(f"Delay between submissions: {DELAY}s")
+print(f"Max retries per batch: {MAX_RETRIES}")
+print(f"Model: {MODEL}")
+print()
+
+created = 0
+skipped = 0
+failed = 0
+
+# Walk every batch file in sorted order; files already recorded in `submitted` are skipped.
+for i, fpath in enumerate(batch_files):
+    fname = Path(fpath).name
+    if fname in submitted:
+        skipped += 1
+        continue
+
+    # Count lines in this file; each line is counted as one chunk
+    with open(fpath) as f:
+        chunks = sum(1 for _ in f)
+
+    print(f"[{i+1}/{len(batch_files)}] {fname} ({chunks} chunks)")
+
+    # Upload file, retrying up to MAX_RETRIES times
+    for attempt in range(MAX_RETRIES):
+        try:
+            print(f"  Uploading...", end="", flush=True)
+            uploaded = genai.upload_file(fpath)
+            print(f" ok ({uploaded.name})")
+            break
+        # NOTE(review): this handler reports every exception as "429" and retries,
+        # whatever the actual cause; wait grows by 30s per attempt, capped at 300s.
+        except Exception as e:
+            wait = min(30 * (attempt + 1), 300)
+            print(f" 429, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
+            time.sleep(wait)
-        except Exception as e:
-            wait = min(30 * (attempt + 1), 300)
-            print(f" 429, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
-            time.sleep(wait)
+        except Exception as e:
+            err_str = str(e)
+            if "429" in err_str or "RESOURCE_EXHAUSTED" in err_str:
+                wait = min(30 * (attempt + 1), 300)
+                print(f" rate-limited, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
+                time.sleep(wait)
+            else:
+                # NOTE(review): this break leaves the retry loop without reaching the
+                # for-else below, so control appears to continue to the batch-creation
+                # step without a successful upload for this file — confirm.
+                print(f" ERROR: {e}")
+                failed += 1
+                break
-        except Exception as e:
-            wait = min(30 * (attempt + 1), 300)
-            print(f" 429, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
-            time.sleep(wait)
+        except Exception as e:
+            err_str = str(e)
+            if "429" in err_str or "RESOURCE_EXHAUSTED" in err_str:
+                wait = min(30 * (attempt + 1), 300)
+                print(f" rate-limited, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
+                time.sleep(wait)
+            else:
+                # NOTE(review): same concern as the identical branch above — break skips
+                # the for-else, so the failed upload is not skipped — confirm.
+                print(f" ERROR: {e}")
+                failed += 1
+                break
+    # for-else: runs only when the retry loop finished without a break,
+    # i.e. every upload attempt raised.
+    else:
+        print(f"  FAILED upload after {MAX_RETRIES} retries, skipping")
+        failed += 1
+        continue
+
+    # Create batch job from the uploaded file, retrying up to MAX_RETRIES times
+    for attempt in range(MAX_RETRIES):
+        try:
+            print(f"  Creating batch job...", end="", flush=True)
+            job = genai.batches.create(
+                model=f"models/{MODEL}",
+                src=uploaded.uri,
+                config={"display_name": fname},
+            )
+            print(f" ok ({job.name}, {job.state})")
+            # Record the job under the file name and persist the whole state map
+            submitted[fname] = {"job_name": job.name, "chunks": chunks, "time": time.strftime("%Y-%m-%dT%H:%M:%S")}
+            STATE_FILE.write_text(json.dumps(submitted, indent=2))
-            submitted[fname] = {"job_name": job.name, "chunks": chunks, "time": time.strftime("%Y-%m-%dT%H:%M:%S")}
-            STATE_FILE.write_text(json.dumps(submitted, indent=2))
+# NOTE(review): the next two lines look like placeholders from a review suggestion;
+# `tempfile` is not used by the lines shown here, and a column-0 import inside this
+# try block would not parse — confirm before applying.
+import tempfile
+...
+            submitted[fname] = {"job_name": job.name, "chunks": chunks, "time": time.strftime("%Y-%m-%dT%H:%M:%S")}
+            # Atomic write to prevent corruption on crash
+            tmp = STATE_FILE.with_suffix('.tmp')
+            tmp.write_text(json.dumps(submitted, indent=2))
+            tmp.rename(STATE_FILE)
-            submitted[fname] = {"job_name": job.name, "chunks": chunks, "time": time.strftime("%Y-%m-%dT%H:%M:%S")}
-            STATE_FILE.write_text(json.dumps(submitted, indent=2))
+import tempfile
+...
+            submitted[fname] = {"job_name": job.name, "chunks": chunks, "time": time.strftime("%Y-%m-%dT%H:%M:%S")}
+            # Atomic write to prevent corruption on crash
+            tmp = STATE_FILE.with_suffix('.tmp')
+            tmp.write_text(json.dumps(submitted, indent=2))
+            tmp.rename(STATE_FILE)
+            created += 1
+            break
+        except Exception as e:
+            # Only rate-limit errors are retried; wait grows by 60s per attempt, capped at 600s
+            if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
+                wait = min(60 * (attempt + 1), 600)
+                print(f" 429, waiting {wait}s (attempt {attempt+1}/{MAX_RETRIES})")
+                time.sleep(wait)
+            else:
+                print(f" ERROR: {e}")
+                failed += 1
+                break
+    # for-else: runs only when no attempt ended in a break
+    else:
+        print(f"  FAILED create after {MAX_RETRIES} retries, skipping")
+        failed += 1
+        continue
-            else:
-                print(f" ERROR: {e}")
-                failed += 1
-                break
-    else:
-        print(f"  FAILED create after {MAX_RETRIES} retries, skipping")
-        failed += 1
-        continue
+            else:
+                # NOTE(review): `continue` here applies to the inner retry loop, so a
+                # non-rate-limit error moves on to the next attempt and `failed` is
+                # incremented once per attempt — confirm this is intended.
+                print(f" ERROR: {e}")
+                failed += 1
+                continue
+    else:
+        print(f"  FAILED create after {MAX_RETRIES} retries, skipping")
+        failed += 1
+        continue
-            else:
-                print(f" ERROR: {e}")
-                failed += 1
-                break
-    else:
-        print(f"  FAILED create after {MAX_RETRIES} retries, skipping")
-        failed += 1
-        continue
+            else:
+                print(f" ERROR: {e}")
+                failed += 1
+                continue
+    else:
+        print(f"  FAILED create after {MAX_RETRIES} retries, skipping")
+        failed += 1
+        continue
+
+    # Pace ourselves: sleep DELAY seconds between files, but not after the last one
+    if i < len(batch_files) - 1:
+        print(f"  Waiting {DELAY}s before next...")
+        time.sleep(DELAY)
+
+# Final tally of the counters accumulated in the loop above
+print(f"\nDone! Created: {created}, Skipped (already done): {skipped}, Failed: {failed}")
+print(f"State saved to {STATE_FILE}")
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.brainlayer.enrichment</string>
+
+    <!-- Unified enrichment daemon — replaces com.brainlayer.enrich
+         Runs realtime enrichment hourly on recent chunks.
+         For batch/local modes, use the CLI or brain_enrich MCP tool. -->
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>__BRAINLAYER_BIN__</string>
+        <string>enrich</string>
+        <string>--mode</string>
+        <string>realtime</string>
+        <string>--since-hours</string>
+        <string>24</string>
+        <string>--limit</string>
+        <string>50</string>
+    </array>
+
+    <!-- Launch interval in seconds: 3600 s = 1 hour -->
+    <key>StartInterval</key>
+    <integer>3600</integer>
+
+    <!-- stdout and stderr of the job are written to separate files.
+         NOTE(review): __HOME__ looks like a placeholder that the installer
+         substitutes; confirm against the install script. -->
+    <key>StandardOutPath</key>
+    <string>__HOME__/.local/share/brainlayer/logs/enrichment.log</string>
+    <key>StandardErrorPath</key>
+    <string>__HOME__/.local/share/brainlayer/logs/enrichment.err</string>
+
+    <!-- Environment passed to the enrich command -->
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/usr/local/bin:/usr/bin:/bin:__HOME__/.local/bin</string>
+        <key>PYTHONUNBUFFERED</key>
+        <string>1</string>
+        <!-- NOTE(review): presumably a timeout in seconds; confirm where this variable is read -->
+        <key>BRAINLAYER_STALL_TIMEOUT</key>
+        <string>300</string>
+        <!-- NOTE(review): __GOOGLE_API_KEY__ looks like a placeholder filled in at install time; confirm -->
+        <key>GOOGLE_API_KEY</key>
+        <string>__GOOGLE_API_KEY__</string>
+        <!-- Optional: set regional endpoint for lower latency -->
+        <!-- <key>GOOGLE_CLOUD_REGION</key>
+        <string>us-central1</string> -->
+    </dict>
+
+    <!-- Also start the job when it is loaded, in addition to the interval schedule -->
+    <key>RunAtLoad</key>
+    <true/>
+
+    <!-- Raised nice value, i.e. lower CPU scheduling priority -->
+    <key>Nice</key>
+    <integer>15</integer>
+
+    <!-- NOTE(review): launchd process-type hint; see launchd.plist(5) for the exact limits it applies -->
+    <key>ProcessType</key>
+    <string>Background</string>
+</dict>
+</plist>
@@ -70,23 +70,31 @@ case "${1:-all}" in
         install_plist index
         ;;
     enrich)
+        # Legacy — install old enrich plist
         install_plist enrich
         ;;
+    enrichment)
+        # New unified enrichment plist (replaces enrich)
+        install_plist enrichment
+        ;;
     checkpoint)
         install_plist wal-checkpoint
         ;;
     all)
         install_plist index
-        install_plist enrich
+        install_plist enrichment
         install_plist wal-checkpoint
+        # Remove old enrich plist if present
+        remove_plist enrich 2>/dev/null || true
         ;;
     remove)
         remove_plist index
-        remove_plist enrich
+        remove_plist enrich 2>/dev/null || true
+        remove_plist enrichment 2>/dev/null || true
         remove_plist wal-checkpoint
         ;;
     *)
-        echo "Usage: $0 [index|enrich|checkpoint|all|remove]"
+        echo "Usage: $0 [index|enrich|enrichment|checkpoint|all|remove]"
         exit 1
         ;;
 esac