Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
311 changes: 311 additions & 0 deletions hooks/brainlayer-prompt-search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
#!/usr/bin/env python3
"""
BrainLayer UserPromptSubmit Hook — auto-searches memories relevant to the user's prompt.

Extracts keywords from the prompt, runs FTS5 search against BrainLayer.
Two modes:
- Light (default): top 3 results, ~300 tokens
- Deep (triggered by memory words): top 8 results, ~800 tokens

Output: plain text to stdout (injected as Claude context).
Target: <500ms total.
"""

import json
import os
import re
import sqlite3
import sys
import time

# Time budget in milliseconds, measured from the start of main(). Each search
# phase is skipped once elapsed_ms(start) reaches this value.
DEADLINE_MS = 450

# Prompts shorter than this are probably greetings/commands — skip search
MIN_PROMPT_LENGTH = 15

# Trigger words that activate deep mode (more results).
# is_deep_mode() tests these as plain substrings of the lowercased prompt, so
# multi-word phrases work and a trigger also fires inside a longer word.
DEEP_TRIGGERS = {
    "remember",
    "last time",
    "previous",
    "previously",
    "before",
    "history",
    "earlier",
    "we discussed",
    "we decided",
    "we talked",
    "recall",
    "forgot",
    "what was",
    "what were",
    "when did",
    "how did",
    "brainlayer",
}

# Common English stop words to skip during keyword extraction.
# Compared against lowercased words in extract_keywords(), hence all lowercase.
STOP_WORDS = {
    "a", "an", "the", "is", "it", "in", "on", "at", "to", "for", "of",
    "and", "or", "but", "not", "with", "this", "that", "from", "by",
    "are", "was", "were", "be", "been", "being", "have", "has", "had",
    "do", "does", "did", "will", "would", "could", "should", "may",
    "might", "can", "shall", "must", "need", "let", "me", "my", "i",
    "you", "your", "we", "our", "they", "them", "their", "he", "she",
    "his", "her", "its", "if", "then", "else", "when", "where", "how",
    "what", "which", "who", "why", "so", "just", "also", "very", "too",
    "up", "out", "about", "into", "over", "after", "some", "any", "all",
    "no", "yes", "ok", "okay", "please", "thanks", "thank", "hey",
    "hi", "hello", "sure", "right", "well", "now", "here", "there",
    "like", "want", "think", "know", "see", "look", "make", "take",
    "get", "go", "come", "use", "try", "help", "tell", "give", "show",
    "work", "call", "run", "set", "add", "put", "keep", "find", "read",
    "write", "create", "build", "check", "start", "stop", "change",
    "move", "open", "close", "new", "old", "good", "bad", "big",
    "small", "first", "last", "next", "more", "less", "much", "many",
    "each", "every", "other", "same", "different", "own", "still",
    "already", "again", "even", "really", "actually", "probably",
    "maybe", "file", "code", "thing", "way", "something", "anything",
}

# Default database locations, probed in this order by get_db_path() when the
# BRAINLAYER_DB environment variable is unset or names a missing path.
# NOTE(review): the "zikaron" entry looks like a legacy location — confirm.
DB_PATHS = [
    os.path.expanduser("~/.local/share/zikaron/zikaron.db"),
    os.path.expanduser("~/.local/share/brainlayer/brainlayer.db"),
]


def get_db_path():
    """Locate the database file to search.

    The ``BRAINLAYER_DB`` environment variable takes precedence when it names
    an existing path; otherwise the first existing entry of ``DB_PATHS`` is
    used.

    Returns:
        The selected path, or ``None`` when no candidate exists on disk.
    """
    override = os.environ.get("BRAINLAYER_DB")
    if override and os.path.exists(override):
        return override
    # Fall back to the built-in locations, in declaration order.
    return next(
        (candidate for candidate in DB_PATHS if os.path.exists(candidate)),
        None,
    )


def is_deep_mode(prompt_lower):
    """Report whether the prompt asks for a deeper memory search.

    Args:
        prompt_lower: The user's prompt, already lowercased by the caller.

    Returns:
        ``True`` when any phrase in ``DEEP_TRIGGERS`` occurs as a substring of
        ``prompt_lower``, otherwise ``False``.
    """
    return any(phrase in prompt_lower for phrase in DEEP_TRIGGERS)


def extract_keywords(prompt):
    """Extract meaningful keywords from the prompt for FTS5 search.

    Inline code spans, URLs and path-like tokens are removed, the remainder is
    lowercased and split into words, and stop words, words of two characters
    or fewer, and repeats are dropped.

    Args:
        prompt: Raw prompt text.

    Returns:
        Up to 8 unique lowercase keywords, in order of first appearance.
    """
    # Strip inline code FIRST. The URL and path patterns below consume
    # everything up to the next whitespace, including a closing backtick, so
    # running them first on a span like `/etc/hosts` leaves an orphaned
    # backtick that then pairs with a later one and deletes the prose between.
    text = re.sub(r"`[^`]+`", "", prompt)
    text = re.sub(r"https?://\S+", "", text)
    # Removes from any "/" or "~" up to the next whitespace. This also trims
    # the tail of tokens such as "client/server".
    text = re.sub(r"[/~]\S+", "", text)

    # Extract words (keep hyphens for compound terms like "6pm-mini")
    words = re.findall(r"[a-zA-Z0-9][\w-]*", text.lower())

    # Filter out stop words and short words, de-duplicating in order
    keywords = []
    seen = set()
    for w in words:
        if w not in STOP_WORDS and len(w) > 2 and w not in seen:
            keywords.append(w)
            seen.add(w)

    return keywords[:8]  # Cap at 8 keywords for FTS5 performance


def truncate(text, max_chars=200):
    """Flatten text to one line and shorten it for compact display.

    Runs of newlines become `` | ``. Text longer than ``max_chars`` is cut at
    ``max_chars``, trimmed back to the last space inside that prefix (when one
    exists), and suffixed with ``...``, so the result may be up to three
    characters longer than ``max_chars``.

    Args:
        text: Text to display. ``None`` or an empty string yields ``""``.
        max_chars: Length above which the text is shortened.

    Returns:
        The single-line, possibly shortened, string.
    """
    # Callers pass database column values straight in; a NULL column arrives
    # as None and must not crash on .strip().
    if not text:
        return ""
    # Clean up multi-line content for compact display
    flat = re.sub(r"\n+", " | ", text.strip())
    if len(flat) <= max_chars:
        return flat
    return flat[:max_chars].rsplit(" ", 1)[0] + "..."


def elapsed_ms(start):
    """Return the milliseconds elapsed since ``start``.

    Args:
        start: An earlier ``time.monotonic()`` reading.
    """
    seconds = time.monotonic() - start
    return seconds * 1000


def detect_entities_in_prompt(prompt, conn):
    """Detect known KG entity names in the prompt.

    Checks bigrams (where at least one word is capitalized) and single
    capitalized words (4+ chars, not all-caps) against kg_entities.
    Fast: exact SQL LOWER() match, no FTS5 overhead.

    Only injects context for high-signal entity types (person, company, agent).
    Technology/concept entities are too noisy for automatic injection.

    Args:
        prompt: Raw prompt text.
        conn: Open sqlite3 connection; queried for a ``kg_entities`` table
            with ``id``, ``name`` and ``entity_type`` columns.

    Returns:
        List of dicts ``{id, name, entity_type}``, one per distinct matching
        entity, bigram matches before single-word matches. Empty when nothing
        matches. If a query fails with ``sqlite3.Error`` (e.g. the table is
        missing), the matches collected so far are returned.
    """
    # Entity types that warrant automatic context injection
    INJECT_TYPES = {"person", "company", "agent"}

    def _clean_word(w):
        """Strip a possessive suffix and punctuation from one token."""
        # Drop a possessive "'s" (straight or curly apostrophe) only when it
        # ends the token, optionally followed by punctuation ("Acme's.").
        # A bare trailing apostrophe ("James'", "'tests'") is NOT a reason to
        # drop the final "s"; the cleanup below just removes the quote.
        w = re.sub(r"['\u2019]s(?=[^a-zA-Z0-9]*$)", "", w)
        # Remove all non-alphanumeric except hyphen (for compound words)
        return re.sub(r"[^a-zA-Z0-9-]", "", w)

    cleaned_words = [_clean_word(w) for w in prompt.split()]
    candidates = []

    # Bigrams: "Avi Simon", "Fedor Sidorov" etc.
    for w1, w2 in zip(cleaned_words, cleaned_words[1:]):
        if not w1 or not w2:
            continue
        # At least one word must start uppercase (entities are proper nouns)
        if w1[0].isupper() or w2[0].isupper():
            candidates.append(f"{w1} {w2}")

    # Single capitalized words (4+ chars to avoid "What", "Tell", etc.)
    for w in cleaned_words:
        if len(w) >= 4 and w[0].isupper() and not w.isupper():
            candidates.append(w)

    if not candidates:
        return []

    # Query each distinct candidate once, keeping first-seen order.
    candidates = list(dict.fromkeys(candidates))

    matched = []
    seen_ids = set()
    try:
        for candidate in candidates:
            row = conn.execute(
                "SELECT id, name, entity_type FROM kg_entities WHERE LOWER(name) = LOWER(?) LIMIT 1",
                (candidate,),
            ).fetchone()
            if row is None:
                continue
            eid, name, etype = row
            if eid not in seen_ids and etype in INJECT_TYPES:
                seen_ids.add(eid)
                matched.append({"id": eid, "name": name, "entity_type": etype})
    except sqlite3.Error:
        # Best-effort lookup: entity routing is optional, so a missing table
        # or a locked database must not break the hook.
        pass

    return matched


def get_entity_chunks(entity_id, conn, limit=3):
    """Fetch the chunks linked to an entity, most relevant first.

    Args:
        entity_id: Value matched against ``kg_entity_chunks.entity_id``.
        conn: Open sqlite3 connection.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(content, created_at, project)`` rows ordered by
        descending ``relevance``, or an empty list if the query raises
        ``sqlite3.Error``.
    """
    query = """
        SELECT c.content, c.created_at, c.project
        FROM kg_entity_chunks ec
        JOIN chunks c ON c.id = ec.chunk_id
        WHERE ec.entity_id = ?
        ORDER BY ec.relevance DESC
        LIMIT ?
    """
    try:
        cursor = conn.execute(query, (entity_id, limit))
        return cursor.fetchall()
    except sqlite3.Error:
        return []


def main():
    """Run the hook: read the prompt from stdin, print matching memories.

    Reads a JSON object from stdin and uses its ``"prompt"`` string. Exits
    silently (status 0, no output) when the input is unusable, the prompt is
    too short or is a slash command, no keywords remain, or no database is
    found. Otherwise opens the database read-only and prints, as plain text:

    1. Entity routing: for up to 2 entities detected in the prompt, a header
       line plus up to 2 linked chunks each.
    2. An FTS5 keyword search over ``chunks_fts`` (8 results in deep mode,
       3 otherwise).

    Each phase is skipped once ``DEADLINE_MS`` has elapsed. SQLite errors are
    swallowed so the hook never blocks the prompt; whatever was collected
    before the error is still printed. Always exits with status 0.
    """
    start = time.monotonic()

    try:
        hook_input = json.loads(sys.stdin.read())
    except (ValueError, EOFError):
        # ValueError covers json.JSONDecodeError and undecodable stdin bytes.
        sys.exit(0)

    # Valid JSON that is not an object (list, string, null, ...) carries no
    # prompt; a non-string prompt cannot be searched either.
    if not isinstance(hook_input, dict):
        sys.exit(0)
    prompt = hook_input.get("prompt", "")
    if not isinstance(prompt, str) or not prompt or len(prompt) < MIN_PROMPT_LENGTH:
        sys.exit(0)

    prompt_lower = prompt.lower()

    # Skip if prompt is a slash command
    if prompt.strip().startswith("/"):
        sys.exit(0)

    deep = is_deep_mode(prompt_lower)
    keywords = extract_keywords(prompt)

    if not keywords:
        sys.exit(0)

    db_path = get_db_path()
    if not db_path:
        sys.exit(0)

    try:
        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=2)
    except sqlite3.Error:
        sys.exit(0)

    limit = 8 if deep else 3

    # Build FTS5 query: join keywords with OR for broader matching
    fts_query = " OR ".join(f'"{kw}"' for kw in keywords)

    lines = []
    try:
        # No "PRAGMA journal_mode=WAL" here: the connection is read-only, so
        # the pragma can only be a no-op (already WAL) or fail and disable the
        # hook. Journal mode is left to whoever writes the database.
        conn.execute("PRAGMA query_only=true")

        # Phase A: Entity routing — detect known entity names in prompt
        # and inject entity profile before FTS results.
        if elapsed_ms(start) < DEADLINE_MS:
            entities = detect_entities_in_prompt(prompt, conn)
            for entity in entities[:2]:  # at most 2 entities per prompt
                etype = entity["entity_type"]
                ename = entity["name"]
                lines.append(f"[Entity: {ename} — {etype}]")
                # Get entity-linked chunks for context
                entity_chunks = get_entity_chunks(entity["id"], conn, limit=2)
                for content, created_at, project in entity_chunks:
                    date = created_at[:10] if created_at else "?"
                    proj = f" ({project})" if project else ""
                    # "or ''" guards against a NULL content column.
                    lines.append(f"- [{date}{proj}] {truncate(content or '', max_chars=150)}")

        # Phase B: keyword search, ranked by FTS5 relevance.
        if elapsed_ms(start) < DEADLINE_MS:
            rows = conn.execute(
                """
                SELECT c.content, c.importance, c.project, c.tags, c.created_at
                FROM chunks_fts f
                JOIN chunks c ON c.id = f.chunk_id
                WHERE chunks_fts MATCH ?
                ORDER BY rank
                LIMIT ?
                """,
                (fts_query, limit),
            ).fetchall()

            if rows:
                mode_label = "deep" if deep else "auto"
                # The header is the same whether or not an entity section
                # precedes it (the original if/else had identical branches).
                lines.append(f"[BrainLayer {mode_label}] Memories matching your prompt:")
                for content, importance, project, _tags, created_at in rows:
                    date = created_at[:10] if created_at else "?"
                    imp = f" imp:{importance:.0f}" if importance else ""
                    proj = f" ({project})" if project else ""
                    lines.append(f"- [{date}{imp}{proj}] {truncate(content or '')}")

                if not deep:
                    lines.append("(Use brain_search for deeper results.)")
    except sqlite3.Error:
        # Best-effort: a missing table, lock, or bad FTS query must never
        # block the user's prompt. Keep whatever was collected so far.
        pass
    finally:
        conn.close()

    if lines:
        print("\n".join(lines))

    sys.exit(0)


# Run the hook only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Loading