-
Notifications
You must be signed in to change notification settings - Fork 7
Optimize hook injection fallback and filtering #232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,11 @@ | |
| ENTITY_TOKEN_RE = re.compile(r"[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*|[\u0590-\u05FF]+") | ||
| _ENTITY_CACHE = None | ||
| _ENTITY_CACHE_DB_PATH = None | ||
| LOW_CONFIDENCE_FALLBACK_THRESHOLD = 0.30 | ||
| LOW_CONFIDENCE_FALLBACK_MESSAGE = ( | ||
| "No high-confidence memories found. Use brain_search() for deeper retrieval." | ||
| ) | ||
| _KG_ENTITY_CHUNKS_RELATION_TYPE_CACHE = {} | ||
|
|
||
|
|
||
| def get_session_context(conn, session_id: str, limit: int = 3) -> list[str]: | ||
|
|
@@ -400,11 +405,9 @@ def truncate(text, max_chars=80): | |
| if len(text) <= max_chars: | ||
| return text | ||
| candidate = text[:max_chars] | ||
| search_start = max(0, max_chars - 40) | ||
| for sep in (". ", "! ", "? ", "| "): | ||
| idx = candidate.rfind(sep, search_start) | ||
| if idx > 0: | ||
| return candidate[: idx + len(sep) - 1] + "..." | ||
| sentence_matches = list(re.finditer(r"[.!?](?=(?:\s|$))", candidate)) | ||
| if sentence_matches: | ||
| return candidate[: sentence_matches[-1].end()] + "..." | ||
| return candidate.rsplit(" ", 1)[0] + "..." | ||
|
|
||
|
|
||
|
|
@@ -428,6 +431,22 @@ def _get_connection_cache_key(conn): | |
| return f"conn:{id(conn)}" | ||
|
|
||
|
|
||
| def _kg_entity_chunks_has_relation_type(conn): | ||
| cache_key = _get_connection_cache_key(conn) | ||
| cached = _KG_ENTITY_CHUNKS_RELATION_TYPE_CACHE.get(cache_key) | ||
| if cached is not None: | ||
| return cached | ||
|
|
||
| try: | ||
| columns = {row[1] for row in conn.execute("PRAGMA table_info(kg_entity_chunks)").fetchall()} | ||
| except sqlite3.Error: | ||
| columns = set() | ||
|
|
||
| has_column = "relation_type" in columns | ||
| _KG_ENTITY_CHUNKS_RELATION_TYPE_CACHE[cache_key] = has_column | ||
| return has_column | ||
|
|
||
|
|
||
| def _load_entity_cache(conn=None): | ||
| global _ENTITY_CACHE, _ENTITY_CACHE_DB_PATH | ||
|
|
||
|
|
@@ -621,12 +640,17 @@ def detect_entities_in_prompt(prompt, conn=None): | |
| def get_entity_chunks(entity_id, conn, limit=3): | ||
| """Get top linked chunk summaries for an entity.""" | ||
| try: | ||
| relation_filter = "" | ||
| if _kg_entity_chunks_has_relation_type(conn): | ||
| relation_filter = "AND COALESCE(ec.relation_type, '') != 'co_occurs_with'" | ||
|
|
||
| rows = conn.execute( | ||
| """ | ||
| f""" | ||
| SELECT c.content, c.created_at, c.project | ||
| FROM kg_entity_chunks ec | ||
| JOIN chunks c ON c.id = ec.chunk_id | ||
| WHERE ec.entity_id = ? | ||
| {relation_filter} | ||
| AND COALESCE(c.project, '') != 'eval-sandbox' | ||
| AND COALESCE(c.tags, '') NOT LIKE '%"eval-test"%' | ||
| ORDER BY ec.relevance DESC | ||
|
|
@@ -710,6 +734,24 @@ def select_adaptive_injection_rows(rows, entity_count=0, light_mode=False): | |
| return strategic_reorder(selected[:MAX_ADAPTIVE_INJECTION]) | ||
|
|
||
|
|
||
| def build_low_confidence_fallback(rows): | ||
| if not rows: | ||
| return LOW_CONFIDENCE_FALLBACK_MESSAGE | ||
|
|
||
| top_row = rows[0] | ||
| if isinstance(top_row, dict): | ||
| relevance = top_row.get("relevance") | ||
| if relevance is None: | ||
| relevance = top_row.get("rrf_score") | ||
| else: | ||
| relevance = None | ||
|
|
||
| if relevance is not None and relevance < LOW_CONFIDENCE_FALLBACK_THRESHOLD: | ||
| return LOW_CONFIDENCE_FALLBACK_MESSAGE | ||
|
|
||
| return None | ||
|
|
||
|
Comment on lines
+737
to
+753
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Bug: The `rrf_score` of every `fallback_rows` entry is hard-coded to `0.0`, so the `relevance < LOW_CONFIDENCE_FALLBACK_THRESHOLD` check is always true and the fallback message is returned whenever this function is called. If the intent is to use the actual FTS rank or relevance score, you need to compute a meaningful score. If the intent is to always show the fallback when no search results were injected, the threshold check is unnecessary.
🔧 Possible fix if intent is to always emit fallback when no lines:
def build_low_confidence_fallback(rows):
if not rows:
return LOW_CONFIDENCE_FALLBACK_MESSAGE
-
- top_row = rows[0]
- if isinstance(top_row, dict):
- relevance = top_row.get("relevance")
- if relevance is None:
- relevance = top_row.get("rrf_score")
- else:
- relevance = None
-
- if relevance is not None and relevance < LOW_CONFIDENCE_FALLBACK_THRESHOLD:
- return LOW_CONFIDENCE_FALLBACK_MESSAGE
-
- return None
+ # Always return fallback when called (caller determines when to call)
+ return LOW_CONFIDENCE_FALLBACK_MESSAGE
Alternatively, compute actual relevance scores where `fallback_rows` is built instead of hard-coding `"rrf_score": 0.0`.
Also applies to: 1218-1229
🤖 Prompt for AI Agents |
||
|
|
||
| def _ensure_src_on_syspath(): | ||
| src_path = Path(__file__).resolve().parents[1] / "src" | ||
| if src_path.exists(): | ||
|
|
@@ -1014,6 +1056,7 @@ def main(): | |
| new_chunk_ids = [] | ||
| new_briefs = [] | ||
| entities_detected = 0 | ||
| fallback_rows = [] | ||
|
|
||
| def finalize_and_exit(*, mode=None): | ||
| final_mode = mode or telemetry_mode | ||
|
|
@@ -1172,6 +1215,18 @@ def finalize_and_exit(*, mode=None): | |
| if len(filtered_rows) >= base_limit: | ||
| break | ||
|
|
||
| fallback_rows = [ | ||
| { | ||
| "id": chunk_id, | ||
| "content": content, | ||
| "importance": importance, | ||
| "project": project, | ||
| "tags": tags, | ||
| "created_at": created_at, | ||
| "rrf_score": 0.0, | ||
| } | ||
| for chunk_id, content, importance, project, tags, created_at in filtered_rows | ||
| ] | ||
| new_chunk_ids, new_briefs = inject_search_results(lines, filtered_rows, deep, label=label) | ||
| except sqlite3.Error: | ||
| pass | ||
|
|
@@ -1186,6 +1241,11 @@ def finalize_and_exit(*, mode=None): | |
| "Run brain_search() to verify before stating any personal details (hardware, history, names)." | ||
| ) | ||
|
|
||
| if not lines: | ||
| fallback = build_low_confidence_fallback(fallback_rows) | ||
|
Comment on lines
+1244
to
+1245
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In Useful? React with 👍 / 👎. |
||
| if fallback: | ||
| lines.append(fallback) | ||
|
|
||
|
Comment on lines
+1244
to
+1248
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial
Logic concern: Fallback only applies to FTS-based paths, not entity lookup. The fallback is built from `fallback_rows`, which is only populated on the FTS search path. Consider whether this is the intended behavior or if entity lookup should have its own fallback handling.
🤖 Prompt for AI Agents |
||
| if lines: | ||
| print("\n".join(lines)) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial
🧩 Analysis chain
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 4918
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 4124
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 8273
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 6993
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 1691
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 2170
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 3299
🏁 Script executed:
Repository: EtanHey/brainlayer
Length of output: 2885
Document forward-compatible schema detection or clarify the intended timeline for relation_type in kg_entity_chunks.
The defensive check is sound—the `relation_type` column doesn't exist in `kg_entity_chunks` in production (created at `vector_store.py:81-89` with only `entity_id`, `chunk_id`, `relevance`), and `co_occurs_with` filtering happens at the `kg_relations` level, not here. The function gracefully handles this by returning an empty filter string when the column is absent, so no filtering occurs in production.
However, the test schema in `test_prompt_classification.py:40-45` includes `relation_type` in `kg_entity_chunks`, suggesting this may be planned. Either document this as intentional forward-compatibility or clarify whether and when `relation_type` will be added to the production schema.
🤖 Prompt for AI Agents