Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/brainlayer/search_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@
_HYBRID_CACHE_TTL = 60.0  # seconds
_HYBRID_CACHE_MAX = 128  # max entries (LRU eviction)
_MMR_CANDIDATE_LIMIT = 50

# MMR weight, read once at import time from BRAINLAYER_MMR_LAMBDA.
#   * unset            -> 1.0
#   * finite number    -> clamped into [0.0, 1.0]
#   * nan / inf / junk -> 1.0
# A value >= 1.0 makes _mmr_rerank_scored_results return its input unchanged,
# so the default leaves MMR reranking switched off.
try:
    _MMR_LAMBDA = float(os.environ.get("BRAINLAYER_MMR_LAMBDA", "1.0"))
    if not math.isfinite(_MMR_LAMBDA):
        # float() accepts "nan"/"inf"; send them down the same fallback path
        # as unparseable text instead of letting them reach the reranker.
        raise ValueError
    _MMR_LAMBDA = max(0.0, min(1.0, _MMR_LAMBDA))
except (TypeError, ValueError):
    _MMR_LAMBDA = 1.0
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment on lines +35 to +41
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] Sanitize the BRAINLAYER_MMR_LAMBDA env value to a finite number in [0, 1] before use

BRAINLAYER_MMR_LAMBDA is parsed with float(...), so non-finite values like nan/inf are accepted and do not hit the fallback. With nan, the reranker enters the MMR path (nan >= 1.0 is false) and computes nan MMR scores, so candidate selection no longer reflects relevance/diversity and retrieval ordering can silently degrade for all queries in that process. Validate the parsed value (finite and bounded) and fall back to the default when invalid.

Useful? React with 👍 / 👎.

_FILTERED_KNN_MAX = 2000
META_NOISE_PATTERNS = [
"brain_search(",
Expand Down Expand Up @@ -478,6 +484,8 @@ def _mmr_rerank_scored_results(
"""Diversify the top candidate pool with MMR while preserving overall recall."""
if len(scored) < 2:
return scored
if _MMR_LAMBDA >= 1.0:
return scored

candidate_limit = min(len(scored), _MMR_CANDIDATE_LIMIT)
top_candidates = scored[:candidate_limit]
Expand Down
23 changes: 23 additions & 0 deletions src/brainlayer/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ def _set_busy_timeout_hook(conn: apsw.Connection) -> None:
apsw.bestpractice.apply(apsw.bestpractice.recommended)


def _int_env(name: str, default: int) -> int:
    """Read environment variable ``name`` as an integer.

    Args:
        name: Environment variable to look up.
        default: Value used when the variable is unset or cannot be parsed
            by ``int()``.

    Returns:
        The parsed integer, or ``default`` on a missing/invalid value.
    """
    try:
        parsed = int(os.environ.get(name, str(default)))
    except (TypeError, ValueError):
        # Bad overrides are ignored rather than raised.
        parsed = default
    return parsed


def _read_mmap_bytes() -> int:
    """Return the value used for ``PRAGMA mmap_size`` on read connections.

    Taken from ``BRAINLAYER_READ_MMAP_BYTES`` (default 30_000_000_000);
    negative overrides are raised to 0.
    """
    requested = _int_env("BRAINLAYER_READ_MMAP_BYTES", 30_000_000_000)
    return requested if requested > 0 else 0


def _read_cache_size_kb() -> int:
    """Return the value used for ``PRAGMA cache_size`` on read connections.

    Taken from ``BRAINLAYER_READ_CACHE_KB`` (default 64_000). The result is
    always non-positive, whatever the sign of the override; SQLite documents
    a negative ``cache_size`` as a size in KiB rather than a page count.
    """
    requested = _int_env("BRAINLAYER_READ_CACHE_KB", 64_000)
    return requested if requested < 0 else -requested


def _wal_autocheckpoint_pages() -> int:
    """Return the page count used for ``PRAGMA wal_autocheckpoint``.

    Taken from ``BRAINLAYER_WAL_AUTOCHECKPOINT`` (default 10_000).

    SQLite treats a zero or negative ``wal_autocheckpoint`` as "auto-checkpoint
    off", so a non-positive override (for example a mistaken ``-1``) is
    treated as invalid and replaced by the default instead of silently
    disabling checkpointing and letting the WAL file grow without bound.

    Returns:
        A strictly positive page count.
    """
    default_pages = 10_000
    pages = _int_env("BRAINLAYER_WAL_AUTOCHECKPOINT", default_pages)
    return pages if pages > 0 else default_pages
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Validate that the WAL autocheckpoint override is positive

_wal_autocheckpoint_pages() currently clamps negative BRAINLAYER_WAL_AUTOCHECKPOINT values to 0, and SQLite treats wal_autocheckpoint <= 0 as disabling auto-checkpointing. That means a common misconfiguration like -1 silently turns checkpointing off instead of falling back to the safe default, which can rapidly grow the WAL file under write load (a known stability risk in this repo). Treat non-positive values as invalid and fall back to 10000 rather than passing 0 through.

Useful? React with 👍 / 👎.



class VectorStore(SearchMixin, KGMixin, SessionMixin):
"""SQLite-vec based vector store.

Expand Down Expand Up @@ -180,6 +199,7 @@ def _init_db(self) -> None:

# WAL mode is persistent on the DB file — set it every time
cursor.execute("PRAGMA journal_mode = WAL")
cursor.execute(f"PRAGMA wal_autocheckpoint = {_wal_autocheckpoint_pages()}")

# Create tables
cursor.execute("""
Expand Down Expand Up @@ -1177,6 +1197,9 @@ def _get_read_conn(self) -> apsw.Connection:
conn.loadextension(sqlite_vec.loadable_path())
conn.enableloadextension(False)
conn.setbusytimeout(30_000)
cursor = conn.cursor()
cursor.execute(f"PRAGMA mmap_size = {_read_mmap_bytes()}")
cursor.execute(f"PRAGMA cache_size = {_read_cache_size_kb()}")
self._local.read_conn = conn
return conn

Expand Down
8 changes: 6 additions & 2 deletions tests/test_hybrid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,9 @@ def test_hybrid_search_demotes_chunks_with_quarantine_metadata(self, store):
def test_noise_demoter_does_not_treat_arbitrary_true_metadata_as_quarantine(self):
assert not _contains_precompact_or_quarantined_meta({"feature_enabled": "true"}, "ordinary content")

def test_mmr_rerank_dedupes_near_duplicates(self, store):
def test_mmr_rerank_dedupes_near_duplicates(self, store, monkeypatch):
monkeypatch.setattr("brainlayer.search_repo._MMR_LAMBDA", 0.65)

def embedding(primary: float, secondary: float = 0.0) -> list[float]:
vector = [0.0] * 1024
vector[0] = primary
Expand Down Expand Up @@ -487,7 +489,9 @@ def embedding(primary: float, secondary: float = 0.0) -> list[float]:
assert "distinct-relevant" in ids[:2], ids
assert set(ids[:2]) != {"dup-primary", "dup-secondary"}, ids

def test_mmr_rerank_keeps_nonvector_hits_in_original_score_slots(self, store):
def test_mmr_rerank_keeps_nonvector_hits_in_original_score_slots(self, store, monkeypatch):
monkeypatch.setattr("brainlayer.search_repo._MMR_LAMBDA", 0.65)

def embedding(primary: float, secondary: float = 0.0) -> list[float]:
vector = [0.0] * 1024
vector[0] = primary
Expand Down
78 changes: 78 additions & 0 deletions tests/test_mmr_default_off.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import importlib


def _scored_candidates():
    """Return three fixed 5-tuples used as the ``scored`` argument in these tests.

    Every call builds a new list and a fresh empty dict for each tuple's
    fourth slot, so tests cannot affect each other through shared state.
    """
    rows = (
        (0.99, "a", "alpha", 0.01),
        (0.98, "b", "beta", 0.02),
        (0.97, "c", "gamma", 0.03),
    )
    return [(head, ident, text, {}, tail) for head, ident, text, tail in rows]


class _SpyStore:
    """Stand-in passed as ``self`` to ``SearchMixin._mmr_rerank_scored_results``.

    It only records how many times ``_load_chunk_embeddings`` is invoked, so a
    test can tell whether the reranker tried to load embeddings.
    """

    def __init__(self):
        # Number of _load_chunk_embeddings calls seen so far.
        self.embedding_load_calls = 0

    def _load_chunk_embeddings(self, chunk_ids):
        """Count the call and report that no embeddings are available."""
        self.embedding_load_calls = self.embedding_load_calls + 1
        return dict()


def test_mmr_default_is_off_and_does_not_load_embeddings(monkeypatch):
    """With no env override, reranking returns its input and loads no embeddings."""
    monkeypatch.delenv("BRAINLAYER_MMR_LAMBDA", raising=False)
    # Reload so the module-level _MMR_LAMBDA is recomputed from the cleared env.
    module = importlib.reload(importlib.import_module("brainlayer.search_repo"))
    spy = _SpyStore()
    candidates = _scored_candidates()

    result = module.SearchMixin._mmr_rerank_scored_results(spy, candidates, n_results=2)

    assert module._MMR_LAMBDA == 1.0
    assert result == candidates
    assert spy.embedding_load_calls == 0


def test_mmr_env_override_reenables_embedding_load(monkeypatch):
    """Setting BRAINLAYER_MMR_LAMBDA below 1.0 makes the reranker load embeddings.

    ``_MMR_LAMBDA`` is computed at import time, so the module has to be
    reloaded under the override. It is reloaded once more after the override
    has been undone; otherwise 0.65 would stay baked into
    ``brainlayer.search_repo`` for every test that runs later in the process.
    """
    import brainlayer.search_repo as search_repo

    try:
        with monkeypatch.context() as scoped:
            scoped.setenv("BRAINLAYER_MMR_LAMBDA", "0.65")
            search_repo = importlib.reload(search_repo)
            store = _SpyStore()

            search_repo.SearchMixin._mmr_rerank_scored_results(store, _scored_candidates(), n_results=2)

            assert search_repo._MMR_LAMBDA == 0.65
            assert store.embedding_load_calls == 1
    finally:
        # The context manager has restored the environment by this point, so
        # this reload recomputes _MMR_LAMBDA from the pre-test environment.
        importlib.reload(search_repo)


def test_invalid_mmr_env_override_falls_back_to_default_off(monkeypatch):
    """An unparseable BRAINLAYER_MMR_LAMBDA leaves ``_MMR_LAMBDA`` at 1.0."""
    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "not-a-float")

    # The value is computed at import time, hence the reload under the override.
    module = importlib.reload(importlib.import_module("brainlayer.search_repo"))

    assert module._MMR_LAMBDA == 1.0


def test_nonfinite_mmr_env_override_falls_back_to_default_off(monkeypatch):
    """A ``nan`` BRAINLAYER_MMR_LAMBDA leaves ``_MMR_LAMBDA`` at 1.0."""
    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "nan")

    # The value is computed at import time, hence the reload under the override.
    module = importlib.reload(importlib.import_module("brainlayer.search_repo"))

    assert module._MMR_LAMBDA == 1.0


def test_out_of_range_mmr_env_override_is_clamped(monkeypatch):
    """Finite overrides outside [0, 1] are clamped to the nearest bound."""
    expectations = (
        ("-0.25", 0.0),  # below the range -> lower bound
        ("1.25", 1.0),  # above the range -> upper bound
    )
    for raw_value, clamped in expectations:
        monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", raw_value)
        # Reload each time: _MMR_LAMBDA is only computed at import.
        module = importlib.reload(importlib.import_module("brainlayer.search_repo"))

        assert module._MMR_LAMBDA == clamped
84 changes: 84 additions & 0 deletions tests/test_vector_store_pragma_tuning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import pytest

from brainlayer.vector_store import VectorStore


def _pragma_value(conn, name: str) -> int:
    """Query ``PRAGMA <name>`` on ``conn`` and return its first column as an int.

    Args:
        conn: Connection object exposing ``cursor()``.
        name: PRAGMA name, interpolated verbatim into the statement.
    """
    cursor = conn.cursor()
    first_row = cursor.execute(f"PRAGMA {name}").fetchone()
    return int(first_row[0])


def test_read_connection_sets_mmap_size(tmp_path, monkeypatch):
    """A read connection reports a positive ``mmap_size`` when no override is set."""
    monkeypatch.delenv("BRAINLAYER_READ_MMAP_BYTES", raising=False)
    db_path = tmp_path / "pragma-read.db"
    # Create the database file with a writer first, then reopen it read-only.
    VectorStore(db_path).close()

    reader = VectorStore(db_path, readonly=True)
    try:
        observed = _pragma_value(reader._get_read_conn(), "mmap_size")
        if observed == 0:
            pytest.skip("SQLite build does not support mmap_size")
        assert observed > 0
    finally:
        reader.close()


def test_read_connection_sets_private_cache_size(tmp_path, monkeypatch):
    """A read connection reports ``cache_size == -64000`` when no override is set."""
    monkeypatch.delenv("BRAINLAYER_READ_CACHE_KB", raising=False)
    db_path = tmp_path / "pragma-cache.db"
    # Create the database file with a writer first, then reopen it read-only.
    VectorStore(db_path).close()

    reader = VectorStore(db_path, readonly=True)
    try:
        observed = _pragma_value(reader._get_read_conn(), "cache_size")
        assert observed == -64000
    finally:
        reader.close()


def test_writer_init_sets_wal_autocheckpoint(tmp_path, monkeypatch):
    """A freshly initialised writer reports ``wal_autocheckpoint == 10000`` by default."""
    monkeypatch.delenv("BRAINLAYER_WAL_AUTOCHECKPOINT", raising=False)
    writer = VectorStore(tmp_path / "pragma-wal.db")
    try:
        observed = _pragma_value(writer.conn, "wal_autocheckpoint")
        assert observed == 10000
    finally:
        writer.close()


def test_pragma_env_overrides_apply_to_new_connections(tmp_path, monkeypatch):
    """Env overrides for all three PRAGMAs are visible on newly opened connections."""
    overrides = {
        "BRAINLAYER_WAL_AUTOCHECKPOINT": "2222",
        "BRAINLAYER_READ_MMAP_BYTES": "0",
        "BRAINLAYER_READ_CACHE_KB": "12345",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)

    db_path = tmp_path / "pragma-overrides.db"

    writer = VectorStore(db_path)
    try:
        assert _pragma_value(writer.conn, "wal_autocheckpoint") == 2222
    finally:
        writer.close()

    reader = VectorStore(db_path, readonly=True)
    try:
        read_conn = reader._get_read_conn()
        assert _pragma_value(read_conn, "mmap_size") == 0
        # The positive KB override comes back negated.
        assert _pragma_value(read_conn, "cache_size") == -12345
    finally:
        reader.close()


def test_invalid_pragma_env_values_fall_back_to_defaults(tmp_path, monkeypatch):
    """Unparseable PRAGMA overrides are ignored in favour of the defaults.

    The ``mmap_size`` check is made last and skips on a build that reports 0,
    matching ``test_read_connection_sets_mmap_size``; the checks that do not
    depend on mmap support therefore always run first.
    """
    for env_name in (
        "BRAINLAYER_WAL_AUTOCHECKPOINT",
        "BRAINLAYER_READ_MMAP_BYTES",
        "BRAINLAYER_READ_CACHE_KB",
    ):
        monkeypatch.setenv(env_name, "not-an-int")

    db_path = tmp_path / "pragma-invalid-env.db"

    store = VectorStore(db_path)
    try:
        assert _pragma_value(store.conn, "wal_autocheckpoint") == 10000
    finally:
        store.close()

    reader = VectorStore(db_path, readonly=True)
    try:
        conn = reader._get_read_conn()
        assert _pragma_value(conn, "cache_size") == -64000
        mmap_size = _pragma_value(conn, "mmap_size")
        if mmap_size == 0:
            pytest.skip("SQLite build does not support mmap_size")
        assert mmap_size > 0
    finally:
        reader.close()
Loading