From 348cee945ad64aba555f662051bfe7bd53b405f7 Mon Sep 17 00:00:00 2001
From: Etan Joseph Heyman
Date: Thu, 21 May 2026 16:55:31 +0300
Subject: [PATCH 1/2] feat: tune read path pragmas and default MMR off

---
Notes (review):
    * This series had been flattened onto a few physical lines. Line breaks
      were restored; blank context lines were placed from the hunk counts and
      context-line indentation was reconstructed by hand. If a hunk is
      rejected on whitespace, retry with `git apply --ignore-whitespace`.
    * Changes made during review (hunk counts and diffstat updated to match;
      the `index` blob ids are the ones from the original series and no
      longer describe the modified post-images):
        - tests/test_mmr_default_off.py: autouse fixture restores
          `search_repo._MMR_LAMBDA` after each test. The tests call
          importlib.reload() while an env override is active; monkeypatch
          restores the environment afterwards but not the module global.
        - tests/test_vector_store_pragma_tuning.py: the invalid-env test now
          skips the mmap assertion when the build reports mmap_size == 0,
          matching test_read_connection_sets_mmap_size.
        - src/brainlayer/vector_store.py: docstrings on the new env helpers.
    * NOTE(review): search_repo.py now reads `os.environ` (and calls
      `math.isfinite` in 2/2) but neither hunk adds an import; presumably
      both modules are already imported at the top of the file -- confirm.

 src/brainlayer/search_repo.py            |  7 +-
 src/brainlayer/vector_store.py           | 27 ++++++++
 tests/test_hybrid_search.py              |  8 ++-
 tests/test_mmr_default_off.py            | 67 +++++++++++++++++++
 tests/test_vector_store_pragma_tuning.py | 87 ++++++++++++++++++++++++
 5 files changed, 193 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_mmr_default_off.py
 create mode 100644 tests/test_vector_store_pragma_tuning.py

diff --git a/src/brainlayer/search_repo.py b/src/brainlayer/search_repo.py
index db884caa..60f168f6 100644
--- a/src/brainlayer/search_repo.py
+++ b/src/brainlayer/search_repo.py
@@ -32,7 +32,10 @@ _HYBRID_CACHE_TTL = 60.0  # seconds
 _HYBRID_CACHE_MAX = 128  # max entries (LRU eviction)
 _MMR_CANDIDATE_LIMIT = 50
 
-_MMR_LAMBDA = 0.65
+try:
+    _MMR_LAMBDA = float(os.environ.get("BRAINLAYER_MMR_LAMBDA", "1.0"))
+except (TypeError, ValueError):
+    _MMR_LAMBDA = 1.0
 _FILTERED_KNN_MAX = 2000
 META_NOISE_PATTERNS = [
     "brain_search(",
@@ -478,6 +481,8 @@ def _mmr_rerank_scored_results(
         """Diversify the top candidate pool with MMR while preserving overall recall."""
         if len(scored) < 2:
             return scored
+        if _MMR_LAMBDA >= 1.0:
+            return scored
 
         candidate_limit = min(len(scored), _MMR_CANDIDATE_LIMIT)
         top_candidates = scored[:candidate_limit]
diff --git a/src/brainlayer/vector_store.py b/src/brainlayer/vector_store.py
index b43f9e4e..c587c3a5 100644
--- a/src/brainlayer/vector_store.py
+++ b/src/brainlayer/vector_store.py
@@ -71,6 +71,29 @@ def _set_busy_timeout_hook(conn: apsw.Connection) -> None:
 apsw.bestpractice.apply(apsw.bestpractice.recommended)
 
 
+def _int_env(name: str, default: int) -> int:
+    """Return env var *name* parsed as an int, or *default* if unset or unparsable."""
+    try:
+        return int(os.environ.get(name, str(default)))
+    except (TypeError, ValueError):
+        return default
+
+
+def _read_mmap_bytes() -> int:
+    """Return the ``mmap_size`` in bytes for read connections (negative overrides become 0)."""
+    return max(_int_env("BRAINLAYER_READ_MMAP_BYTES", 30_000_000_000), 0)
+
+
+def _read_cache_size_kb() -> int:
+    """Return the read-connection ``cache_size``, always <= 0 (SQLite reads negatives as KiB)."""
+    return -abs(_int_env("BRAINLAYER_READ_CACHE_KB", 64_000))
+
+
+def _wal_autocheckpoint_pages() -> int:
+    """Return the ``wal_autocheckpoint`` page threshold (negative overrides become 0)."""
+    return max(_int_env("BRAINLAYER_WAL_AUTOCHECKPOINT", 10_000), 0)
+
+
 class VectorStore(SearchMixin, KGMixin, SessionMixin):
     """SQLite-vec based vector store.
 
@@ -180,6 +203,7 @@ def _init_db(self) -> None:
 
         # WAL mode is persistent on the DB file — set it every time
         cursor.execute("PRAGMA journal_mode = WAL")
+        cursor.execute(f"PRAGMA wal_autocheckpoint = {_wal_autocheckpoint_pages()}")
 
         # Create tables
         cursor.execute("""
@@ -1177,6 +1201,9 @@ def _get_read_conn(self) -> apsw.Connection:
             conn.loadextension(sqlite_vec.loadable_path())
             conn.enableloadextension(False)
             conn.setbusytimeout(30_000)
+            cursor = conn.cursor()
+            cursor.execute(f"PRAGMA mmap_size = {_read_mmap_bytes()}")
+            cursor.execute(f"PRAGMA cache_size = {_read_cache_size_kb()}")
             self._local.read_conn = conn
         return conn
 
diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py
index 365a0beb..46c21dad 100644
--- a/tests/test_hybrid_search.py
+++ b/tests/test_hybrid_search.py
@@ -431,7 +431,9 @@ def test_hybrid_search_demotes_chunks_with_quarantine_metadata(self, store):
     def test_noise_demoter_does_not_treat_arbitrary_true_metadata_as_quarantine(self):
         assert not _contains_precompact_or_quarantined_meta({"feature_enabled": "true"}, "ordinary content")
 
-    def test_mmr_rerank_dedupes_near_duplicates(self, store):
+    def test_mmr_rerank_dedupes_near_duplicates(self, store, monkeypatch):
+        monkeypatch.setattr("brainlayer.search_repo._MMR_LAMBDA", 0.65)
+
         def embedding(primary: float, secondary: float = 0.0) -> list[float]:
             vector = [0.0] * 1024
             vector[0] = primary
@@ -487,7 +489,9 @@ def embedding(primary: float, secondary: float = 0.0) -> list[float]:
         assert "distinct-relevant" in ids[:2], ids
         assert set(ids[:2]) != {"dup-primary", "dup-secondary"}, ids
 
-    def test_mmr_rerank_keeps_nonvector_hits_in_original_score_slots(self, store):
+    def test_mmr_rerank_keeps_nonvector_hits_in_original_score_slots(self, store, monkeypatch):
+        monkeypatch.setattr("brainlayer.search_repo._MMR_LAMBDA", 0.65)
+
         def embedding(primary: float, secondary: float = 0.0) -> list[float]:
             vector = [0.0] * 1024
             vector[0] = primary
diff --git a/tests/test_mmr_default_off.py b/tests/test_mmr_default_off.py
new file mode 100644
index 00000000..c8a63b6d
--- /dev/null
+++ b/tests/test_mmr_default_off.py
@@ -0,0 +1,67 @@
+import importlib
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _restore_mmr_lambda():
+    """Restore ``_MMR_LAMBDA`` after each test; reload() leaves it set module-wide."""
+    import brainlayer.search_repo as search_repo
+
+    original = search_repo._MMR_LAMBDA
+    yield
+    search_repo._MMR_LAMBDA = original
+
+
+def _scored_candidates():
+    return [
+        (0.99, "a", "alpha", {}, 0.01),
+        (0.98, "b", "beta", {}, 0.02),
+        (0.97, "c", "gamma", {}, 0.03),
+    ]
+
+
+class _SpyStore:
+    def __init__(self):
+        self.embedding_load_calls = 0
+
+    def _load_chunk_embeddings(self, chunk_ids):
+        self.embedding_load_calls += 1
+        return {}
+
+
+def test_mmr_default_is_off_and_does_not_load_embeddings(monkeypatch):
+    monkeypatch.delenv("BRAINLAYER_MMR_LAMBDA", raising=False)
+    import brainlayer.search_repo as search_repo
+
+    search_repo = importlib.reload(search_repo)
+    store = _SpyStore()
+    scored = _scored_candidates()
+
+    reranked = search_repo.SearchMixin._mmr_rerank_scored_results(store, scored, n_results=2)
+
+    assert search_repo._MMR_LAMBDA == 1.0
+    assert reranked == scored
+    assert store.embedding_load_calls == 0
+
+
+def test_mmr_env_override_reenables_embedding_load(monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "0.65")
+    import brainlayer.search_repo as search_repo
+
+    search_repo = importlib.reload(search_repo)
+    store = _SpyStore()
+
+    search_repo.SearchMixin._mmr_rerank_scored_results(store, _scored_candidates(), n_results=2)
+
+    assert search_repo._MMR_LAMBDA == 0.65
+    assert store.embedding_load_calls == 1
+
+
+def test_invalid_mmr_env_override_falls_back_to_default_off(monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "not-a-float")
+    import brainlayer.search_repo as search_repo
+
+    search_repo = importlib.reload(search_repo)
+
+    assert search_repo._MMR_LAMBDA == 1.0
diff --git a/tests/test_vector_store_pragma_tuning.py b/tests/test_vector_store_pragma_tuning.py
new file mode 100644
index 00000000..4bfdcaad
--- /dev/null
+++ b/tests/test_vector_store_pragma_tuning.py
@@ -0,0 +1,87 @@
+import pytest
+
+from brainlayer.vector_store import VectorStore
+
+
+def _pragma_value(conn, name: str) -> int:
+    row = conn.cursor().execute(f"PRAGMA {name}").fetchone()
+    return int(row[0])
+
+
+def test_read_connection_sets_mmap_size(tmp_path, monkeypatch):
+    monkeypatch.delenv("BRAINLAYER_READ_MMAP_BYTES", raising=False)
+    store = VectorStore(tmp_path / "pragma-read.db")
+    store.close()
+
+    reader = VectorStore(tmp_path / "pragma-read.db", readonly=True)
+    try:
+        mmap_size = _pragma_value(reader._get_read_conn(), "mmap_size")
+        if mmap_size == 0:
+            pytest.skip("SQLite build does not support mmap_size")
+        assert mmap_size > 0
+    finally:
+        reader.close()
+
+
+def test_read_connection_sets_private_cache_size(tmp_path, monkeypatch):
+    monkeypatch.delenv("BRAINLAYER_READ_CACHE_KB", raising=False)
+    store = VectorStore(tmp_path / "pragma-cache.db")
+    store.close()
+
+    reader = VectorStore(tmp_path / "pragma-cache.db", readonly=True)
+    try:
+        assert _pragma_value(reader._get_read_conn(), "cache_size") == -64000
+    finally:
+        reader.close()
+
+
+def test_writer_init_sets_wal_autocheckpoint(tmp_path, monkeypatch):
+    monkeypatch.delenv("BRAINLAYER_WAL_AUTOCHECKPOINT", raising=False)
+    store = VectorStore(tmp_path / "pragma-wal.db")
+    try:
+        assert _pragma_value(store.conn, "wal_autocheckpoint") == 10000
+    finally:
+        store.close()
+
+
+def test_pragma_env_overrides_apply_to_new_connections(tmp_path, monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_WAL_AUTOCHECKPOINT", "2222")
+    monkeypatch.setenv("BRAINLAYER_READ_MMAP_BYTES", "0")
+    monkeypatch.setenv("BRAINLAYER_READ_CACHE_KB", "12345")
+
+    store = VectorStore(tmp_path / "pragma-overrides.db")
+    try:
+        assert _pragma_value(store.conn, "wal_autocheckpoint") == 2222
+    finally:
+        store.close()
+
+    reader = VectorStore(tmp_path / "pragma-overrides.db", readonly=True)
+    try:
+        conn = reader._get_read_conn()
+        assert _pragma_value(conn, "mmap_size") == 0
+        assert _pragma_value(conn, "cache_size") == -12345
+    finally:
+        reader.close()
+
+
+def test_invalid_pragma_env_values_fall_back_to_defaults(tmp_path, monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_WAL_AUTOCHECKPOINT", "not-an-int")
+    monkeypatch.setenv("BRAINLAYER_READ_MMAP_BYTES", "not-an-int")
+    monkeypatch.setenv("BRAINLAYER_READ_CACHE_KB", "not-an-int")
+
+    store = VectorStore(tmp_path / "pragma-invalid-env.db")
+    try:
+        assert _pragma_value(store.conn, "wal_autocheckpoint") == 10000
+    finally:
+        store.close()
+
+    reader = VectorStore(tmp_path / "pragma-invalid-env.db", readonly=True)
+    try:
+        conn = reader._get_read_conn()
+        assert _pragma_value(conn, "cache_size") == -64000
+        mmap_size = _pragma_value(conn, "mmap_size")
+        if mmap_size == 0:
+            pytest.skip("SQLite build does not support mmap_size")
+        assert mmap_size > 0
+    finally:
+        reader.close()

From 50389f500c107f2d4728d18571a6a50af87a8387 Mon Sep 17 00:00:00 2001
From: Etan Joseph Heyman
Date: Thu, 21 May 2026 17:11:59 +0300
Subject: [PATCH 2/2] fix: bound MMR lambda env override

---
Notes (review):
    * Line structure restored as in 1/2. The tests/test_mmr_default_off.py
      hunk below starts 12 lines later than in the original series because
      1/2 now adds an autouse fixture at the top of that file.

 src/brainlayer/search_repo.py |  3 +++
 tests/test_mmr_default_off.py | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/brainlayer/search_repo.py b/src/brainlayer/search_repo.py
index 60f168f6..cd8d4dc2 100644
--- a/src/brainlayer/search_repo.py
+++ b/src/brainlayer/search_repo.py
@@ -34,6 +34,9 @@ _MMR_CANDIDATE_LIMIT = 50
 
 try:
     _MMR_LAMBDA = float(os.environ.get("BRAINLAYER_MMR_LAMBDA", "1.0"))
+    if not math.isfinite(_MMR_LAMBDA):
+        raise ValueError
+    _MMR_LAMBDA = max(0.0, min(1.0, _MMR_LAMBDA))
 except (TypeError, ValueError):
     _MMR_LAMBDA = 1.0
 _FILTERED_KNN_MAX = 2000
diff --git a/tests/test_mmr_default_off.py b/tests/test_mmr_default_off.py
index c8a63b6d..42cddcd0 100644
--- a/tests/test_mmr_default_off.py
+++ b/tests/test_mmr_default_off.py
@@ -65,3 +65,26 @@ def test_invalid_mmr_env_override_falls_back_to_default_off(monkeypatch):
     search_repo = importlib.reload(search_repo)
 
     assert search_repo._MMR_LAMBDA == 1.0
+
+
+def test_nonfinite_mmr_env_override_falls_back_to_default_off(monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "nan")
+    import brainlayer.search_repo as search_repo
+
+    search_repo = importlib.reload(search_repo)
+
+    assert search_repo._MMR_LAMBDA == 1.0
+
+
+def test_out_of_range_mmr_env_override_is_clamped(monkeypatch):
+    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "-0.25")
+    import brainlayer.search_repo as search_repo
+
+    search_repo = importlib.reload(search_repo)
+
+    assert search_repo._MMR_LAMBDA == 0.0
+
+    monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "1.25")
+    search_repo = importlib.reload(search_repo)
+
+    assert search_repo._MMR_LAMBDA == 1.0