-
Notifications
You must be signed in to change notification settings - Fork 7
feat: tune BrainLayer read path PRAGMAs and default MMR off #305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,7 +32,13 @@ | |
| _HYBRID_CACHE_TTL = 60.0 # seconds | ||
| _HYBRID_CACHE_MAX = 128 # max entries (LRU eviction) | ||
| _MMR_CANDIDATE_LIMIT = 50 | ||
| _MMR_LAMBDA = 0.65 | ||
| try: | ||
| _MMR_LAMBDA = float(os.environ.get("BRAINLAYER_MMR_LAMBDA", "1.0")) | ||
| if not math.isfinite(_MMR_LAMBDA): | ||
| raise ValueError | ||
| _MMR_LAMBDA = max(0.0, min(1.0, _MMR_LAMBDA)) | ||
| except (TypeError, ValueError): | ||
| _MMR_LAMBDA = 1.0 | ||
|
Comment on lines +35 to +41
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| _FILTERED_KNN_MAX = 2000 | ||
| META_NOISE_PATTERNS = [ | ||
| "brain_search(", | ||
|
|
@@ -478,6 +484,8 @@ def _mmr_rerank_scored_results( | |
| """Diversify the top candidate pool with MMR while preserving overall recall.""" | ||
| if len(scored) < 2: | ||
| return scored | ||
| if _MMR_LAMBDA >= 1.0: | ||
| return scored | ||
|
|
||
| candidate_limit = min(len(scored), _MMR_CANDIDATE_LIMIT) | ||
| top_candidates = scored[:candidate_limit] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,6 +71,25 @@ def _set_busy_timeout_hook(conn: apsw.Connection) -> None: | |
| apsw.bestpractice.apply(apsw.bestpractice.recommended) | ||
|
|
||
|
|
||
| def _int_env(name: str, default: int) -> int: | ||
| try: | ||
| return int(os.environ.get(name, str(default))) | ||
| except (TypeError, ValueError): | ||
| return default | ||
|
|
||
|
|
||
| def _read_mmap_bytes() -> int: | ||
| return max(_int_env("BRAINLAYER_READ_MMAP_BYTES", 30_000_000_000), 0) | ||
|
|
||
|
|
||
| def _read_cache_size_kb() -> int: | ||
| return -abs(_int_env("BRAINLAYER_READ_CACHE_KB", 64_000)) | ||
|
|
||
|
|
||
| def _wal_autocheckpoint_pages() -> int: | ||
| return max(_int_env("BRAINLAYER_WAL_AUTOCHECKPOINT", 10_000), 0) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
|
|
||
|
|
||
| class VectorStore(SearchMixin, KGMixin, SessionMixin): | ||
| """SQLite-vec based vector store. | ||
|
|
||
|
|
@@ -180,6 +199,7 @@ def _init_db(self) -> None: | |
|
|
||
| # WAL mode is persistent on the DB file — set it every time | ||
| cursor.execute("PRAGMA journal_mode = WAL") | ||
| cursor.execute(f"PRAGMA wal_autocheckpoint = {_wal_autocheckpoint_pages()}") | ||
|
|
||
| # Create tables | ||
| cursor.execute(""" | ||
|
|
@@ -1177,6 +1197,9 @@ def _get_read_conn(self) -> apsw.Connection: | |
| conn.loadextension(sqlite_vec.loadable_path()) | ||
| conn.enableloadextension(False) | ||
| conn.setbusytimeout(30_000) | ||
| cursor = conn.cursor() | ||
| cursor.execute(f"PRAGMA mmap_size = {_read_mmap_bytes()}") | ||
| cursor.execute(f"PRAGMA cache_size = {_read_cache_size_kb()}") | ||
| self._local.read_conn = conn | ||
| return conn | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| import importlib | ||
|
|
||
|
|
||
| def _scored_candidates(): | ||
| return [ | ||
| (0.99, "a", "alpha", {}, 0.01), | ||
| (0.98, "b", "beta", {}, 0.02), | ||
| (0.97, "c", "gamma", {}, 0.03), | ||
| ] | ||
|
|
||
|
|
||
| class _SpyStore: | ||
| def __init__(self): | ||
| self.embedding_load_calls = 0 | ||
|
|
||
| def _load_chunk_embeddings(self, chunk_ids): | ||
| self.embedding_load_calls += 1 | ||
| return {} | ||
|
|
||
|
|
||
| def test_mmr_default_is_off_and_does_not_load_embeddings(monkeypatch): | ||
| monkeypatch.delenv("BRAINLAYER_MMR_LAMBDA", raising=False) | ||
| import brainlayer.search_repo as search_repo | ||
|
|
||
| search_repo = importlib.reload(search_repo) | ||
| store = _SpyStore() | ||
| scored = _scored_candidates() | ||
|
|
||
| reranked = search_repo.SearchMixin._mmr_rerank_scored_results(store, scored, n_results=2) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 1.0 | ||
| assert reranked == scored | ||
| assert store.embedding_load_calls == 0 | ||
|
|
||
|
|
||
| def test_mmr_env_override_reenables_embedding_load(monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "0.65") | ||
| import brainlayer.search_repo as search_repo | ||
|
|
||
| search_repo = importlib.reload(search_repo) | ||
| store = _SpyStore() | ||
|
|
||
| search_repo.SearchMixin._mmr_rerank_scored_results(store, _scored_candidates(), n_results=2) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 0.65 | ||
| assert store.embedding_load_calls == 1 | ||
|
|
||
|
|
||
| def test_invalid_mmr_env_override_falls_back_to_default_off(monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "not-a-float") | ||
| import brainlayer.search_repo as search_repo | ||
|
|
||
| search_repo = importlib.reload(search_repo) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 1.0 | ||
|
|
||
|
|
||
| def test_nonfinite_mmr_env_override_falls_back_to_default_off(monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "nan") | ||
| import brainlayer.search_repo as search_repo | ||
|
|
||
| search_repo = importlib.reload(search_repo) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 1.0 | ||
|
|
||
|
|
||
| def test_out_of_range_mmr_env_override_is_clamped(monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "-0.25") | ||
| import brainlayer.search_repo as search_repo | ||
|
|
||
| search_repo = importlib.reload(search_repo) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 0.0 | ||
|
|
||
| monkeypatch.setenv("BRAINLAYER_MMR_LAMBDA", "1.25") | ||
| search_repo = importlib.reload(search_repo) | ||
|
|
||
| assert search_repo._MMR_LAMBDA == 1.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| import pytest | ||
|
|
||
| from brainlayer.vector_store import VectorStore | ||
|
|
||
|
|
||
| def _pragma_value(conn, name: str) -> int: | ||
| row = conn.cursor().execute(f"PRAGMA {name}").fetchone() | ||
| return int(row[0]) | ||
|
|
||
|
|
||
| def test_read_connection_sets_mmap_size(tmp_path, monkeypatch): | ||
| monkeypatch.delenv("BRAINLAYER_READ_MMAP_BYTES", raising=False) | ||
| store = VectorStore(tmp_path / "pragma-read.db") | ||
| store.close() | ||
|
|
||
| reader = VectorStore(tmp_path / "pragma-read.db", readonly=True) | ||
| try: | ||
| mmap_size = _pragma_value(reader._get_read_conn(), "mmap_size") | ||
| if mmap_size == 0: | ||
| pytest.skip("SQLite build does not support mmap_size") | ||
| assert mmap_size > 0 | ||
| finally: | ||
| reader.close() | ||
|
|
||
|
|
||
| def test_read_connection_sets_private_cache_size(tmp_path, monkeypatch): | ||
| monkeypatch.delenv("BRAINLAYER_READ_CACHE_KB", raising=False) | ||
| store = VectorStore(tmp_path / "pragma-cache.db") | ||
| store.close() | ||
|
|
||
| reader = VectorStore(tmp_path / "pragma-cache.db", readonly=True) | ||
| try: | ||
| assert _pragma_value(reader._get_read_conn(), "cache_size") == -64000 | ||
| finally: | ||
| reader.close() | ||
|
|
||
|
|
||
| def test_writer_init_sets_wal_autocheckpoint(tmp_path, monkeypatch): | ||
| monkeypatch.delenv("BRAINLAYER_WAL_AUTOCHECKPOINT", raising=False) | ||
| store = VectorStore(tmp_path / "pragma-wal.db") | ||
| try: | ||
| assert _pragma_value(store.conn, "wal_autocheckpoint") == 10000 | ||
| finally: | ||
| store.close() | ||
|
|
||
|
|
||
| def test_pragma_env_overrides_apply_to_new_connections(tmp_path, monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_WAL_AUTOCHECKPOINT", "2222") | ||
| monkeypatch.setenv("BRAINLAYER_READ_MMAP_BYTES", "0") | ||
| monkeypatch.setenv("BRAINLAYER_READ_CACHE_KB", "12345") | ||
|
|
||
| store = VectorStore(tmp_path / "pragma-overrides.db") | ||
| try: | ||
| assert _pragma_value(store.conn, "wal_autocheckpoint") == 2222 | ||
| finally: | ||
| store.close() | ||
|
|
||
| reader = VectorStore(tmp_path / "pragma-overrides.db", readonly=True) | ||
| try: | ||
| conn = reader._get_read_conn() | ||
| assert _pragma_value(conn, "mmap_size") == 0 | ||
| assert _pragma_value(conn, "cache_size") == -12345 | ||
| finally: | ||
| reader.close() | ||
|
|
||
|
|
||
| def test_invalid_pragma_env_values_fall_back_to_defaults(tmp_path, monkeypatch): | ||
| monkeypatch.setenv("BRAINLAYER_WAL_AUTOCHECKPOINT", "not-an-int") | ||
| monkeypatch.setenv("BRAINLAYER_READ_MMAP_BYTES", "not-an-int") | ||
| monkeypatch.setenv("BRAINLAYER_READ_CACHE_KB", "not-an-int") | ||
|
|
||
| store = VectorStore(tmp_path / "pragma-invalid-env.db") | ||
| try: | ||
| assert _pragma_value(store.conn, "wal_autocheckpoint") == 10000 | ||
| finally: | ||
| store.close() | ||
|
|
||
| reader = VectorStore(tmp_path / "pragma-invalid-env.db", readonly=True) | ||
| try: | ||
| conn = reader._get_read_conn() | ||
| assert _pragma_value(conn, "mmap_size") > 0 | ||
| assert _pragma_value(conn, "cache_size") == -64000 | ||
| finally: | ||
| reader.close() |
Uh oh!
There was an error while loading. Please reload this page.