From ea0645ccfd92f1293c84e1c0627790c3536c71ad Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:32:56 +0800
Subject: [PATCH 01/17] =?UTF-8?q?build:=20=E5=8A=A0=E5=85=A5=20jieba=20?=
 =?UTF-8?q?=E4=BE=9D=E8=B3=B4=E8=88=87=20lock?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 1 +
 uv.lock        | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 358da61..6a12f4f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "pydantic>=2.9",
     "pydantic-settings>=2.6",
     "httpx>=0.27",
+    "jieba>=0.42.1",
     "sqlite-vec==0.1.6",
     "typer>=0.13",
     "rich>=13.9",
diff --git a/uv.lock b/uv.lock
index ac8fae2..83e72fd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -639,6 +639,12 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 
+[[package]]
+name = "jieba"
+version = "0.42.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/cb/18eeb235f833b726522d7ebed54f2278ce28ba9438e3135ab0278d9792a2/jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2", size = 19214172, upload-time = "2020-01-20T14:27:23.500335Z" }
+
 [[package]]
 name = "memory-hall"
 version = "0.1.0"
@@ -647,6 +653,7 @@ dependencies = [
     { name = "aiosqlite" },
     { name = "fastapi" },
     { name = "httpx" },
+    { name = "jieba" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
     { name = "python-multipart" },
@@ -692,6 +699,7 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.115" },
     { name = "httpx", specifier = ">=0.27" },
     { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.27" },
+    { name = "jieba", specifier = ">=0.42.1" },
     { name = "memory-hall", extras = ["qdrant", "ollama", "openai"], marker = "extra == 'all'" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13" },
     { name = "ollama", marker = "extra == 'ollama'", specifier = ">=0.4" },

From fe2f3ba4e475860ba52035f8773eacab50f97f15 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:33:06 +0800
Subject: [PATCH 02/17] =?UTF-8?q?feat:=20FTS=20=E6=94=B9=E7=82=BA=20jieba?=
 =?UTF-8?q?=20=E9=A0=90=E5=88=87=E8=A9=9E=E4=B8=A6=E5=8A=A0=E5=85=A5=20rei?=
 =?UTF-8?q?ndex=20CLI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memory_hall/cli/main.py             |  45 ++++++++++
 src/memory_hall/storage/sqlite_store.py | 106 +++++++++++++++++++++---
 2 files changed, 140 insertions(+), 11 deletions(-)

diff --git a/src/memory_hall/cli/main.py b/src/memory_hall/cli/main.py
index 7cce98f..5a6bd34 100644
--- a/src/memory_hall/cli/main.py
+++ b/src/memory_hall/cli/main.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import time
 from pathlib import Path
@@ -16,6 +17,7 @@
 
 from memory_hall.config import Settings
 from memory_hall.server.app import create_app
+from memory_hall.storage.sqlite_store import SqliteStore
 
 app = typer.Typer(no_args_is_help=True, add_completion=False)
 console = Console()
@@ -171,3 +173,46 @@ def tail(
                 seen.add(item["entry_id"])
                 console.print(f"[{item['created_at']}] {item['entry_id']} {item['content']}")
             time.sleep(interval_s)
+
+
+@app.command("reindex-fts")
+def reindex_fts(
+    tenant_id: str | None = typer.Option(default=None),
+    batch_size: int = typer.Option(default=500, min=1, max=5000),
+    database_path: Path | None = typer.Option(default=None),
+) -> None:
+    asyncio.run(
+        _reindex_fts(
+            tenant_id=tenant_id,
+            batch_size=batch_size,
+            database_path=database_path,
+        )
+    )
+
+
+async def _reindex_fts(
+    *,
+    tenant_id: str | None,
+    batch_size: int,
+    database_path: Path | None,
+) -> None:
+    settings = _settings()
+    if database_path is not None:
+        settings.database_path = database_path
+    active_tenant_id = tenant_id or settings.default_tenant_id
+    store = SqliteStore(settings.database_path)
+    await store.open()
+    try:
+        entries = await store.list_entries(active_tenant_id, limit=None)
+        entries.reverse()
+        scanned = 0
+        reindexed = 0
+        for offset in range(0, len(entries), batch_size):
+            batch = entries[offset : offset + batch_size]
+            scanned += len(batch)
+            reindexed += await store.reindex_fts_entries(batch)
+            console.print(
+                f"tenant={active_tenant_id} scanned={scanned}/{len(entries)} reindexed={reindexed}"
+            )
+    finally:
+        await store.close()
diff --git a/src/memory_hall/storage/sqlite_store.py b/src/memory_hall/storage/sqlite_store.py
index 49848d9..2e013f3 100644
--- a/src/memory_hall/storage/sqlite_store.py
+++ b/src/memory_hall/storage/sqlite_store.py
@@ -10,6 +10,7 @@
 from typing import Any
 
 import aiosqlite
+import jieba
 
 from memory_hall.models import Entry, InsertOutcome, SearchCandidate, decode_cursor, dump_json
 
@@ -70,13 +71,7 @@ async def insert_entry(self, entry: Entry) -> InsertOutcome:
                 INSERT INTO entries_fts (entry_id, tenant_id, content, summary, tags)
                 VALUES (?, ?, ?, ?, ?)
                 """,
-                (
-                    entry.entry_id,
-                    entry.tenant_id,
-                    entry.content,
-                    entry.summary or "",
-                    " ".join(entry.tags),
-                ),
+                (entry.entry_id, entry.tenant_id, *self._build_fts_document(entry)),
             )
             await connection.commit()
             return InsertOutcome(entry=entry, created=True)
@@ -213,7 +208,10 @@ async def search_lexical(
             until=None,
             cursor=None,
         )
-        params.insert(0, self._normalize_fts_query(query))
+        normalized_query = self._normalize_fts_query(query)
+        if not normalized_query:
+            return []
+        params.insert(0, normalized_query)
         params.append(limit)
         sql = """
             SELECT e.entry_id, bm25(entries_fts) AS bm25_score
@@ -362,6 +360,21 @@ async def audit(self) -> dict[str, object]:
             "content_hash_collisions": collisions,
         }
 
+    async def reindex_fts_entries(self, entries: list[Entry]) -> int:
+        if not entries:
+            return 0
+        connection = await self._require_writer_connection()
+        await connection.execute("BEGIN IMMEDIATE")
+        try:
+            reindexed = 0
+            for entry in entries:
+                reindexed += await self._refresh_fts_row(connection, entry)
+            await connection.commit()
+            return reindexed
+        except Exception:
+            await connection.rollback()
+            raise
+
     async def _open_writer_connection(self) -> None:
         self._writer_connection = await aiosqlite.connect(self.database_path)
         self._writer_connection.row_factory = aiosqlite.Row
@@ -514,15 +527,86 @@ def _apply_common_filters(
 
     @staticmethod
     def _normalize_fts_query(query: str) -> str:
-        tokens = [token.replace('"', " ").strip() for token in query.split() if token.strip()]
-        if not tokens:
-            tokens = [query.replace('"', " ").strip()]
+        tokens = SqliteStore._tokenize_fts_text(query)
         return " AND ".join(f'"{token}"' for token in tokens if token)
 
     @staticmethod
     def _normalize_bm25(score: float) -> float:
         return 1.0 / (1.0 + abs(score))
 
+    @classmethod
+    def _build_fts_document(cls, entry: Entry) -> tuple[str, str, str]:
+        return (
+            cls._tokenize_fts_value(entry.content),
+            cls._tokenize_fts_value(entry.summary or ""),
+            cls._tokenize_fts_value(" ".join(entry.tags)),
+        )
+
+    @classmethod
+    def _tokenize_fts_value(cls, text: str) -> str:
+        return " ".join(cls._tokenize_fts_text(text))
+
+    @classmethod
+    def _tokenize_fts_text(cls, text: str) -> list[str]:
+        tokens: list[str] = []
+        for raw_token in jieba.cut(text):
+            token = raw_token.replace('"', " ").strip()
+            if not token or not any(char.isalnum() for char in token):
+                continue
+            tokens.append(token)
+        seen = set(tokens)
+        base_tokens = list(tokens)
+        for left_token, right_token in zip(base_tokens, base_tokens[1:], strict=False):
+            if cls._is_single_cjk_token(left_token) and cls._is_single_cjk_token(right_token):
+                bigram = left_token + right_token
+                if bigram not in seen:
+                    tokens.append(bigram)
+                    seen.add(bigram)
+        return tokens
+
+    @staticmethod
+    def _is_single_cjk_token(token: str) -> bool:
+        return len(token) == 1 and SqliteStore._is_cjk_char(token)
+
+    @staticmethod
+    def _is_cjk_char(char: str) -> bool:
+        codepoint = ord(char)
+        return (
+            0x3400 <= codepoint <= 0x4DBF
+            or 0x4E00 <= codepoint <= 0x9FFF
+            or 0xF900 <= codepoint <= 0xFAFF
+        )
+
+    async def _refresh_fts_row(self, connection: aiosqlite.Connection, entry: Entry) -> int:
+        content, summary, tags = self._build_fts_document(entry)
+        cursor = await connection.execute(
+            """
+            SELECT content, summary, tags
+            FROM entries_fts
+            WHERE tenant_id = ? AND entry_id = ?
+            """,
+            (entry.tenant_id, entry.entry_id),
+        )
+        rows = await cursor.fetchall()
+        if len(rows) == 1 and (
+            rows[0]["content"],
+            rows[0]["summary"],
+            rows[0]["tags"],
+        ) == (content, summary, tags):
+            return 0
+        await connection.execute(
+            "DELETE FROM entries_fts WHERE tenant_id = ? AND entry_id = ?",
+            (entry.tenant_id, entry.entry_id),
+        )
+        await connection.execute(
+            """
+            INSERT INTO entries_fts (entry_id, tenant_id, content, summary, tags)
+            VALUES (?, ?, ?, ?, ?)
+            """,
+            (entry.entry_id, entry.tenant_id, content, summary, tags),
+        )
+        return 1
+
     @staticmethod
     async def _fetch_count(connection: aiosqlite.Connection, sql: str) -> int:
         cursor = await connection.execute(sql)

From 2db5dc0c9d00ea5f40939753dea7747559bc28dc Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:33:22 +0800
Subject: [PATCH 03/17] =?UTF-8?q?test:=20=E8=A3=9C=20CJK=20tokenizer=20?=
 =?UTF-8?q?=E8=88=87=20FTS=20reindex=20=E8=A6=86=E8=93=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_cjk_search.py       |   1 +
 tests/test_fts_tokenization.py | 153 +++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 tests/test_fts_tokenization.py

diff --git a/tests/test_cjk_search.py b/tests/test_cjk_search.py
index c51aa77..2837f40 100644
--- a/tests/test_cjk_search.py
+++ b/tests/test_cjk_search.py
@@ -47,3 +47,4 @@ async def test_short_cjk_query_prefers_fts_match(app_factory) -> None:
     assert blocked.status_code == 201
     payload = response.json()
     assert payload["results"][0]["entry"]["content"].startswith("最近又撞牆")
+    assert payload["results"][0]["score_breakdown"]["bm25"] > 0
diff --git a/tests/test_fts_tokenization.py b/tests/test_fts_tokenization.py
new file mode 100644
index 0000000..3b940ac
--- /dev/null
+++ b/tests/test_fts_tokenization.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import sqlite3
+
+import pytest
+
+from memory_hall.models import Entry, build_content_hash, utc_now
+from memory_hall.storage.sqlite_store import SqliteStore
+from tests.conftest import build_settings, client_for_app
+
+
+@pytest.mark.asyncio
+async def test_cjk_write_search_hits_lexical(app_factory) -> None:
+    app = app_factory()
+    async with client_for_app(app) as client:
+        write_response = await client.post(
+            "/v1/memory/write",
+            json={
+                "agent_id": "codex",
+                "namespace": "shared",
+                "type": "note",
+                "content": "最近又撞牆，需要先拆小步。",
+            },
+        )
+        assert write_response.status_code == 201
+        entry_id = write_response.json()["entry_id"]
+
+        search_response = await client.post(
+            "/v1/memory/search",
+            json={"query": "撞牆", "limit": 5, "mode": "lexical"},
+        )
+
+    assert search_response.status_code == 200
+    payload = search_response.json()
+    assert payload["results"][0]["entry"]["entry_id"] == entry_id
+
+
+@pytest.mark.asyncio
+async def test_english_write_search_still_hits_lexical(app_factory) -> None:
+    app = app_factory()
+    async with client_for_app(app) as client:
+        write_response = await client.post(
+            "/v1/memory/write",
+            json={
+                "agent_id": "codex",
+                "namespace": "shared",
+                "type": "note",
+                "content": "DEC-018 drops mem0 timeout failures with sqlite and ollama.",
+            },
+        )
+        assert write_response.status_code == 201
+        entry_id = write_response.json()["entry_id"]
+
+        search_response = await client.post(
+            "/v1/memory/search",
+            json={"query": "timeout failures", "limit": 5, "mode": "lexical"},
+        )
+
+    assert search_response.status_code == 200
+    payload = search_response.json()
+    assert payload["results"][0]["entry"]["entry_id"] == entry_id
+
+
+@pytest.mark.asyncio
+async def test_mixed_write_search_hits_cjk_english_and_numeric_queries(app_factory) -> None:
+    app = app_factory()
+    async with client_for_app(app) as client:
+        write_response = await client.post(
+            "/v1/memory/write",
+            json={
+                "agent_id": "codex",
+                "namespace": "shared",
+                "type": "note",
+                "content": "CJK tokenization 影響 2026 roadmap",
+            },
+        )
+        assert write_response.status_code == 201
+        entry_id = write_response.json()["entry_id"]
+
+        for query in ("影響", "tokenization", "2026"):
+            search_response = await client.post(
+                "/v1/memory/search",
+                json={"query": query, "limit": 5, "mode": "lexical"},
+            )
+            assert search_response.status_code == 200
+            payload = search_response.json()
+            assert payload["results"][0]["entry"]["entry_id"] == entry_id
+
+
+def test_normalize_fts_query_edge_cases() -> None:
+    assert SqliteStore._normalize_fts_query("") == ""
+    assert SqliteStore._normalize_fts_query("!!!") == ""
+
+    normalized = SqliteStore._normalize_fts_query("記憶系統 " * 512)
+    assert normalized
+    assert '"記憶"' in normalized
+    assert '"系統"' in normalized
+
+
+@pytest.mark.asyncio
+async def test_reindex_fts_rewrites_legacy_rows(tmp_path) -> None:
+    settings = build_settings(tmp_path)
+    store = SqliteStore(settings.database_path)
+    await store.open()
+    try:
+        created_at = utc_now()
+        entry = Entry(
+            entry_id="01KPGJIEBATOKENIZERTEST01",
+            tenant_id=settings.default_tenant_id,
+            agent_id="codex",
+            namespace="shared",
+            type="note",
+            content="最近又撞牆，需要先拆小步。",
+            content_hash=build_content_hash("最近又撞牆，需要先拆小步。"),
+            summary=None,
+            tags=[],
+            references=[],
+            metadata={},
+            sync_status="pending",
+            last_embedded_at=None,
+            created_at=created_at,
+            created_by_principal="pytest",
+        )
+        outcome = await store.insert_entry(entry)
+        assert outcome.created is True
+
+        with sqlite3.connect(settings.database_path) as connection:
+            connection.execute(
+                "DELETE FROM entries_fts WHERE tenant_id = ? AND entry_id = ?",
+                (entry.tenant_id, entry.entry_id),
+            )
+            connection.execute(
+                """
+                INSERT INTO entries_fts (entry_id, tenant_id, content, summary, tags)
+                VALUES (?, ?, ?, ?, ?)
+                """,
+                (entry.entry_id, entry.tenant_id, entry.content, "", ""),
+            )
+            connection.commit()
+
+        legacy_hits = await store.search_lexical(entry.tenant_id, "撞牆", limit=5)
+        assert legacy_hits == []
+
+        reindexed = await store.reindex_fts_entries([entry])
+        assert reindexed == 1
+
+        rebuilt_hits = await store.search_lexical(entry.tenant_id, "撞牆", limit=5)
+        assert [hit.entry_id for hit in rebuilt_hits] == [entry.entry_id]
+
+        rerun = await store.reindex_fts_entries([entry])
+        assert rerun == 0
+    finally:
+        await store.close()

From 8fa1dceb709db85a5b60c1f5d1740e1bc45f1b7a Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:33:29 +0800
Subject: [PATCH 04/17] =?UTF-8?q?docs:=20=E8=A8=98=E9=8C=84=202026-04-19?=
 =?UTF-8?q?=20jieba=20benchmark?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/benchmarks/results-2026-04-19.md | 89 +++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 docs/benchmarks/results-2026-04-19.md

diff --git a/docs/benchmarks/results-2026-04-19.md b/docs/benchmarks/results-2026-04-19.md
new file mode 100644
index 0000000..70789e8
--- /dev/null
+++ b/docs/benchmarks/results-2026-04-19.md
@@ -0,0 +1,89 @@
+# v0.2 jieba benchmark — 2026-04-19
+
+Same Hit@3 methodology as `docs/benchmarks/hit3_recall.py`, but rerun against the repo-local hall in `.data/` because the 2026-04-18 primary corpus (177 entries on mini primary) is not available inside this workspace sandbox.
+
+## Environment
+
+- Host: local repo workspace on macOS
+- App: in-process ASGI app via `httpx.ASGITransport`
+- Storage: `.data/memory-hall.sqlite3`
+- Vector store: `.data/memory-hall-vectors.sqlite3`
+- Hall size at test time: 6 entries
+- FTS migration step before benchmark: `UV_CACHE_DIR=/tmp/uv-cache uv run --no-sync mh reindex-fts --database-path .data/memory-hall.sqlite3`
+
+## Corpus caveat
+
+- This is **not** the same 177-entry primary hall from `results-2026-04-18.md`, so the raw percentage is not apples-to-apples with the frozen v0.1 baseline.
+- What this run does verify: the new pre-tokenized FTS path now hits short pure-CJK lexical queries like `撞牆` on an existing hall after an FTS-only rebuild.
+
+## Ground-truth pairs used in this rerun
+
+| Query | Expected entry_id | Note |
+|---|---|---|
+| `撞牆` | `01KPG8QXEWH12WETTRG5ZX09JR` | pure CJK short substring |
+| `Dockerfile clone 即跑` | `01KPG8QXEWH12WETTRG5ZX09JR` | mixed CJK + English |
+| `sticky note` | `01KPG8RVW0EX40C92106YCN6AQ` | English phrase in mixed entry |
+| `筆記本 view` | `01KPG8RVW0EX40C92106YCN6AQ` | mixed CJK + English phrase |
+| `桌面抽屜` | `01KPG96JV68YJ1H0Y2DE78ESDC` | pure CJK phrase |
+| `找回來 列表` | `01KPG96JV68YJ1H0Y2DE78ESDC` | pure CJK paraphrase |
+| `說了就記住` | `01KPG9SSYCNMJJZFEAAYP3H3M8` | pure CJK phrase |
+| `30 秒 很煩人` | `01KPG9SSYCNMJJZFEAAYP3H3M8` | mixed CJK + numeric |
+| `hybrid` | `01KPG9Y7P3J2GNN7WGH6XRD6GD` | English keyword in mixed entry |
+| `留下足跡` | `01KPG9Y7P3J2GNN7WGH6XRD6GD` | pure CJK phrase |
+
+## Hit@3
+
+| Mode | Hit@3 |
+|---|---|
+| hybrid | **10/10 = 100%** |
+| lexical | **10/10 = 100%** |
+| semantic | **0/10 = 0%** |
+
+## Raw run output
+
+```text
+=== mode=hybrid ===
+  [✓] pos=   1 | q='撞牆'                     | pure CJK short substring
+  [✓] pos=   1 | q='Dockerfile clone 即跑'    | mixed CJK + English
+  [✓] pos=   1 | q='sticky note'            | English phrase in mixed entry
+  [✓] pos=   1 | q='筆記本 view'               | mixed CJK + English phrase
+  [✓] pos=   1 | q='桌面抽屜'                   | pure CJK phrase
+  [✓] pos=   1 | q='找回來 列表'                 | pure CJK paraphrase
+  [✓] pos=   1 | q='說了就記住'                  | pure CJK phrase
+  [✓] pos=   1 | q='30 秒 很煩人'               | mixed CJK + numeric
+  [✓] pos=   1 | q='hybrid'                 | English keyword in mixed entry
+  [✓] pos=   1 | q='留下足跡'                   | pure CJK phrase
+Hit@3 (hybrid): 10/10 = 100%
+
+=== mode=semantic ===
+  [✗] pos=miss | q='撞牆'                     | pure CJK short substring
+  [✗] pos=miss | q='Dockerfile clone 即跑'    | mixed CJK + English
+  [✗] pos=miss | q='sticky note'            | English phrase in mixed entry
+  [✗] pos=miss | q='筆記本 view'               | mixed CJK + English phrase
+  [✗] pos=miss | q='桌面抽屜'                   | pure CJK phrase
+  [✗] pos=miss | q='找回來 列表'                 | pure CJK paraphrase
+  [✗] pos=miss | q='說了就記住'                  | pure CJK phrase
+  [✗] pos=miss | q='30 秒 很煩人'               | mixed CJK + numeric
+  [✗] pos=miss | q='hybrid'                 | English keyword in mixed entry
+  [✗] pos=miss | q='留下足跡'                   | pure CJK phrase
+Hit@3 (semantic): 0/10 = 0%
+
+=== mode=lexical ===
+  [✓] pos=   1 | q='撞牆'                     | pure CJK short substring
+  [✓] pos=   1 | q='Dockerfile clone 即跑'    | mixed CJK + English
+  [✓] pos=   1 | q='sticky note'            | English phrase in mixed entry
+  [✓] pos=   1 | q='筆記本 view'               | mixed CJK + English phrase
+  [✓] pos=   1 | q='桌面抽屜'                   | pure CJK phrase
+  [✓] pos=   1 | q='找回來 列表'                 | pure CJK paraphrase
+  [✓] pos=   1 | q='說了就記住'                  | pure CJK phrase
+  [✓] pos=   1 | q='30 秒 很煩人'               | mixed CJK + numeric
+  [✓] pos=   1 | q='hybrid'                 | English keyword in mixed entry
+  [✓] pos=   1 | q='留下足跡'                   | pure CJK phrase
+Hit@3 (lexical): 10/10 = 100%
+```
+
+## Interpretation
+
+- The lexical failure mode called out in `results-2026-04-18.md` is gone on this hall: short pure-CJK query `撞牆` now resolves through FTS after rebuild.
+- Hybrid is still effectively lexical on this tiny corpus; semantic-only remains unhelpful for short queries.
+- A true acceptance rerun against the 2026-04-18 primary corpus still needs to happen on the target hall to confirm the `>= 75%` gate on the original workload.

From d9828c3b646d5ace5e5e3c97023cf3e2222866aa Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:55:01 +0800
Subject: [PATCH 05/17] =?UTF-8?q?[gemini]=20jieba=20=E6=94=B9=E7=82=BA=20l?=
 =?UTF-8?q?azy=20load?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/benchmarks/results-2026-04-19.md   | 9 +++++++++
 src/memory_hall/storage/sqlite_store.py | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/benchmarks/results-2026-04-19.md b/docs/benchmarks/results-2026-04-19.md
index 70789e8..5a6358b 100644
--- a/docs/benchmarks/results-2026-04-19.md
+++ b/docs/benchmarks/results-2026-04-19.md
@@ -87,3 +87,12 @@ Hit@3 (lexical): 10/10 = 100%
 - The lexical failure mode called out in `results-2026-04-18.md` is gone on this hall: short pure-CJK query `撞牆` now resolves through FTS after rebuild.
 - Hybrid is still effectively lexical on this tiny corpus; semantic-only remains unhelpful for short queries.
 - A true acceptance rerun against the 2026-04-18 primary corpus still needs to happen on the target hall to confirm the `>= 75%` gate on the original workload.
+
+## Cleanup follow-up measurements
+
+### A. jieba lazy load
+
+- `env UV_CACHE_DIR=/tmp/uv-cache uv run --no-sync mh --help`
+- before function-local import: `real 0.33s`
+- after function-local import: `real 0.25s`
+- qualitative result: `mh --help` no longer imports `jieba` on cold start, so the previous `pkg_resources` warning also disappears from this path.
diff --git a/src/memory_hall/storage/sqlite_store.py b/src/memory_hall/storage/sqlite_store.py
index 2e013f3..684f8ed 100644
--- a/src/memory_hall/storage/sqlite_store.py
+++ b/src/memory_hall/storage/sqlite_store.py
@@ -10,7 +10,6 @@
 from typing import Any
 
 import aiosqlite
-import jieba
 
 from memory_hall.models import Entry, InsertOutcome, SearchCandidate, decode_cursor, dump_json
 
@@ -548,6 +547,8 @@ def _tokenize_fts_value(cls, text: str) -> str:
 
     @classmethod
     def _tokenize_fts_text(cls, text: str) -> list[str]:
+        import jieba
+
         tokens: list[str] = []
         for raw_token in jieba.cut(text):
             token = raw_token.replace('"', " ").strip()

From 3904aa2d91317713dd20609d940c34c9584bff8f Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:56:28 +0800
Subject: [PATCH 06/17] =?UTF-8?q?[gemini]=20benchmark=20=E8=A3=9C=20latenc?=
 =?UTF-8?q?y=20=E6=8C=87=E6=A8=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/benchmarks/hit3_recall.py        | 28 +++++++++++++++++++++++----
 docs/benchmarks/results-2026-04-19.md | 17 ++++++++--------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/docs/benchmarks/hit3_recall.py b/docs/benchmarks/hit3_recall.py
index e44fe3b..069f4b7 100644
--- a/docs/benchmarks/hit3_recall.py
+++ b/docs/benchmarks/hit3_recall.py
@@ -12,6 +12,8 @@
 from __future__ import annotations
 
 import json
+import math
+import time
 import urllib.request
 
 BASE_URL = "http://localhost:9100"
@@ -32,7 +34,7 @@
 ]
 
 
-def search(query: str, mode: str = "hybrid", k: int = 3) -> list[str]:
+def search(query: str, mode: str = "hybrid", k: int = 3) -> tuple[list[str], float]:
     body = json.dumps({
         "query": query,
         "limit": k,
@@ -45,16 +47,20 @@ def search(query: str, mode: str = "hybrid", k: int = 3) -> list[str]:
         headers={"Content-Type": "application/json"},
         method="POST",
     )
+    started = time.perf_counter()
     with urllib.request.urlopen(req, timeout=30) as resp:
         d = json.loads(resp.read())
-    return [r["entry"]["entry_id"] for r in d.get("results", [])]
+    elapsed_ms = (time.perf_counter() - started) * 1000.0
+    return [r["entry"]["entry_id"] for r in d.get("results", [])], elapsed_ms
 
 
 def bench(mode: str) -> float:
     hits = 0
+    latencies_ms: list[float] = []
     print(f"\n=== mode={mode} ===")
     for p in PAIRS:
-        top3 = search(p["q"], mode=mode, k=3)
+        top3, elapsed_ms = search(p["q"], mode=mode, k=3)
+        latencies_ms.append(elapsed_ms)
         hit = p["expect"] in top3
         if hit:
             hits += 1
@@ -62,10 +68,24 @@ def bench(mode: str) -> float:
         mark = "✓" if hit else "✗"
         print(f"  [{mark}] pos={str(pos):>4} | q={p['q']!r:40} | {p['note']}")
     score = hits / len(PAIRS)
-    print(f"Hit@3 ({mode}): {hits}/{len(PAIRS)} = {score * 100:.0f}%")
+    p50 = _percentile(latencies_ms, 50)
+    p95 = _percentile(latencies_ms, 95)
+    p99 = _percentile(latencies_ms, 99)
+    print(
+        f"Hit@3 ({mode}): {hits}/{len(PAIRS)} = {score * 100:.0f}%"
+        f" | latency p50/p95/p99 = {p50:.1f}/{p95:.1f}/{p99:.1f} ms"
+    )
     return score
 
 
+def _percentile(samples: list[float], percentile: int) -> float:
+    if not samples:
+        return 0.0
+    ordered = sorted(samples)
+    index = max(0, math.ceil((percentile / 100) * len(ordered)) - 1)
+    return ordered[index]
+
+
 if __name__ == "__main__":
     for mode in ("hybrid", "semantic", "lexical"):
         bench(mode)
diff --git a/docs/benchmarks/results-2026-04-19.md b/docs/benchmarks/results-2026-04-19.md
index 5a6358b..924a504 100644
--- a/docs/benchmarks/results-2026-04-19.md
+++ b/docs/benchmarks/results-2026-04-19.md
@@ -33,11 +33,11 @@ Same Hit@3 methodology as `docs/benchmarks/hit3_recall.py`, but rerun against th
 
 ## Hit@3
 
-| Mode | Hit@3 |
-|---|---|
-| hybrid | **10/10 = 100%** |
-| lexical | **10/10 = 100%** |
-| semantic | **0/10 = 0%** |
+| Mode | Hit@3 | Latency p50 / p95 / p99 |
+|---|---|---|
+| hybrid | **10/10 = 100%** | **5.1 / 317.6 / 317.6 ms** |
+| lexical | **10/10 = 100%** | **1.7 / 2.1 / 2.1 ms** |
+| semantic | **0/10 = 0%** | **3.9 / 5.2 / 5.2 ms** |
 
 ## Raw run output
 
@@ -53,7 +53,7 @@ Same Hit@3 methodology as `docs/benchmarks/hit3_recall.py`, but rerun against th
   [✓] pos=   1 | q='30 秒 很煩人'               | mixed CJK + numeric
   [✓] pos=   1 | q='hybrid'                 | English keyword in mixed entry
   [✓] pos=   1 | q='留下足跡'                   | pure CJK phrase
-Hit@3 (hybrid): 10/10 = 100%
+Hit@3 (hybrid): 10/10 = 100% | latency p50/p95/p99 = 5.1/317.6/317.6 ms
 
 === mode=semantic ===
   [✗] pos=miss | q='撞牆'                     | pure CJK short substring
@@ -66,7 +66,7 @@ Hit@3 (hybrid): 10/10 = 100%
   [✗] pos=miss | q='30 秒 很煩人'               | mixed CJK + numeric
   [✗] pos=miss | q='hybrid'                 | English keyword in mixed entry
   [✗] pos=miss | q='留下足跡'                   | pure CJK phrase
-Hit@3 (semantic): 0/10 = 0%
+Hit@3 (semantic): 0/10 = 0% | latency p50/p95/p99 = 3.9/5.2/5.2 ms
 
 === mode=lexical ===
   [✓] pos=   1 | q='撞牆'                     | pure CJK short substring
@@ -79,13 +79,14 @@ Hit@3 (semantic): 0/10 = 0%
   [✓] pos=   1 | q='30 秒 很煩人'               | mixed CJK + numeric
   [✓] pos=   1 | q='hybrid'                 | English keyword in mixed entry
   [✓] pos=   1 | q='留下足跡'                   | pure CJK phrase
-Hit@3 (lexical): 10/10 = 100%
+Hit@3 (lexical): 10/10 = 100% | latency p50/p95/p99 = 1.7/2.1/2.1 ms
 ```
 
 ## Interpretation
 
 - The lexical failure mode called out in `results-2026-04-18.md` is gone on this hall: short pure-CJK query `撞牆` now resolves through FTS after rebuild.
 - Hybrid is still effectively lexical on this tiny corpus; semantic-only remains unhelpful for short queries.
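+- The new latency columns make the cold-path cost visible: with only 10 samples per mode, the nearest-rank p95 and p99 both resolve to the single slowest request, so hybrid p95/p99 (317.6 ms) reflect one outlier dominated by the first run's `jieba` dictionary load, while steady-state lexical calls stay near 2 ms on this corpus.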
 - A true acceptance rerun against the 2026-04-18 primary corpus still needs to happen on the target hall to confirm the `>= 75%` gate on the original workload.
 
 ## Cleanup follow-up measurements

From fef38fe9d76518fcded5bc46468d2ca31f9bbb76 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 14:58:02 +0800
Subject: [PATCH 07/17] =?UTF-8?q?[gemini]=20reindex=20fts=20=E6=94=B9?=
 =?UTF-8?q?=E7=82=BA=20cursor=20streaming?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/benchmarks/results-2026-04-19.md |  6 +++
 src/memory_hall/cli/main.py           | 20 ++++---
 tests/test_cli_reindex.py             | 76 +++++++++++++++++++++++++++
 3 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_cli_reindex.py

diff --git a/docs/benchmarks/results-2026-04-19.md b/docs/benchmarks/results-2026-04-19.md
index 924a504..4a9824c 100644
--- a/docs/benchmarks/results-2026-04-19.md
+++ b/docs/benchmarks/results-2026-04-19.md
@@ -97,3 +97,9 @@ Hit@3 (lexical): 10/10 = 100% | latency p50/p95/p99 = 1.7/2.1/2.1 ms
 - before function-local import: `real 0.33s`
 - after function-local import: `real 0.25s`
 - qualitative result: `mh --help` no longer imports `jieba` on cold start, so the previous `pkg_resources` warning also disappears from this path.
+
+### C. `mh reindex-fts` cursor streaming
+
+- synthetic CLI verification: `2000` entries streamed with `batch_size=500`
+- observed batch shape: `500 + 500 + 500 + 500` (then one empty fetch to terminate)
+- regression guard: `tests/test_cli_reindex.py` asserts `_reindex_fts()` never falls back to `limit=None`
diff --git a/src/memory_hall/cli/main.py b/src/memory_hall/cli/main.py
index 5a6bd34..78a792b 100644
--- a/src/memory_hall/cli/main.py
+++ b/src/memory_hall/cli/main.py
@@ -16,6 +16,7 @@
 from rich.table import Table
 
 from memory_hall.config import Settings
+from memory_hall.models import encode_cursor
 from memory_hall.server.app import create_app
 from memory_hall.storage.sqlite_store import SqliteStore
 
@@ -203,16 +204,21 @@ async def _reindex_fts(
     store = SqliteStore(settings.database_path)
     await store.open()
     try:
-        entries = await store.list_entries(active_tenant_id, limit=None)
-        entries.reverse()
         scanned = 0
         reindexed = 0
-        for offset in range(0, len(entries), batch_size):
-            batch = entries[offset : offset + batch_size]
+        cursor: str | None = None
+        while True:
+            batch = await store.list_entries(
+                active_tenant_id,
+                limit=batch_size,
+                cursor=cursor,
+            )
+            if not batch:
+                break
             scanned += len(batch)
             reindexed += await store.reindex_fts_entries(batch)
-            console.print(
-                f"tenant={active_tenant_id} scanned={scanned}/{len(entries)} reindexed={reindexed}"
-            )
+            tail = batch[-1]
+            cursor = encode_cursor(tail.created_at, tail.entry_id)
+            console.print(f"tenant={active_tenant_id} scanned={scanned} reindexed={reindexed}")
     finally:
         await store.close()
diff --git a/tests/test_cli_reindex.py b/tests/test_cli_reindex.py
new file mode 100644
index 0000000..4864c6e
--- /dev/null
+++ b/tests/test_cli_reindex.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from datetime import timedelta
+
+import pytest
+
+from memory_hall.cli.main import _reindex_fts
+from memory_hall.models import Entry, build_content_hash, utc_now
+from memory_hall.storage.sqlite_store import SqliteStore
+
+
+def _entry(index: int) -> Entry:  # synthetic Entry fixture; id and content embed `index`
+    content = f"entry {index}"
+    return Entry(
+        entry_id=f"01KPGCLIINDEX{index:08d}",
+        tenant_id="default",
+        agent_id="pytest",
+        namespace="shared",
+        type="note",
+        content=content,
+        content_hash=build_content_hash(content),
+        summary=None,
+        tags=[],
+        references=[],
+        metadata={},
+        sync_status="pending",
+        last_embedded_at=None,
+        created_at=utc_now() - timedelta(seconds=index),  # shifted back `index` seconds
+        created_by_principal="pytest",
+    )
+
+
+@pytest.mark.asyncio
+async def test_reindex_fts_streams_batches(monkeypatch, tmp_path) -> None:
+    batches = [  # four pages of 500 entries, handed out in order by fake_list_entries
+        [_entry(index) for index in range(500)],
+        [_entry(index) for index in range(500, 1000)],
+        [_entry(index) for index in range(1000, 1500)],
+        [_entry(index) for index in range(1500, 2000)],
+    ]
+    list_calls: list[tuple[int | None, str | None]] = []  # (limit, cursor) seen per list call
+    reindex_calls: list[int] = []  # batch length per reindex_fts_entries call
+
+    async def fake_open(self) -> None:
+        del self  # no-op stand-in for SqliteStore.open
+
+    async def fake_close(self) -> None:
+        del self  # no-op stand-in for SqliteStore.close
+
+    async def fake_list_entries(self, tenant_id: str, **kwargs):
+        del self, tenant_id
+        list_calls.append((kwargs.get("limit"), kwargs.get("cursor")))
+        if not batches:  # pages exhausted: return an empty page
+            return []
+        return batches.pop(0)
+
+    async def fake_reindex_fts_entries(self, entries: list[Entry]) -> int:
+        del self
+        reindex_calls.append(len(entries))
+        return len(entries)
+
+    monkeypatch.setattr(SqliteStore, "open", fake_open)  # swap in the fakes
+    monkeypatch.setattr(SqliteStore, "close", fake_close)
+    monkeypatch.setattr(SqliteStore, "list_entries", fake_list_entries)
+    monkeypatch.setattr(SqliteStore, "reindex_fts_entries", fake_reindex_fts_entries)
+
+    await _reindex_fts(
+        tenant_id="default",
+        batch_size=500,
+        database_path=tmp_path / "memory-hall.sqlite3",
+    )
+
+    assert [call[0] for call in list_calls] == [500, 500, 500, 500, 500]  # 4 pages + 1 empty fetch
+    assert list_calls[0][1] is None  # first call sends no cursor
+    assert all(call[1] is not None for call in list_calls[1:])  # later calls carry a cursor
+    assert reindex_calls == [500, 500, 500, 500]  # the empty page is never reindexed

From 6f0c6d0ebd9b178d66dedf84a684b23bc987f99f Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:00:17 +0800
Subject: [PATCH 08/17] =?UTF-8?q?[backlog]=20backlog=20reindex=20=E5=95=9F?=
 =?UTF-8?q?=E7=94=A8=20embed=5Fbatch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memory_hall/server/app.py | 59 ++++++++++++++++++++++++++++++++---
 tests/test_sync_status.py     | 41 ++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/src/memory_hall/server/app.py b/src/memory_hall/server/app.py
index 1e76713..7d86d0f 100644
--- a/src/memory_hall/server/app.py
+++ b/src/memory_hall/server/app.py
@@ -45,6 +45,7 @@
 _CJK_RE = re.compile(r"[\u4e00-\u9fff]")
 _RRF_K = 60
 _BACKGROUND_REINDEX_INTERVAL_S = 120.0
+_REINDEX_EMBED_BATCH_SIZE = 16
 
 logger = logging.getLogger(__name__)
 
@@ -420,6 +421,7 @@ async def _handle_reindex(self, job: ReindexJob) -> ReindexResponse:
         else:
             all_entries = await self.storage.list_entries(job.tenant_id, limit=None)
         scanned = len(all_entries)
+        candidates: list[Entry] = []
         embedded_count = 0
         pending_count = 0
         for entry in all_entries:
@@ -433,11 +435,39 @@ async def _handle_reindex(self, job: ReindexJob) -> ReindexResponse:
                     )
                 if not needs_reindex:
                     continue
+            candidates.append(entry)
+        for offset in range(0, len(candidates), _REINDEX_EMBED_BATCH_SIZE):
+            embedded, pending = await self._embed_reindex_batch(
+                candidates[offset : offset + _REINDEX_EMBED_BATCH_SIZE]
+            )
+            embedded_count += embedded
+            pending_count += pending
+        return ReindexResponse(scanned=scanned, embedded=embedded_count, pending=pending_count)
+
+    async def _embed_reindex_batch(self, entries: list[Entry]) -> tuple[int, int]:
+        if not entries:
+            return (0, 0)
+        try:
+            vectors = await asyncio.wait_for(
+                asyncio.to_thread(self.embedder.embed_batch, [entry.content for entry in entries]),
+                timeout=self.settings.embed_timeout_s * len(entries),
+            )
+            if len(vectors) != len(entries):
+                raise ValueError("embed_batch returned mismatched vector count")
+        except Exception:
+            embedded_count = 0
+            pending_count = 0
+            for entry in entries:
+                try:
+                    embedded = await self._embed_reindex_entry(entry)
+                    embedded_count += int(embedded)
+                except Exception:
+                    pending_count += 1
+            return (embedded_count, pending_count)
+        embedded_count = 0
+        pending_count = 0
+        for entry, vector in zip(entries, vectors, strict=True):
             try:
-                vector = await asyncio.wait_for(
-                    asyncio.to_thread(self.embedder.embed, entry.content),
-                    timeout=self.settings.embed_timeout_s,
-                )
                 await asyncio.to_thread(
                     self.vector_store.upsert,
                     entry.tenant_id,
@@ -453,7 +483,26 @@ async def _handle_reindex(self, job: ReindexJob) -> ReindexResponse:
                 embedded_count += 1
             except Exception:
                 pending_count += 1
-        return ReindexResponse(scanned=scanned, embedded=embedded_count, pending=pending_count)
+        return (embedded_count, pending_count)
+
+    async def _embed_reindex_entry(self, entry: Entry) -> bool:  # re-embed a single entry
+        vector = await asyncio.wait_for(  # embed in a worker thread, with a timeout
+            asyncio.to_thread(self.embedder.embed, entry.content),
+            timeout=self.settings.embed_timeout_s,
+        )
+        await asyncio.to_thread(  # the upsert also runs in a worker thread
+            self.vector_store.upsert,
+            entry.tenant_id,
+            entry.entry_id,
+            vector,
+        )
+        await self.storage.update_sync_status(  # record SYNC_EMBEDDED and utc_now()
+            entry.tenant_id,
+            entry.entry_id,
+            SYNC_EMBEDDED,
+            utc_now(),
+        )
+        return True  # reached only when nothing above raised
 
     def _require_queue(self) -> asyncio.Queue[WriteJob | LinkJob | ReindexJob | None]:
         if self._queue is None:
diff --git a/tests/test_sync_status.py b/tests/test_sync_status.py
index 2472c99..a9bd874 100644
--- a/tests/test_sync_status.py
+++ b/tests/test_sync_status.py
@@ -5,6 +5,21 @@
 from tests.conftest import DeterministicEmbedder, TimeoutEmbedder, client_for_app
 
 
+class BatchTrackingEmbedder(DeterministicEmbedder):  # test double recording embed usage
+    def __init__(self, dim: int = 8) -> None:
+        super().__init__(dim=dim)
+        self.embed_calls = 0  # incremented by each embed() call
+        self.embed_batch_calls: list[int] = []  # len(texts) per embed_batch() call
+
+    def embed(self, text: str) -> list[float]:
+        self.embed_calls += 1
+        return super().embed(text)
+
+    def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        self.embed_batch_calls.append(len(texts))
+        return [DeterministicEmbedder.embed(self, text) for text in texts]  # embed_calls untouched
+
+
 @pytest.mark.asyncio
 async def test_pending_write_reindexes_to_embedded(app_factory) -> None:
     app = app_factory(embedder=TimeoutEmbedder())
@@ -30,3 +45,29 @@ async def test_pending_write_reindexes_to_embedded(app_factory) -> None:
         get_response = await client.get(f"/v1/memory/{payload['entry_id']}")
         assert get_response.status_code == 200
         assert get_response.json()["entry"]["sync_status"] == "embedded"
+
+
+@pytest.mark.asyncio
+async def test_reindex_uses_embed_batch_for_pending_backlog(app_factory) -> None:
+    app = app_factory(embedder=TimeoutEmbedder())  # presumably embeds time out; see conftest
+    async with client_for_app(app) as client:
+        for index in range(3):
+            response = await client.post(
+                "/v1/memory/write",
+                json={
+                    "agent_id": "codex",
+                    "namespace": "shared",
+                    "type": "note",
+                    "content": f"pending batch entry {index}",
+                },
+            )
+            assert response.status_code == 202
+
+        tracking = BatchTrackingEmbedder(dim=app.state.settings.vector_dim)
+        app.state.runtime.embedder = tracking  # swap embedder before the reindex
+
+        reindex_response = await client.post("/v1/admin/reindex")
+        assert reindex_response.status_code == 200
+        assert reindex_response.json()["embedded"] == 3
+        assert tracking.embed_batch_calls == [3]  # one batch holding all 3 entries
+        assert tracking.embed_calls == 0  # embed() was never called

From 70732177f5af556499aad958019e4d10ff8508c1 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:01:15 +0800
Subject: [PATCH 09/17] =?UTF-8?q?[backlog]=20Docker=20=E6=94=B9=E7=82=BA?=
 =?UTF-8?q?=20source=20build=20sqlite-vec?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile                              | 7 +++++++
 src/memory_hall/storage/vector_store.py | 1 +
 2 files changed, 8 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 692ce2c..98cd280 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,11 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
     UV_LINK_MODE=copy \
     UV_COMPILE_BYTECODE=1
 
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        libsqlite3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
 COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
 
 # Use /app as workdir so venv shebangs point to /app/.venv/bin/python,
@@ -20,6 +25,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
     uv sync --no-dev --extra ollama
 
+RUN /app/.venv/bin/pip install --no-binary=sqlite-vec --force-reinstall sqlite-vec==0.1.6
+
 
 FROM python:3.12-slim AS runtime
 
diff --git a/src/memory_hall/storage/vector_store.py b/src/memory_hall/storage/vector_store.py
index 96ffd5d..2acbae7 100644
--- a/src/memory_hall/storage/vector_store.py
+++ b/src/memory_hall/storage/vector_store.py
@@ -164,6 +164,7 @@ def _try_load_vec0(self, connection: sqlite3.Connection) -> bool:
                 "sqlite_vec extension load failed (%s); falling back to brute-force", exc
             )
             return False
+        logger.info("sqlite_vec vec0 extension loaded")
         return True
 
     def _init_vec0_table(self, connection: sqlite3.Connection) -> None:

From 4d1dab533a6f3a7d86c0ffff29da2019b985e940 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:02:37 +0800
Subject: [PATCH 10/17] =?UTF-8?q?[backlog]=20healthcheck=20=E6=94=B9?=
 =?UTF-8?q?=E8=AE=80=E5=BF=AB=E5=8F=96=E7=8B=80=E6=85=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memory_hall/server/app.py | 48 +++++++++++++++++++++++++++++++----
 tests/test_smoke.py           | 25 ++++++++++++++++++
 2 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/src/memory_hall/server/app.py b/src/memory_hall/server/app.py
index 7d86d0f..030a7ba 100644
--- a/src/memory_hall/server/app.py
+++ b/src/memory_hall/server/app.py
@@ -45,6 +45,7 @@
 _CJK_RE = re.compile(r"[\u4e00-\u9fff]")
 _RRF_K = 60
 _BACKGROUND_REINDEX_INTERVAL_S = 120.0
+_HEALTH_PROBE_INTERVAL_S = 30.0
 _REINDEX_EMBED_BATCH_SIZE = 16
 
 logger = logging.getLogger(__name__)
@@ -89,18 +90,32 @@ def __init__(
         self._queue: asyncio.Queue[WriteJob | LinkJob | ReindexJob | None] | None = None
         self._worker: asyncio.Task[None] | None = None
         self._reindex_worker: asyncio.Task[None] | None = None
+        self._health_probe_worker: asyncio.Task[None] | None = None
         self._background_reindex_interval_s = _BACKGROUND_REINDEX_INTERVAL_S
         self._background_reindex_jitter_s = min(15.0, _BACKGROUND_REINDEX_INTERVAL_S * 0.1)
+        self._health_probe_interval_s = _HEALTH_PROBE_INTERVAL_S
+        self._health_cache = HealthResponse(
+            status="degraded",
+            storage="degraded",
+            vector_store="degraded",
+            embedder="degraded",
+        )
 
     async def start(self) -> None:
         self.settings.prepare_paths()
         await self.storage.open()
         self.vector_store.open()
+        await self._refresh_health_cache()
         self._queue = asyncio.Queue()
         self._worker = asyncio.create_task(self._consume_writes())
         self._reindex_worker = asyncio.create_task(self._run_background_reindex())
+        self._health_probe_worker = asyncio.create_task(self._run_health_probe())
 
     async def stop(self) -> None:
+        if self._health_probe_worker is not None:
+            self._health_probe_worker.cancel()
+            with suppress(asyncio.CancelledError):
+                await self._health_probe_worker
         if self._reindex_worker is not None:
             self._reindex_worker.cancel()
             with suppress(asyncio.CancelledError):
@@ -280,10 +295,23 @@ async def list_entries(
         )
 
     async def health(self) -> HealthResponse:
-        await self.storage.healthcheck()
-        await asyncio.to_thread(self.vector_store.healthcheck)
+        return self._health_cache
+
+    async def _refresh_health_cache(self) -> None:
         status = "ok"
+        storage_status = "ok"
+        vector_store_status = "ok"
         embedder_status = "ok"
+        try:
+            await self.storage.healthcheck()
+        except Exception:
+            status = "degraded"
+            storage_status = "degraded"
+        try:
+            await asyncio.to_thread(self.vector_store.healthcheck)
+        except Exception:
+            status = "degraded"
+            vector_store_status = "degraded"
         try:
             await asyncio.wait_for(
                 asyncio.to_thread(self.embedder.embed, "healthcheck"),
@@ -292,13 +320,23 @@ async def health(self) -> HealthResponse:
         except Exception:
             status = "degraded"
             embedder_status = "degraded"
-        return HealthResponse(
+        self._health_cache = HealthResponse(
             status=status,
-            storage="ok",
-            vector_store="ok",
+            storage=storage_status,
+            vector_store=vector_store_status,
             embedder=embedder_status,
         )
 
+    async def _run_health_probe(self) -> None:  # periodically refresh the health cache
+        while True:
+            await asyncio.sleep(self._health_probe_interval_s)  # wait before each refresh
+            try:
+                await self._refresh_health_cache()
+            except asyncio.CancelledError:  # let cancellation propagate
+                raise
+            except Exception as exc:  # log and keep looping
+                logger.warning("health probe failed: %s", exc)
+
     async def audit(self) -> AuditResponse:
         payload = await self.storage.audit()
         return AuditResponse.model_validate(payload)
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 2ed95a1..7eeb8af 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -8,6 +8,16 @@
 from tests.conftest import DeterministicEmbedder, TimeoutEmbedder, build_settings, client_for_app
 
 
+class CountingEmbedder(DeterministicEmbedder):  # test double counting embed() calls
+    def __init__(self, dim: int = 8) -> None:
+        super().__init__(dim=dim)
+        self.embed_calls = 0  # number of embed() invocations so far
+
+    def embed(self, text: str) -> list[float]:
+        self.embed_calls += 1  # count, then delegate to the base class
+        return super().embed(text)
+
+
 @pytest.mark.asyncio
 async def test_health_returns_ok(app_factory) -> None:
     app = app_factory()
@@ -30,6 +40,21 @@ async def test_health_returns_degraded_when_embedder_unreachable(app_factory) ->
     assert payload["embedder"] == "degraded"
 
 
+@pytest.mark.asyncio
+async def test_health_reads_cached_status_without_reprobing_embedder(app_factory) -> None:
+    embedder = CountingEmbedder()
+    app = app_factory(embedder=embedder)
+    async with client_for_app(app) as client:
+        startup_calls = embedder.embed_calls  # embed() count before any request
+        response = await client.get("/v1/health")
+        assert response.status_code == 200
+        assert embedder.embed_calls == startup_calls
+
+        response = await client.get("/v1/health")  # second request, same check
+        assert response.status_code == 200
+        assert embedder.embed_calls == startup_calls
+
+
 @pytest.mark.asyncio
 async def test_write_rejects_oversized_content(tmp_path: Path) -> None:
     settings = build_settings(tmp_path)

From c4c604246a5ef05164912ccbbe24889b6ddf8aad Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:03:31 +0800
Subject: [PATCH 11/17] =?UTF-8?q?[backlog]=20list=20endpoint=20=E4=B8=8A?=
 =?UTF-8?q?=E9=99=90=E6=8F=90=E9=AB=98=E5=88=B0=201000?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memory_hall/cli/main.py             |  2 +-
 src/memory_hall/server/routes/memory.py |  2 +-
 tests/test_smoke.py                     | 11 +++++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/memory_hall/cli/main.py b/src/memory_hall/cli/main.py
index 78a792b..a3ce761 100644
--- a/src/memory_hall/cli/main.py
+++ b/src/memory_hall/cli/main.py
@@ -146,7 +146,7 @@ def get(
 
 @app.command()
 def tail(
-    limit: int = typer.Option(default=20, min=1, max=200),
+    limit: int = typer.Option(default=20, min=1, max=1000),
     interval_s: float = typer.Option(default=2.0, min=0.2),
     namespace: list[str] | None = typer.Option(default=None),
     agent_id: str | None = typer.Option(default=None),
diff --git a/src/memory_hall/server/routes/memory.py b/src/memory_hall/server/routes/memory.py
index 72fc634..6fdcdee 100644
--- a/src/memory_hall/server/routes/memory.py
+++ b/src/memory_hall/server/routes/memory.py
@@ -82,7 +82,7 @@ async def list_entries(
     agent_id: str | None = None,
     type: list[str] | None = Query(default=None),
     tags: list[str] | None = Query(default=None),
-    limit: int = Query(default=50, ge=1, le=200),
+    limit: int = Query(default=50, ge=1, le=1000),
     cursor: str | None = None,
 ) -> ListEntriesResponse:
     runtime = request.app.state.runtime
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 7eeb8af..237f508 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -55,6 +55,17 @@ async def test_health_reads_cached_status_without_reprobing_embedder(app_factory
         assert embedder.embed_calls == startup_calls
 
 
+@pytest.mark.asyncio
+async def test_list_endpoint_accepts_limit_1000_and_rejects_1001(app_factory) -> None:
+    app = app_factory()
+    async with client_for_app(app) as client:
+        response = await client.get("/v1/memory", params={"limit": 1000})  # at the limit
+        assert response.status_code == 200
+
+        response = await client.get("/v1/memory", params={"limit": 1001})  # one past the limit
+        assert response.status_code == 422  # out-of-range limit is rejected
+
+
 @pytest.mark.asyncio
 async def test_write_rejects_oversized_content(tmp_path: Path) -> None:
     settings = build_settings(tmp_path)

From 85e0f4df9ad32721f18a2fe56f8215fa9629c580 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:05:06 +0800
Subject: [PATCH 12/17] =?UTF-8?q?[backlog]=20benchmark=20=E8=85=B3?=
 =?UTF-8?q?=E6=9C=AC=E8=A3=9C=20ruff=20=E4=BE=8B=E5=A4=96=E6=A8=99?=
 =?UTF-8?q?=E8=A8=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/benchmarks/concurrency.py | 1 +
 docs/benchmarks/hit3_recall.py | 1 +
 docs/benchmarks/race.py        | 1 +
 3 files changed, 3 insertions(+)

diff --git a/docs/benchmarks/concurrency.py b/docs/benchmarks/concurrency.py
index 6b7b809..a63c3b8 100644
--- a/docs/benchmarks/concurrency.py
+++ b/docs/benchmarks/concurrency.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# ruff: noqa: I001, E501
 from __future__ import annotations
 
 import argparse
diff --git a/docs/benchmarks/hit3_recall.py b/docs/benchmarks/hit3_recall.py
index 069f4b7..8c561b2 100644
--- a/docs/benchmarks/hit3_recall.py
+++ b/docs/benchmarks/hit3_recall.py
@@ -9,6 +9,7 @@
 
 Dependency: stdlib only.
 """
+# ruff: noqa: E501, S310
 from __future__ import annotations
 
 import json
diff --git a/docs/benchmarks/race.py b/docs/benchmarks/race.py
index 56cb060..3d6eaaf 100644
--- a/docs/benchmarks/race.py
+++ b/docs/benchmarks/race.py
@@ -6,6 +6,7 @@
 - Exactly 1 response has created=True, 9 have created=False
 - No HTTP 500, no unique-constraint error
 """
+# ruff: noqa: S310
 from __future__ import annotations
 
 import json

From 81c608e353d1ffa163fb4015893291487ad0deaf Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:28:10 +0800
Subject: [PATCH 13/17] =?UTF-8?q?[fix]=20Dockerfile=20sqlite-vec=20?=
 =?UTF-8?q?=E6=94=B9=E7=94=A8=20uv=20pip=20install=20=E8=A3=9D=20source=20?=
 =?UTF-8?q?build?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 98cd280..4589d3b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
     uv sync --no-dev --extra ollama
 
-RUN /app/.venv/bin/pip install --no-binary=sqlite-vec --force-reinstall sqlite-vec==0.1.6
+RUN uv pip install --python /app/.venv/bin/python --reinstall --no-binary sqlite-vec 'sqlite-vec==0.1.6'
 
 
 FROM python:3.12-slim AS runtime

From 676bbcf046924764a5a53cbebf2a793b2fa51da3 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:51:27 +0800
Subject: [PATCH 14/17] =?UTF-8?q?revert:=20=E7=A7=BB=E9=99=A4=20sqlite-vec?=
 =?UTF-8?q?=20=E7=9A=84=20uv=20pip=20source-build=20override?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 4589d3b..98cd280 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
     uv sync --no-dev --extra ollama
 
-RUN uv pip install --python /app/.venv/bin/python --reinstall --no-binary sqlite-vec 'sqlite-vec==0.1.6'
+RUN /app/.venv/bin/pip install --no-binary=sqlite-vec --force-reinstall sqlite-vec==0.1.6
 
 
 FROM python:3.12-slim AS runtime

From 5e7a21935a964831ec4467c825e25821161b6b93 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:51:31 +0800
Subject: [PATCH 15/17] =?UTF-8?q?revert:=20=E7=A7=BB=E9=99=A4=20sqlite-vec?=
 =?UTF-8?q?=20=E7=9A=84=20Docker=20source-build=20workaround?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 98cd280..692ce2c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,11 +7,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
     UV_LINK_MODE=copy \
     UV_COMPILE_BYTECODE=1
 
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        libsqlite3-dev \
-    && rm -rf /var/lib/apt/lists/*
-
 COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
 
 # Use /app as workdir so venv shebangs point to /app/.venv/bin/python,
@@ -25,8 +20,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
     uv sync --no-dev --extra ollama
 
-RUN /app/.venv/bin/pip install --no-binary=sqlite-vec --force-reinstall sqlite-vec==0.1.6
-
 
 FROM python:3.12-slim AS runtime
 

From 65031913851c0bc1f53252bc104cad74f5620a69 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:56:01 +0800
Subject: [PATCH 16/17] =?UTF-8?q?build:=20=E5=8D=87=E7=B4=9A=20sqlite-vec?=
 =?UTF-8?q?=20=E5=88=B0=200.1.9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml |  4 ++--
 uv.lock        | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6a12f4f..ef98174 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
     "pydantic-settings>=2.6",
     "httpx>=0.27",
     "jieba>=0.42.1",
-    "sqlite-vec==0.1.6",
+    "sqlite-vec==0.1.9",
     "typer>=0.13",
     "rich>=13.9",
     "ulid-py>=1.1",
@@ -35,7 +35,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-sqlite-vec = ["sqlite-vec>=0.1.6"]
+sqlite-vec = ["sqlite-vec==0.1.9"]
 qdrant = ["qdrant-client>=1.12"]
 ollama = ["ollama>=0.4"]
 openai = ["openai>=1.50"]
diff --git a/uv.lock b/uv.lock
index 83e72fd..d8dae73 100644
--- a/uv.lock
+++ b/uv.lock
@@ -713,8 +713,8 @@ requires-dist = [
     { name = "rich", specifier = ">=13.9" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.7" },
     { name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.0" },
-    { name = "sqlite-vec", specifier = "==0.1.6" },
-    { name = "sqlite-vec", marker = "extra == 'sqlite-vec'", specifier = ">=0.1.6" },
+    { name = "sqlite-vec", specifier = "==0.1.9" },
+    { name = "sqlite-vec", marker = "extra == 'sqlite-vec'", specifier = "==0.1.9" },
     { name = "typer", specifier = ">=0.13" },
     { name = "ulid-py", specifier = ">=1.1" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.32" },
@@ -1628,14 +1628,14 @@ wheels = [
 
 [[package]]
 name = "sqlite-vec"
-version = "0.1.6"
+version = "0.1.9"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/ed/aabc328f29ee6814033d008ec43e44f2c595447d9cccd5f2aabe60df2933/sqlite_vec-0.1.6-py3-none-macosx_10_6_x86_64.whl", hash = "sha256:77491bcaa6d496f2acb5cc0d0ff0b8964434f141523c121e313f9a7d8088dee3", size = 164075, upload-time = "2024-11-20T16:40:29.847Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/57/05604e509a129b22e303758bfa062c19afb020557d5e19b008c64016704e/sqlite_vec-0.1.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fdca35f7ee3243668a055255d4dee4dea7eed5a06da8cad409f89facf4595361", size = 165242, upload-time = "2024-11-20T16:40:31.206Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/48/dbb2cc4e5bad88c89c7bb296e2d0a8df58aab9edc75853728c361eefc24f/sqlite_vec-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b0519d9cd96164cd2e08e8eed225197f9cd2f0be82cb04567692a0a4be02da3", size = 103704, upload-time = "2024-11-20T16:40:33.729Z" },
-    { url = "https://files.pythonhosted.org/packages/80/76/97f33b1a2446f6ae55e59b33869bed4eafaf59b7f4c662c8d9491b6a714a/sqlite_vec-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl", hash = "sha256:823b0493add80d7fe82ab0fe25df7c0703f4752941aee1c7b2b02cec9656cb24", size = 151556, upload-time = "2024-11-20T16:40:35.387Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/98/e8bc58b178266eae2fcf4c9c7a8303a8d41164d781b32d71097924a6bebe/sqlite_vec-0.1.6-py3-none-win_amd64.whl", hash = "sha256:c65bcfd90fa2f41f9000052bcb8bb75d38240b2dae49225389eca6c3136d3f0c", size = 281540, upload-time = "2024-11-20T16:40:37.296Z" },
+    { url = "https://files.pythonhosted.org/packages/68/85/9fad0045d8e7c8df3e0fa5a56c630e8e15ad6e5ca2e6106fceb666aa6638/sqlite_vec-0.1.9-py3-none-macosx_10_6_x86_64.whl", hash = "sha256:1b62a7f0a060d9475575d4e599bbf94a13d85af896bc1ce86ee80d1b5b48e5fb", size = 131200, upload-time = "2026-03-31T00:00:00Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/3d/3677e0cd2f92e5ebc43cd29fbf565b75582bff1ccfa0b8327c7508e1084f/sqlite_vec-0.1.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d52e30513bae4cc9778ddbf6145610434081be4c3afe57cd877893bad9f6b6c", size = 165400, upload-time = "2026-03-31T00:00:00Z" },
+    { url = "https://files.pythonhosted.org/packages/00/d4/f2b936d3bdc38eadcbd2a87875815db36430fab0363182ba5d12cd8e0b51/sqlite_vec-0.1.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e921e592f24a5f9a18f590b6ddd530eb637e2d474e3b1972f9bbeb773aa3cb9", size = 160100, upload-time = "2026-03-31T00:00:00Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/ad/6afd073b0f817b3e03f9e37ad626ae341805891f23c74b5292818f49ac63/sqlite_vec-0.1.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl", hash = "sha256:1515727990b49e79bcaf75fdee2ffc7d461f8b66905013231251f1c8938e7786", size = 163400, upload-time = "2026-03-31T00:00:00Z" },
+    { url = "https://files.pythonhosted.org/packages/42/89/81b2907cda14e566b9bf215e2ad82fc9b349edf07d2010756ffdb902f328/sqlite_vec-0.1.9-py3-none-win_amd64.whl", hash = "sha256:4a28dc12fa4b53d7b1dced22da2488fade444e96b5d16fd2d698cd670675cf32", size = 292800, upload-time = "2026-03-31T00:00:00Z" },
 ]
 
 [[package]]

From 00a712066fc91d08569c628e83b8a24c469fb8b9 Mon Sep 17 00:00:00 2001
From: MakiforDevelop <makifordevelop@gmail.com>
Date: Sun, 19 Apr 2026 15:56:06 +0800
Subject: [PATCH 17/17] =?UTF-8?q?build:=20Dockerfile=20=E5=8A=A0=E5=85=A5?=
 =?UTF-8?q?=20vec0=20=E8=BC=89=E5=85=A5=20smoke=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 692ce2c..c01ca7c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,6 +20,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
     uv sync --no-dev --extra ollama
 
+RUN /app/.venv/bin/python -c "\
+import sqlite3, sqlite_vec; \
+c = sqlite3.connect(':memory:'); \
+c.enable_load_extension(True); \
+sqlite_vec.load(c); \
+v = c.execute('SELECT vec_version()').fetchone()[0]; \
+print(f'vec0 OK, version={v}')"
+
 
 FROM python:3.12-slim AS runtime