Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev --extra ollama 2>/dev/null || \
uv sync --no-dev --extra ollama

# Build-time smoke test: load the sqlite-vec (vec0) extension into an
# in-memory SQLite database and print its version, so a non-loadable
# extension fails the image build instead of surfacing at runtime.
RUN /app/.venv/bin/python -c "\
import sqlite3, sqlite_vec; \
c = sqlite3.connect(':memory:'); \
c.enable_load_extension(True); \
sqlite_vec.load(c); \
v = c.execute('SELECT vec_version()').fetchone()[0]; \
print(f'vec0 OK, version={v}')"


FROM python:3.12-slim AS runtime

Expand Down
1 change: 1 addition & 0 deletions docs/benchmarks/concurrency.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
# ruff: noqa: I001, E501
from __future__ import annotations

import argparse
Expand Down
29 changes: 25 additions & 4 deletions docs/benchmarks/hit3_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@

Dependency: stdlib only.
"""
# ruff: noqa: E501, S310
from __future__ import annotations

import json
import math
import time
import urllib.request

BASE_URL = "http://localhost:9100"
Expand All @@ -32,7 +35,7 @@
]


def search(query: str, mode: str = "hybrid", k: int = 3) -> list[str]:
def search(query: str, mode: str = "hybrid", k: int = 3) -> tuple[list[str], float]:
body = json.dumps({
"query": query,
"limit": k,
Expand All @@ -45,27 +48,45 @@ def search(query: str, mode: str = "hybrid", k: int = 3) -> list[str]:
headers={"Content-Type": "application/json"},
method="POST",
)
started = time.perf_counter()
with urllib.request.urlopen(req, timeout=30) as resp:
d = json.loads(resp.read())
return [r["entry"]["entry_id"] for r in d.get("results", [])]
elapsed_ms = (time.perf_counter() - started) * 1000.0
return [r["entry"]["entry_id"] for r in d.get("results", [])], elapsed_ms


def bench(mode: str) -> float:
    """Run the Hit@3 benchmark for one search *mode* and return the hit rate.

    For every ground-truth pair in ``PAIRS``, issues a top-3 search, records
    per-request latency, and prints one line per query plus a summary with
    the Hit@3 score and p50/p95/p99 latencies.

    Returns:
        Fraction of queries whose expected entry_id appeared in the top 3.
    """
    # NOTE: this body merges the post-change side of the pasted diff; the
    # scrape interleaved the pre-change call/print lines, which are dropped.
    hits = 0
    latencies_ms: list[float] = []
    print(f"\n=== mode={mode} ===")
    for p in PAIRS:
        top3, elapsed_ms = search(p["q"], mode=mode, k=3)
        latencies_ms.append(elapsed_ms)
        hit = p["expect"] in top3
        if hit:
            hits += 1
        # 1-based rank when found; the literal "miss" otherwise.
        pos = top3.index(p["expect"]) + 1 if hit else "miss"
        mark = "✓" if hit else "✗"
        print(f" [{mark}] pos={str(pos):>4} | q={p['q']!r:40} | {p['note']}")
    score = hits / len(PAIRS)
    p50 = _percentile(latencies_ms, 50)
    p95 = _percentile(latencies_ms, 95)
    p99 = _percentile(latencies_ms, 99)
    print(
        f"Hit@3 ({mode}): {hits}/{len(PAIRS)} = {score * 100:.0f}%"
        f" | latency p50/p95/p99 = {p50:.1f}/{p95:.1f}/{p99:.1f} ms"
    )
    return score


def _percentile(samples: list[float], percentile: int) -> float:
if not samples:
return 0.0
ordered = sorted(samples)
index = max(0, math.ceil((percentile / 100) * len(ordered)) - 1)
return ordered[index]


if __name__ == "__main__":
    # Benchmark every retrieval mode in a fixed order.
    modes = ("hybrid", "semantic", "lexical")
    for current_mode in modes:
        bench(current_mode)
1 change: 1 addition & 0 deletions docs/benchmarks/race.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Exactly 1 response has created=True, 9 have created=False
- No HTTP 500, no unique-constraint error
"""
# ruff: noqa: S310
from __future__ import annotations

import json
Expand Down
105 changes: 105 additions & 0 deletions docs/benchmarks/results-2026-04-19.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# v0.2 jieba benchmark — 2026-04-19

Same Hit@3 methodology as `docs/benchmarks/hit3_recall.py`, but rerun against the repo-local hall in `.data/` because the 2026-04-18 primary corpus (177 entries on mini primary) is not available inside this workspace sandbox.

## Environment

- Host: local repo workspace on macOS
- App: in-process ASGI app via `httpx.ASGITransport`
- Storage: `.data/memory-hall.sqlite3`
- Vector store: `.data/memory-hall-vectors.sqlite3`
- Hall size at test time: 6 entries
- FTS migration step before benchmark: `UV_CACHE_DIR=/tmp/uv-cache uv run --no-sync mh reindex-fts --database-path .data/memory-hall.sqlite3`

## Corpus caveat

- This is **not** the same 177-entry primary hall from `results-2026-04-18.md`, so the raw percentage is not apples-to-apples with the frozen v0.1 baseline.
- What this run does verify: the new pre-tokenized FTS path now hits short pure-CJK lexical queries like `撞牆` on an existing hall after an FTS-only rebuild.

## Ground-truth pairs used in this rerun

| Query | Expected entry_id | Note |
|---|---|---|
| `撞牆` | `01KPG8QXEWH12WETTRG5ZX09JR` | pure CJK short substring |
| `Dockerfile clone 即跑` | `01KPG8QXEWH12WETTRG5ZX09JR` | mixed CJK + English |
| `sticky note` | `01KPG8RVW0EX40C92106YCN6AQ` | English phrase in mixed entry |
| `筆記本 view` | `01KPG8RVW0EX40C92106YCN6AQ` | mixed CJK + English phrase |
| `桌面抽屜` | `01KPG96JV68YJ1H0Y2DE78ESDC` | pure CJK phrase |
| `找回來 列表` | `01KPG96JV68YJ1H0Y2DE78ESDC` | pure CJK paraphrase |
| `說了就記住` | `01KPG9SSYCNMJJZFEAAYP3H3M8` | pure CJK phrase |
| `30 秒 很煩人` | `01KPG9SSYCNMJJZFEAAYP3H3M8` | mixed CJK + numeric |
| `hybrid` | `01KPG9Y7P3J2GNN7WGH6XRD6GD` | English keyword in mixed entry |
| `留下足跡` | `01KPG9Y7P3J2GNN7WGH6XRD6GD` | pure CJK phrase |

## Hit@3

| Mode | Hit@3 | Latency p50 / p95 / p99 |
|---|---|---|
| hybrid | **10/10 = 100%** | **5.1 / 317.6 / 317.6 ms** |
| lexical | **10/10 = 100%** | **1.7 / 2.1 / 2.1 ms** |
| semantic | **0/10 = 0%** | **3.9 / 5.2 / 5.2 ms** |

## Raw run output

```text
=== mode=hybrid ===
[✓] pos= 1 | q='撞牆' | pure CJK short substring
[✓] pos= 1 | q='Dockerfile clone 即跑' | mixed CJK + English
[✓] pos= 1 | q='sticky note' | English phrase in mixed entry
[✓] pos= 1 | q='筆記本 view' | mixed CJK + English phrase
[✓] pos= 1 | q='桌面抽屜' | pure CJK phrase
[✓] pos= 1 | q='找回來 列表' | pure CJK paraphrase
[✓] pos= 1 | q='說了就記住' | pure CJK phrase
[✓] pos= 1 | q='30 秒 很煩人' | mixed CJK + numeric
[✓] pos= 1 | q='hybrid' | English keyword in mixed entry
[✓] pos= 1 | q='留下足跡' | pure CJK phrase
Hit@3 (hybrid): 10/10 = 100% | latency p50/p95/p99 = 5.1/317.6/317.6 ms

=== mode=semantic ===
[✗] pos=miss | q='撞牆' | pure CJK short substring
[✗] pos=miss | q='Dockerfile clone 即跑' | mixed CJK + English
[✗] pos=miss | q='sticky note' | English phrase in mixed entry
[✗] pos=miss | q='筆記本 view' | mixed CJK + English phrase
[✗] pos=miss | q='桌面抽屜' | pure CJK phrase
[✗] pos=miss | q='找回來 列表' | pure CJK paraphrase
[✗] pos=miss | q='說了就記住' | pure CJK phrase
[✗] pos=miss | q='30 秒 很煩人' | mixed CJK + numeric
[✗] pos=miss | q='hybrid' | English keyword in mixed entry
[✗] pos=miss | q='留下足跡' | pure CJK phrase
Hit@3 (semantic): 0/10 = 0% | latency p50/p95/p99 = 3.9/5.2/5.2 ms

=== mode=lexical ===
[✓] pos= 1 | q='撞牆' | pure CJK short substring
[✓] pos= 1 | q='Dockerfile clone 即跑' | mixed CJK + English
[✓] pos= 1 | q='sticky note' | English phrase in mixed entry
[✓] pos= 1 | q='筆記本 view' | mixed CJK + English phrase
[✓] pos= 1 | q='桌面抽屜' | pure CJK phrase
[✓] pos= 1 | q='找回來 列表' | pure CJK paraphrase
[✓] pos= 1 | q='說了就記住' | pure CJK phrase
[✓] pos= 1 | q='30 秒 很煩人' | mixed CJK + numeric
[✓] pos= 1 | q='hybrid' | English keyword in mixed entry
[✓] pos= 1 | q='留下足跡' | pure CJK phrase
Hit@3 (lexical): 10/10 = 100% | latency p50/p95/p99 = 1.7/2.1/2.1 ms
```

## Interpretation

- The lexical failure mode called out in `results-2026-04-18.md` is gone on this hall: short pure-CJK query `撞牆` now resolves through FTS after rebuild.
- Hybrid is still effectively lexical on this tiny corpus; semantic-only remains unhelpful for short queries.
- The new latency columns make the cold-path cost visible: hybrid p95/p99 are dominated by the first run's `jieba` dictionary load, while steady-state lexical calls stay near 2 ms on this corpus.
- A true acceptance rerun against the 2026-04-18 primary corpus still needs to happen on the target hall to confirm the `>= 75%` gate on the original workload.

## Cleanup follow-up measurements

### A. jieba lazy load

- `env UV_CACHE_DIR=/tmp/uv-cache uv run --no-sync mh --help`
- before function-local import: `real 0.33s`
- after function-local import: `real 0.25s`
- qualitative result: `mh --help` no longer imports `jieba` on cold start, so the previous `pkg_resources` warning also disappears from this path.

### C. `mh reindex-fts` cursor streaming

- synthetic CLI verification: `2000` entries streamed with `batch_size=500`
- observed batch shape: `500 + 500 + 500 + 500` (then one empty fetch to terminate)
- regression guard: `tests/test_cli_reindex.py` asserts `_reindex_fts()` never falls back to `limit=None`
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@ dependencies = [
"pydantic>=2.9",
"pydantic-settings>=2.6",
"httpx>=0.27",
"sqlite-vec==0.1.6",
"jieba>=0.42.1",
"sqlite-vec==0.1.9",
"typer>=0.13",
"rich>=13.9",
"ulid-py>=1.1",
"python-multipart>=0.0.12",
]

[project.optional-dependencies]
sqlite-vec = ["sqlite-vec>=0.1.6"]
sqlite-vec = ["sqlite-vec==0.1.9"]
qdrant = ["qdrant-client>=1.12"]
ollama = ["ollama>=0.4"]
openai = ["openai>=1.50"]
Expand Down
53 changes: 52 additions & 1 deletion src/memory_hall/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import asyncio
import json
import time
from pathlib import Path
Expand All @@ -15,7 +16,9 @@
from rich.table import Table

from memory_hall.config import Settings
from memory_hall.models import encode_cursor
from memory_hall.server.app import create_app
from memory_hall.storage.sqlite_store import SqliteStore

app = typer.Typer(no_args_is_help=True, add_completion=False)
console = Console()
Expand Down Expand Up @@ -143,7 +146,7 @@ def get(

@app.command()
def tail(
limit: int = typer.Option(default=20, min=1, max=200),
limit: int = typer.Option(default=20, min=1, max=1000),
interval_s: float = typer.Option(default=2.0, min=0.2),
namespace: list[str] | None = typer.Option(default=None),
agent_id: str | None = typer.Option(default=None),
Expand Down Expand Up @@ -171,3 +174,51 @@ def tail(
seen.add(item["entry_id"])
console.print(f"[{item['created_at']}] {item['entry_id']} {item['content']}")
time.sleep(interval_s)


@app.command("reindex-fts")
def reindex_fts(
tenant_id: str | None = typer.Option(default=None),
batch_size: int = typer.Option(default=500, min=1, max=5000),
database_path: Path | None = typer.Option(default=None),
) -> None:
asyncio.run(
_reindex_fts(
tenant_id=tenant_id,
batch_size=batch_size,
database_path=database_path,
)
)


async def _reindex_fts(
    *,
    tenant_id: str | None,
    batch_size: int,
    database_path: Path | None,
) -> None:
    """Rebuild FTS rows for one tenant by streaming entries in cursor pages.

    Pages through ``store.list_entries`` with an opaque cursor until an
    empty page comes back, re-indexing each batch and printing a one-line
    summary.  The store is always closed, even if a batch fails.
    """
    settings = _settings()
    if database_path is not None:
        settings.database_path = database_path
    active_tenant_id = tenant_id or settings.default_tenant_id
    store = SqliteStore(settings.database_path)
    await store.open()
    try:
        scanned = 0
        reindexed = 0
        cursor: str | None = None
        # Keep fetching pages until the store returns an empty batch.
        while batch := await store.list_entries(
            active_tenant_id,
            limit=batch_size,
            cursor=cursor,
        ):
            scanned += len(batch)
            reindexed += await store.reindex_fts_entries(batch)
            last_entry = batch[-1]
            cursor = encode_cursor(last_entry.created_at, last_entry.entry_id)
        console.print(f"tenant={active_tenant_id} scanned={scanned} reindexed={reindexed}")
    finally:
        await store.close()
Loading
Loading