Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/brainlayer/enrichment_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ def get_unsubmitted_export_files(*args, **kwargs):

def _get_gemini_client():
"""Create Gemini client. Uses regional endpoint when GOOGLE_CLOUD_REGION is set."""
from google import genai
try:
from google import genai
except ImportError:
raise RuntimeError("google-genai package not installed. Install with: pip install google-genai")

api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY")
if not api_key:
Expand Down
76 changes: 38 additions & 38 deletions tests/test_enrichment_controller.py
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Low

def test_gemini_client_requires_api_key(monkeypatch):
from brainlayer.enrichment_controller import _get_gemini_client
monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
monkeypatch.delenv("GOOGLE_GENERATIVE_AI_API_KEY", raising=False)
with pytest.raises(RuntimeError, match="not set|not installed"):
_get_gemini_client()

test_gemini_client_requires_api_key uses match="not set|not installed" which makes the test pass for two unrelated error conditions. When google-genai is not installed, the ImportError is raised before the API key check at lines 91-93 is ever reached, so the test passes without verifying that the API key validation logic works. Consider splitting this into two tests: one that mocks a failed import to verify the import error, and another that patches google.genai with an empty environment to verify the API key check specifically.

-    with pytest.raises(RuntimeError, match="not set|not installed"):
+    with pytest.raises(RuntimeError, match="not set"):
         _get_gemini_client()
🚀 Reply "fix it for me" or copy this AI Prompt for your agent:
In file tests/test_enrichment_controller.py around lines 434-440:

`test_gemini_client_requires_api_key` uses `match="not set|not installed"` which makes the test pass for two unrelated error conditions. When `google-genai` is not installed, the `ImportError` is raised before the API key check at lines 91-93 is ever reached, so the test passes without verifying that the API key validation logic works. Consider splitting this into two tests: one that mocks a failed import to verify the import error, and another that patches `google.genai` with an empty environment to verify the API key check specifically.

Evidence trail:
tests/test_enrichment_controller.py lines 434-441 (REVIEWED_COMMIT): Test `test_gemini_client_requires_api_key` uses `match="not set|not installed"`. src/brainlayer/enrichment_controller.py lines 82-94 (REVIEWED_COMMIT): `_get_gemini_client()` has ImportError handler at lines 87-89 raising "not installed" error, and API key check at lines 91-93 raising "not set" error. The ImportError is caught before the API key check is reached.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
Target: 35+ tests per A-R2 acceptance criteria.
"""

import json
from types import SimpleNamespace
from unittest.mock import MagicMock

Expand Down Expand Up @@ -251,19 +250,22 @@ def test_enrich_local_does_not_call_build_external_prompt(monkeypatch):
external_prompt.assert_not_called()


def test_enrich_batch_uses_checkpoint_db_for_resume(monkeypatch):
def test_enrich_batch_returns_early_for_no_candidates(monkeypatch):
from brainlayer import enrichment_controller as controller

store = MagicMock()
ensure_mock = MagicMock()
monkeypatch.setattr(controller, "ensure_checkpoint_table", ensure_mock)
monkeypatch.setattr(controller, "get_pending_jobs", MagicMock(return_value=[]))
monkeypatch.setattr(controller, "get_unsubmitted_export_files", MagicMock(return_value=[]))
store.get_enrichment_candidates.return_value = []

# _get_gemini_client should never be reached when there are no candidates
monkeypatch.setattr(
controller, "_get_gemini_client", lambda: (_ for _ in ()).throw(AssertionError("should not be called"))
)

result = controller.enrich_batch(store, phase="run", limit=100)
result = controller.enrich_batch(store, limit=100)

ensure_mock.assert_called_once_with(store)
store.get_enrichment_candidates.assert_called_once_with(limit=100, chunk_ids=None)
assert result.mode == "batch"
assert result.enriched == 0


# ── Content-hash dedup tests ─────────────────────────────────────────────────
Expand Down Expand Up @@ -434,7 +436,7 @@ def test_gemini_client_requires_api_key(monkeypatch):

monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
monkeypatch.delenv("GOOGLE_GENERATIVE_AI_API_KEY", raising=False)
with pytest.raises(RuntimeError, match="not set"):
with pytest.raises(RuntimeError, match="not set|not installed"):
_get_gemini_client()


Expand Down Expand Up @@ -762,8 +764,10 @@ async def test_brain_enrich_handler_stats_mode(monkeypatch):

result = await _brain_enrich(stats=True)
assert result.isError is not True
data = json.loads(result.content[0].text)
assert "total_chunks" in data
text = result.content[0].text
# _enrich_stats returns formatted text with box-drawing chars, not JSON
assert "Total:" in text
assert "Enriched:" in text


@pytest.mark.asyncio
Expand All @@ -777,51 +781,47 @@ async def test_enrich_stats_returns_correct_structure():
store._read_cursor.return_value = cursor

result = await _enrich_stats(store)
data = json.loads(result.content[0].text)
text = result.content[0].text

assert data["total_chunks"] == 1000
assert data["enriched"] == 600
assert data["unenriched_eligible"] == 350
assert data["skipped_too_short"] == 50
assert data["enriched_pct"] == 60.0
assert data["enriched_last_24h"] == 20
# _enrich_stats returns formatted text lines, not JSON
assert "Total: 1,000" in text
assert "Enriched: 600" in text
assert "(60.0%)" in text
assert "Remaining: 350" in text
assert "Skipped: 50" in text
assert "Last 24h: 20" in text


# ── Batch mode tests ─────────────────────────────────────────────────────────


def test_enrich_batch_poll_phase_only_checks_pending(monkeypatch):
def test_enrich_batch_processes_candidates_with_gemini(monkeypatch):
from brainlayer import enrichment_controller as controller

store = MagicMock()
monkeypatch.setattr(controller, "ensure_checkpoint_table", MagicMock())
pending_mock = MagicMock(return_value=[{"id": "job1"}])
monkeypatch.setattr(controller, "get_pending_jobs", pending_mock)
export_mock = MagicMock(return_value=[])
monkeypatch.setattr(controller, "get_unsubmitted_export_files", export_mock)
store.get_enrichment_candidates.return_value = [_candidate("c1"), _candidate("c2")]
_patch_realtime_deps(monkeypatch, controller, store)

result = controller.enrich_batch(store, phase="poll")
result = controller.enrich_batch(store, limit=10)

pending_mock.assert_called_once()
export_mock.assert_not_called()
assert result.attempted == 1
assert result.mode == "batch"
assert result.attempted == 2
assert result.enriched == 2
Comment on lines +798 to +809
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if _is_duplicate_content is called in enrich_batch
ast-grep --pattern $'def enrich_batch($$$):
  $$$
  _is_duplicate_content($$$)
  $$$
'

Repository: EtanHey/brainlayer

Length of output: 44


🏁 Script executed:

# First, find the enrichment_controller.py file and examine enrich_batch implementation
fd enrichment_controller.py -type f

Repository: EtanHey/brainlayer

Length of output: 232


🏁 Script executed:

# Search for _is_duplicate_content calls throughout the codebase
rg "_is_duplicate_content" -n --context=3

Repository: EtanHey/brainlayer

Length of output: 5915


🏁 Script executed:

# Check the test file to see what's actually being tested
fd test_enrichment_controller.py -type f

Repository: EtanHey/brainlayer

Length of output: 232


🏁 Script executed:

# View the _is_duplicate_content implementation
sed -n '116,140p' src/brainlayer/enrichment_controller.py

Repository: EtanHey/brainlayer

Length of output: 1020


🏁 Script executed:

# View the test in question (lines 798-809)
sed -n '798,809p' tests/test_enrichment_controller.py

Repository: EtanHey/brainlayer

Length of output: 518


🏁 Script executed:

# Check the _patch_realtime_deps helper to see what it patches
sed -n '40,60p' tests/test_enrichment_controller.py

Repository: EtanHey/brainlayer

Length of output: 1077


🏁 Script executed:

# Run the specific test to verify it passes
cd tests && python -m pytest test_enrichment_controller.py::test_enrich_batch_processes_candidates_with_gemini -v

Repository: EtanHey/brainlayer

Length of output: 3479


Explicitly mock _is_duplicate_content for deterministic test behavior.

The _is_duplicate_content function (line 452 in enrich_batch) is called for each chunk but is not explicitly mocked in this test. With a plain MagicMock store, the function's SQL query chain (store._read_cursor().execute(...).fetchone()) returns MagicMocks, causing row[0] > 0 to evaluate to a truthy MagicMock—potentially marking all chunks as duplicates and incorrectly skipping them.

While existing tests for enrich_realtime and enrich_local (lines 376, 392) explicitly mock this function, this test does not. Add an explicit mock for clarity and robustness:

♻️ Suggested improvement
 def test_enrich_batch_processes_candidates_with_gemini(monkeypatch):
     from brainlayer import enrichment_controller as controller

     store = MagicMock()
     store.get_enrichment_candidates.return_value = [_candidate("c1"), _candidate("c2")]
     _patch_realtime_deps(monkeypatch, controller, store)
+    monkeypatch.setattr(controller, "_is_duplicate_content", lambda s, c: False)

     result = controller.enrich_batch(store, limit=10)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/test_enrichment_controller.py` around lines 798 - 809, The test
test_enrich_batch_processes_candidates_with_gemini should explicitly mock
controller._is_duplicate_content so duplicate-detection doesn't rely on
MagicMock SQL return values; update the test to monkeypatch
controller._is_duplicate_content (used inside enrich_batch) to return False (or
a deterministic value) before calling controller.enrich_batch so the store
MagicMock behavior doesn't mark chunks as duplicates and the assertions for
attempted/enriched remain deterministic.



def test_enrich_batch_submit_phase_only_checks_exports(monkeypatch):
def test_enrich_batch_graceful_when_no_gemini_key(monkeypatch):
from brainlayer import enrichment_controller as controller

store = MagicMock()
monkeypatch.setattr(controller, "ensure_checkpoint_table", MagicMock())
pending_mock = MagicMock(return_value=[])
monkeypatch.setattr(controller, "get_pending_jobs", pending_mock)
export_mock = MagicMock(return_value=["f1.jsonl", "f2.jsonl"])
monkeypatch.setattr(controller, "get_unsubmitted_export_files", export_mock)
store.get_enrichment_candidates.return_value = [_candidate()]

result = controller.enrich_batch(store, phase="submit")
monkeypatch.setattr(controller, "_get_gemini_client", lambda: (_ for _ in ()).throw(RuntimeError("no key")))

pending_mock.assert_not_called()
export_mock.assert_called_once()
assert result.attempted == 2
result = controller.enrich_batch(store, limit=5)

assert result.mode == "batch"
assert result.enriched == 0
assert any("No Gemini client" in e for e in result.errors)


# ── Realtime chunk_ids filter test ────────────────────────────────────────────
Expand Down
6 changes: 3 additions & 3 deletions tests/test_phase3_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,9 @@ def test_brain_digest_description_teaches_routing():
assert "on schedule for backfill" in desc
assert "faceted tags" in desc
assert "sanitizes pii" in desc
assert "realtime" in desc
assert "batch" in desc
assert "local" in desc
assert "digest" in desc
assert "connect" in desc
assert "enrich" in desc


# --- Task 4: brain_entity MCP tool ---
Expand Down
4 changes: 3 additions & 1 deletion tests/test_phase6_critical.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,12 @@ def test_compact_format_output_size(self):
# importance is now included in compact format for relevance visibility
assert "importance" in compact

# tags is now included in compact format for filtering
assert "tags" in compact

# Verbose fields dropped
for dropped in (
"content_type",
"tags",
"intent",
"source_file",
"session_summary",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_think_recall_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,13 +246,13 @@ class TestMCPToolCount:
"""Verify MCP server has correct tool count."""

def test_tool_count(self):
"""MCP server should have 11 tools: search, store, recall, digest, entity, get_person, update, expand, tags, supersede, archive."""
"""MCP server should have 12 tools: search, store, recall, digest, entity, get_person, update, expand, tags, supersede, archive, enrich."""
import asyncio

from brainlayer.mcp import list_tools

tools = asyncio.run(list_tools())
assert len(tools) == 11
assert len(tools) == 12

def test_consolidated_tools_registered(self):
"""brain_search, brain_store, brain_recall are registered."""
Expand Down
Loading