From 1f4c1df2c780a9bd0b7a9773c80d433534bb3c90 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:45:23 -0700 Subject: [PATCH 1/5] feat: add hybrid BM25 + semantic search via FTS5 Add FTS5 full-text index alongside embeddings for BM25 keyword search. The `search` command now defaults to hybrid mode (BM25 + semantic fused via RRF), with `--mode semantic` and `--mode keyword` alternatives. Falls back gracefully to semantic-only on older DBs without FTS5. Impact: 9 functions changed, 6 affected --- src/cli.js | 30 ++ src/embedder.js | 388 +++++++++++++++++++++--- src/index.js | 4 + src/mcp.js | 131 +++++++- tests/search/embedder-search.test.js | 321 ++++++++++++++++++-- tests/search/embedding-strategy.test.js | 55 ++++ 6 files changed, 847 insertions(+), 82 deletions(-) diff --git a/src/cli.js b/src/cli.js index f63f96bb..bf957889 100644 --- a/src/cli.js +++ b/src/cli.js @@ -556,8 +556,14 @@ program .option('-k, --kind ', 'Filter by kind: function, method, class') .option('--file ', 'Filter by file path pattern') .option('--rrf-k ', 'RRF k parameter for multi-query ranking', '60') + .option('--mode ', 'Search mode: hybrid, semantic, keyword (default: hybrid)') .option('-j, --json', 'Output as JSON') .action(async (query, opts) => { + const validModes = ['hybrid', 'semantic', 'keyword']; + if (opts.mode && !validModes.includes(opts.mode)) { + console.error(`Invalid mode "${opts.mode}". 
Valid: ${validModes.join(', ')}`); + process.exit(1); + } await search(query, opts.db, { limit: parseInt(opts.limit, 10), noTests: resolveNoTests(opts), @@ -566,6 +572,7 @@ program kind: opts.kind, filePattern: opts.file, rrfK: parseInt(opts.rrfK, 10), + mode: opts.mode, json: opts.json, }); }); @@ -834,6 +841,29 @@ program }); }); +program + .command('owners [target]') + .description('Show CODEOWNERS mapping for files and functions') + .option('-d, --db ', 'Path to graph.db') + .option('--owner ', 'Filter to a specific owner') + .option('--boundary', 'Show cross-owner boundary edges') + .option('-f, --file ', 'Scope to a specific file') + .option('-k, --kind ', 'Filter by symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .action(async (target, opts) => { + const { owners } = await import('./owners.js'); + owners(opts.db, { + owner: opts.owner, + boundary: opts.boundary, + file: opts.file || target, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + }); + }); + program .command('branch-compare ') .description('Compare code structure between two branches/refs') diff --git a/src/embedder.js b/src/embedder.js index 4b9d43f0..265f12a6 100644 --- a/src/embedder.js +++ b/src/embedder.js @@ -384,6 +384,22 @@ function initEmbeddingsSchema(db) { value TEXT ); `); + + // Add full_text column (idempotent — ignore if already exists) + try { + db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT'); + } catch { + /* column already exists */ + } + + // FTS5 virtual table for BM25 keyword search + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5( + name, + content, + tokenize='unicode61' + ); + `); } /** @@ -411,6 +427,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = db.exec('DELETE FROM embeddings'); db.exec('DELETE FROM embedding_meta'); + 
db.exec('DELETE FROM fts_index'); const nodes = db .prepare( @@ -445,6 +462,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = const texts = []; const nodeIds = []; + const nodeNames = []; const previews = []; const config = getModelConfig(modelKey); const contextWindow = config.contextWindow; @@ -476,6 +494,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = texts.push(text); nodeIds.push(node.id); + nodeNames.push(node.name); previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`); } } @@ -490,16 +509,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = const { vectors, dim } = await embed(texts, modelKey); const insert = db.prepare( - 'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)', + 'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)', ); + const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)'); const insertMeta = db.prepare('INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)'); const insertAll = db.transaction(() => { for (let i = 0; i < vectors.length; i++) { - insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]); + insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i], texts[i]); + insertFts.run(nodeIds[i], nodeNames[i], texts[i]); } insertMeta.run('model', config.name); insertMeta.run('dim', String(dim)); insertMeta.run('count', String(vectors.length)); + insertMeta.run('fts_count', String(vectors.length)); insertMeta.run('strategy', strategy); insertMeta.run('built_at', new Date().toISOString()); if (overflowCount > 0) { @@ -731,71 +753,361 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { } /** - * Semantic search with pre-filter support — CLI wrapper with multi-query detection. + * Sanitize a user query for FTS5 MATCH syntax. 
+ * Wraps each token as an implicit OR and escapes special FTS5 characters. + */ +function sanitizeFtsQuery(query) { + // Remove FTS5 special chars that could cause syntax errors + const cleaned = query.replace(/[*"():^{}~<>]/g, ' ').trim(); + if (!cleaned) return null; + // Split into tokens, wrap with OR for multi-token queries + const tokens = cleaned.split(/\s+/).filter((t) => t.length > 0); + if (tokens.length === 0) return null; + if (tokens.length === 1) return `"${tokens[0]}"`; + return tokens.map((t) => `"${t}"`).join(' OR '); +} + +/** + * Check if the FTS5 index exists in the database. + * Returns true if fts_index table exists and has rows, false otherwise. + */ +function hasFtsIndex(db) { + try { + const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get(); + return row.c > 0; + } catch { + return false; + } +} + +/** + * BM25 keyword search via FTS5. + * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index. + */ +export function ftsSearchData(query, customDbPath, opts = {}) { + const limit = opts.limit || 15; + const noTests = opts.noTests || false; + const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; + + const db = openReadonlyOrFail(customDbPath); + + if (!hasFtsIndex(db)) { + db.close(); + return null; + } + + const ftsQuery = sanitizeFtsQuery(query); + if (!ftsQuery) { + db.close(); + return { results: [] }; + } + + let sql = ` + SELECT f.rowid AS node_id, rank AS bm25_score, + n.name, n.kind, n.file, n.line + FROM fts_index f + JOIN nodes n ON f.rowid = n.id + WHERE fts_index MATCH ? 
+ `; + const params = [ftsQuery]; + + if (opts.kind) { + sql += ' AND n.kind = ?'; + params.push(opts.kind); + } + + const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern); + if (opts.filePattern && !isGlob) { + sql += ' AND n.file LIKE ?'; + params.push(`%${opts.filePattern}%`); + } + + sql += ' ORDER BY rank LIMIT ?'; + params.push(limit * 5); // fetch generous set for post-filtering + + let rows; + try { + rows = db.prepare(sql).all(...params); + } catch { + // Invalid FTS5 query syntax — return empty + db.close(); + return { results: [] }; + } + + if (isGlob) { + rows = rows.filter((row) => globMatch(row.file, opts.filePattern)); + } + if (noTests) { + rows = rows.filter((row) => !TEST_PATTERN.test(row.file)); + } + + db.close(); + + const results = rows.slice(0, limit).map((row) => ({ + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display + })); + + return { results }; +} + +/** + * Hybrid BM25 + semantic search with RRF fusion. + * Returns { results: [{ name, kind, file, line, rrf, bm25Score, bm25Rank, similarity, semanticRank }] } + * or null if no FTS5 index (caller should fall back to semantic-only). + */ +export async function hybridSearchData(query, customDbPath, opts = {}) { + const limit = opts.limit || 15; + const k = opts.rrfK || 60; + const topK = (opts.limit || 15) * 5; + + // Split semicolons for multi-query support + const queries = + typeof query === 'string' + ? 
query + .split(';') + .map((q) => q.trim()) + .filter((q) => q.length > 0) + : [query]; + + // Check FTS5 availability first (sync, cheap) + const checkDb = openReadonlyOrFail(customDbPath); + const ftsAvailable = hasFtsIndex(checkDb); + checkDb.close(); + if (!ftsAvailable) return null; + + // Collect ranked lists: for each query, one BM25 list + one semantic list + const rankedLists = []; + + for (const q of queries) { + // BM25 ranked list (sync) + const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK }); + if (bm25Data?.results) { + rankedLists.push( + bm25Data.results.map((r, idx) => ({ + key: `${r.name}:${r.file}:${r.line}`, + rank: idx + 1, + source: 'bm25', + ...r, + })), + ); + } + + // Semantic ranked list (async) + const semData = await searchData(q, customDbPath, { + ...opts, + limit: topK, + minScore: opts.minScore || 0.2, + }); + if (semData?.results) { + rankedLists.push( + semData.results.map((r, idx) => ({ + key: `${r.name}:${r.file}:${r.line}`, + rank: idx + 1, + source: 'semantic', + ...r, + })), + ); + } + } + + // RRF fusion across all ranked lists + const fusionMap = new Map(); + for (const list of rankedLists) { + for (const item of list) { + if (!fusionMap.has(item.key)) { + fusionMap.set(item.key, { + name: item.name, + kind: item.kind, + file: item.file, + line: item.line, + rrfScore: 0, + bm25Score: null, + bm25Rank: null, + similarity: null, + semanticRank: null, + }); + } + const entry = fusionMap.get(item.key); + entry.rrfScore += 1 / (k + item.rank); + if (item.source === 'bm25') { + if (entry.bm25Rank === null || item.rank < entry.bm25Rank) { + entry.bm25Score = item.bm25Score; + entry.bm25Rank = item.rank; + } + } else { + if (entry.semanticRank === null || item.rank < entry.semanticRank) { + entry.similarity = item.similarity; + entry.semanticRank = item.rank; + } + } + } + } + + const results = [...fusionMap.values()] + .sort((a, b) => b.rrfScore - a.rrfScore) + .slice(0, limit) + .map((e) => ({ + name: e.name, + 
kind: e.kind, + file: e.file, + line: e.line, + rrf: e.rrfScore, + bm25Score: e.bm25Score, + bm25Rank: e.bm25Rank, + similarity: e.similarity, + semanticRank: e.semanticRank, + })); + + return { results }; +} + +/** + * Search with mode support — CLI wrapper with multi-query detection. + * Modes: 'hybrid' (default), 'semantic', 'keyword' */ export async function search(query, customDbPath, opts = {}) { + const mode = opts.mode || 'hybrid'; + // Split by semicolons, trim, filter empties const queries = query .split(';') .map((q) => q.trim()) .filter((q) => q.length > 0); - if (queries.length <= 1) { - // Single-query path — preserve original output format - const singleQuery = queries[0] || query; - const data = await searchData(singleQuery, customDbPath, opts); - if (!data) return; + const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'); + + // ─── Keyword-only mode ────────────────────────────────────────────── + if (mode === 'keyword') { + const singleQuery = queries.length === 1 ? queries[0] : query; + const data = ftsSearchData(singleQuery, customDbPath, opts); + if (!data) { + console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nSemantic search: "${singleQuery}"\n`); - + console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); if (data.results.length === 0) { - console.log(' No results above threshold.'); + console.log(' No results found.'); } else { for (const r of data.results) { - const bar = '#'.repeat(Math.round(r.similarity * 20)); - const kindIcon = r.kind === 'function' ? 'f' : r.kind === 'class' ? 
'*' : 'o'; - console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); - console.log(` ${kindIcon} ${r.name} -- ${r.file}:${r.line}`); + console.log( + ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); } } - console.log(`\n ${data.results.length} results shown\n`); - } else { - // Multi-query path — RRF ranking - const data = await multiSearchData(queries, customDbPath, opts); - if (!data) return; + return; + } - if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - return; - } + // ─── Semantic-only mode ───────────────────────────────────────────── + if (mode === 'semantic') { + if (queries.length <= 1) { + const singleQuery = queries[0] || query; + const data = await searchData(singleQuery, customDbPath, opts); + if (!data) return; - console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); - queries.forEach((q, i) => { - console.log(` [${i + 1}] "${q}"`); - }); - console.log(); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } - if (data.results.length === 0) { - console.log(' No results above threshold.'); + console.log(`\nSemantic search: "${singleQuery}"\n`); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { + const bar = '#'.repeat(Math.round(r.similarity * 20)); + console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); + console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`); + } + } + console.log(`\n ${data.results.length} results shown\n`); } else { - for (const r of data.results) { - const kindIcon = r.kind === 'function' ? 'f' : r.kind === 'class' ? 
'*' : 'o'; - console.log(` RRF ${r.rrf.toFixed(4)} ${kindIcon} ${r.name} -- ${r.file}:${r.line}`); - for (const qs of r.queryScores) { - const bar = '#'.repeat(Math.round(qs.similarity * 20)); + const data = await multiSearchData(queries, customDbPath, opts); + if (!data) return; + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); + for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); + console.log(); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { console.log( - ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, + ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, ); + for (const qs of r.queryScores) { + const bar = '#'.repeat(Math.round(qs.similarity * 20)); + console.log( + ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, + ); + } } } + console.log(`\n ${data.results.length} results shown\n`); } + return; + } - console.log(`\n ${data.results.length} results shown\n`); + // ─── Hybrid mode (default) ────────────────────────────────────────── + const data = await hybridSearchData(query, customDbPath, opts); + + if (!data) { + // No FTS5 index — fall back to semantic-only + warn( + 'FTS5 index not found — using semantic search only. 
Re-run `codegraph embed` to enable hybrid mode.', + ); + return search(query, customDbPath, { ...opts, mode: 'semantic' }); } + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + const rrfK = opts.rrfK || 60; + if (queries.length <= 1) { + const singleQuery = queries[0] || query; + console.log(`\nHybrid search: "${singleQuery}" (BM25 + semantic, RRF k=${rrfK})\n`); + } else { + console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`); + for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); + console.log(); + } + + if (data.results.length === 0) { + console.log(' No results found.'); + } else { + for (const r of data.results) { + console.log( + ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); + const parts = []; + if (r.bm25Rank != null) { + parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`); + } + if (r.semanticRank != null) { + parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`); + } + if (parts.length > 0) { + console.log(` ${parts.join(' | ')}`); + } + } + } + + console.log(`\n ${data.results.length} results shown\n`); } diff --git a/src/index.js b/src/index.js index 2b539e12..3cef9655 100644 --- a/src/index.js +++ b/src/index.js @@ -55,6 +55,8 @@ export { EMBEDDING_STRATEGIES, embed, estimateTokens, + ftsSearchData, + hybridSearchData, MODELS, multiSearchData, search, @@ -70,6 +72,8 @@ export { setVerbose } from './logger.js'; export { manifesto, manifestoData, RULE_DEFS } from './manifesto.js'; // Native engine export { isNativeAvailable } from './native.js'; +// Ownership (CODEOWNERS) +export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; diff --git a/src/mcp.js b/src/mcp.js index abd41893..ee11bb3c 100644 --- a/src/mcp.js +++ 
b/src/mcp.js @@ -247,13 +247,19 @@ const BASE_TOOLS = [ { name: 'semantic_search', description: - 'Search code symbols by meaning using embeddings (requires prior `codegraph embed`)', + 'Search code symbols by meaning using embeddings and/or keyword matching (requires prior `codegraph embed`). Default hybrid mode combines BM25 keyword + semantic search for best results.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Natural language search query' }, limit: { type: 'number', description: 'Max results to return', default: 15 }, min_score: { type: 'number', description: 'Minimum similarity score (0-1)', default: 0.2 }, + mode: { + type: 'string', + enum: ['hybrid', 'semantic', 'keyword'], + description: + 'Search mode: hybrid (BM25 + semantic, default), semantic (embeddings only), keyword (BM25 only)', + }, }, required: ['query'], }, @@ -491,6 +497,48 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'code_owners', + description: + 'Show CODEOWNERS mapping for files and functions. Shows ownership coverage, per-owner breakdown, and cross-owner boundary edges.', + inputSchema: { + type: 'object', + properties: { + file: { type: 'string', description: 'Scope to a specific file (partial match)' }, + owner: { type: 'string', description: 'Filter to a specific owner (e.g. @team-name)' }, + boundary: { + type: 'boolean', + description: 'Show cross-owner boundary edges', + default: false, + }, + kind: { + type: 'string', + description: 'Filter by symbol kind (function, method, class, etc.)', + }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + }, + }, + }, + { + name: 'branch_compare', + description: + 'Compare code structure between two git refs (branches, tags, commits). 
Shows added/removed/changed symbols and transitive caller impact using temporary git worktrees.', + inputSchema: { + type: 'object', + properties: { + base: { type: 'string', description: 'Base git ref (branch, tag, or commit SHA)' }, + target: { type: 'string', description: 'Target git ref to compare against base' }, + depth: { type: 'number', description: 'Max transitive caller depth', default: 3 }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + format: { + type: 'string', + enum: ['json', 'mermaid'], + description: 'Output format (default: json)', + }, + }, + required: ['base', 'target'], + }, + }, ]; const LIST_REPOS_TOOL = { @@ -704,18 +752,55 @@ export async function startMCPServer(customDbPath, options = {}) { } break; case 'semantic_search': { - const { searchData } = await import('./embedder.js'); - result = await searchData(args.query, dbPath, { - limit: args.limit, - minScore: args.min_score, - }); - if (result === null) { - return { - content: [ - { type: 'text', text: 'Semantic search unavailable. Run `codegraph embed` first.' }, - ], - isError: true, - }; + const mode = args.mode || 'hybrid'; + const searchOpts = { limit: args.limit, minScore: args.min_score }; + + if (mode === 'keyword') { + const { ftsSearchData } = await import('./embedder.js'); + result = ftsSearchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'No FTS5 index found. Run `codegraph embed` to build the keyword index.', + }, + ], + isError: true, + }; + } + } else if (mode === 'semantic') { + const { searchData } = await import('./embedder.js'); + result = await searchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'Semantic search unavailable. 
Run `codegraph embed` first.', + }, + ], + isError: true, + }; + } + } else { + // hybrid (default) — falls back to semantic if no FTS5 + const { hybridSearchData, searchData } = await import('./embedder.js'); + result = await hybridSearchData(args.query, dbPath, searchOpts); + if (result === null) { + result = await searchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'Semantic search unavailable. Run `codegraph embed` first.', + }, + ], + isError: true, + }; + } + } } break; } @@ -859,6 +944,26 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; } + case 'code_owners': { + const { ownersData } = await import('./owners.js'); + result = ownersData(dbPath, { + file: args.file, + owner: args.owner, + boundary: args.boundary, + kind: args.kind, + noTests: args.no_tests, + }); + break; + } + case 'branch_compare': { + const { branchCompareData, branchCompareMermaid } = await import('./branch-compare.js'); + const bcData = await branchCompareData(args.base, args.target, { + depth: args.depth, + noTests: args.no_tests, + }); + result = args.format === 'mermaid' ? 
branchCompareMermaid(bcData) : bcData; + break; + } case 'list_repos': { const { listRepos, pruneRegistry } = await import('./registry.js'); pruneRegistry(); diff --git a/tests/search/embedder-search.test.js b/tests/search/embedder-search.test.js index bbe57b67..93ea518c 100644 --- a/tests/search/embedder-search.test.js +++ b/tests/search/embedder-search.test.js @@ -31,7 +31,14 @@ vi.mock('@huggingface/transformers', () => ({ cos_sim: () => 0, })); -import { cosineSim, multiSearchData, search, searchData } from '../../src/embedder.js'; +import { + cosineSim, + ftsSearchData, + hybridSearchData, + multiSearchData, + search, + searchData, +} from '../../src/embedder.js'; // ─── Helpers ─────────────────────────────────────────────────────────── @@ -48,14 +55,30 @@ function insertNode(db, name, kind, file, line) { .run(name, kind, file, line).lastInsertRowid; } -function insertEmbedding(db, nodeId, vec, preview) { - db.prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)').run( +function insertEmbedding(db, nodeId, vec, preview, fullText) { + db.prepare( + 'INSERT INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)', + ).run(nodeId, Buffer.from(vec.buffer), preview, fullText || preview); +} + +function insertFts(db, nodeId, name, content) { + db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)').run( nodeId, - Buffer.from(vec.buffer), - preview, + name, + content, ); } +/** Capture console.log calls and return joined output. 
*/ +function captureLog(fn) { + const lines = []; + const spy = vi.spyOn(console, 'log').mockImplementation((...args) => lines.push(args.join(' '))); + return fn().then(() => { + spy.mockRestore(); + return lines.join('\n'); + }); +} + // ─── Fixture DB ──────────────────────────────────────────────────────── // // Nodes & vectors: @@ -67,8 +90,15 @@ function insertEmbedding(db, nodeId, vec, preview) { // Query vectors: // "auth" → [1, 0, 0] (cosine: A=1.0, C≈0.707) // "jwt" → [0, 1, 0] (cosine: B=1.0, C≈0.707) +// +// FTS5 content: +// A: "function authenticate (authenticate) in src/auth.js" +// B: "function validateJWT (validate JWT) in src/jwt.js" +// C: "function authMiddleware (auth Middleware) in src/middleware.js" +// D: "function formatDate (format Date) in src/utils.js" let tmpDir, dbPath; +let noFtsDir, noFtsDbPath; beforeAll(() => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-test-')); @@ -83,12 +113,18 @@ beforeAll(() => { node_id INTEGER PRIMARY KEY, vector BLOB NOT NULL, text_preview TEXT, + full_text TEXT, FOREIGN KEY(node_id) REFERENCES nodes(id) ); CREATE TABLE IF NOT EXISTS embedding_meta ( key TEXT PRIMARY KEY, value TEXT ); + CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5( + name, + content, + tokenize='unicode61' + ); `); const idA = insertNode(db, 'authenticate', 'function', 'src/auth.js', 10); @@ -97,26 +133,80 @@ beforeAll(() => { const idD = insertNode(db, 'formatDate', 'function', 'src/utils.js', 1); const S = Math.SQRT1_2; // ≈ 0.7071 - insertEmbedding(db, idA, makeVec([1, 0, 0]), 'authenticate (function) -- src/auth.js:10'); - insertEmbedding(db, idB, makeVec([0, 1, 0]), 'validateJWT (function) -- src/jwt.js:20'); - insertEmbedding(db, idC, makeVec([S, S, 0]), 'authMiddleware (function) -- src/middleware.js:5'); - insertEmbedding(db, idD, makeVec([0, 0, 1]), 'formatDate (function) -- src/utils.js:1'); + const textA = 'function authenticate (authenticate) in src/auth.js\nValidate user credentials'; + const textB = 
'function validateJWT (validate JWT) in src/jwt.js\nCheck JWT token validity'; + const textC = + 'function authMiddleware (auth Middleware) in src/middleware.js\nExpress auth middleware'; + const textD = 'function formatDate (format Date) in src/utils.js\nFormat a date object'; + + insertEmbedding(db, idA, makeVec([1, 0, 0]), 'authenticate (function) -- src/auth.js:10', textA); + insertEmbedding(db, idB, makeVec([0, 1, 0]), 'validateJWT (function) -- src/jwt.js:20', textB); + insertEmbedding( + db, + idC, + makeVec([S, S, 0]), + 'authMiddleware (function) -- src/middleware.js:5', + textC, + ); + insertEmbedding(db, idD, makeVec([0, 0, 1]), 'formatDate (function) -- src/utils.js:1', textD); + + // Populate FTS5 index + insertFts(db, idA, 'authenticate', textA); + insertFts(db, idB, 'validateJWT', textB); + insertFts(db, idC, 'authMiddleware', textC); + insertFts(db, idD, 'formatDate', textD); db.prepare( "INSERT INTO embedding_meta (key, value) VALUES ('model', 'Xenova/all-MiniLM-L6-v2')", ).run(); db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('dim', '384')").run(); db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('count', '4')").run(); + db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('fts_count', '4')").run(); db.close(); // Query vectors used by the mocked embed() QUERY_VECTORS.set('auth', makeVec([1, 0, 0])); QUERY_VECTORS.set('jwt', makeVec([0, 1, 0])); QUERY_VECTORS.set('authenticate', makeVec([0.99, 0.1, 0])); // very similar to 'auth' + QUERY_VECTORS.set('"authenticate"', makeVec([0.99, 0.1, 0])); + QUERY_VECTORS.set('"formatDate"', makeVec([0, 0, 1])); + QUERY_VECTORS.set('buildGraph', makeVec([0.2, 0.2, 0.2])); + QUERY_VECTORS.set('"buildGraph"', makeVec([0.2, 0.2, 0.2])); + + // ─── Second DB without FTS5 (for fallback tests) ──────────────────── + noFtsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-nofts-')); + noFtsDbPath = path.join(noFtsDir, 'graph.db'); + const db2 = new Database(noFtsDbPath); + 
db2.pragma('journal_mode = WAL'); + initSchema(db2); + db2.exec(` + CREATE TABLE IF NOT EXISTS embeddings ( + node_id INTEGER PRIMARY KEY, + vector BLOB NOT NULL, + text_preview TEXT, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE TABLE IF NOT EXISTS embedding_meta ( + key TEXT PRIMARY KEY, + value TEXT + ); + `); + const nfIdA = insertNode(db2, 'hello', 'function', 'src/hello.js', 1); + db2 + .prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)') + .run(nfIdA, Buffer.from(makeVec([1, 0, 0]).buffer), 'hello (function) -- src/hello.js:1'); + db2 + .prepare("INSERT INTO embedding_meta (key, value) VALUES ('model', 'Xenova/all-MiniLM-L6-v2')") + .run(); + db2.prepare("INSERT INTO embedding_meta (key, value) VALUES ('dim', '384')").run(); + db2.prepare("INSERT INTO embedding_meta (key, value) VALUES ('count', '1')").run(); + db2.close(); + QUERY_VECTORS.set('hello', makeVec([1, 0, 0])); }); afterAll(() => { fs.rmSync(tmpDir, { recursive: true, force: true }); + fs.rmSync(noFtsDir, { recursive: true, force: true }); }); // ─── Tests ───────────────────────────────────────────────────────────── @@ -238,42 +328,155 @@ describe('searchData file pattern', () => { }); }); +// ─── FTS5 keyword search tests ───────────────────────────────────────── + +describe('ftsSearchData', () => { + test('returns results sorted by BM25 score', () => { + const data = ftsSearchData('authenticate', dbPath); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + expect(data.results[0].name).toBe('authenticate'); + expect(data.results[0].bm25Score).toBeGreaterThan(0); + }); + + test('respects limit parameter', () => { + const data = ftsSearchData('function', dbPath, { limit: 2 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeLessThanOrEqual(2); + }); + + test('respects kind filter', () => { + const data = ftsSearchData('authenticate', dbPath, { kind: 'function' }); + expect(data).not.toBeNull(); + for (const 
r of data.results) { + expect(r.kind).toBe('function'); + } + }); + + test('respects noTests filter', () => { + // Our fixture has no test files, so all results should pass + const data = ftsSearchData('authenticate', dbPath, { noTests: true }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('returns null when FTS5 table does not exist (old DB)', () => { + const data = ftsSearchData('hello', noFtsDbPath); + expect(data).toBeNull(); + }); + + test('handles special characters in query without crashing', () => { + const data = ftsSearchData('auth*"()', dbPath); + expect(data).not.toBeNull(); + // Should return results or empty array, not throw + expect(data.results).toBeInstanceOf(Array); + }); + + test('result shape has expected fields', () => { + const data = ftsSearchData('authenticate', dbPath); + const r = data.results[0]; + expect(r).toHaveProperty('name'); + expect(r).toHaveProperty('kind'); + expect(r).toHaveProperty('file'); + expect(r).toHaveProperty('line'); + expect(r).toHaveProperty('bm25Score'); + }); + + test('returns empty results for query with no matches', () => { + const data = ftsSearchData('zzzznonexistent', dbPath); + expect(data).not.toBeNull(); + expect(data.results).toHaveLength(0); + }); +}); + +// ─── Hybrid search tests ─────────────────────────────────────────────── + +describe('hybridSearchData', () => { + test('RRF fusion produces higher scores for results in both BM25 and semantic', async () => { + const data = await hybridSearchData('authenticate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + + // authenticate should rank high — it appears in both BM25 (exact match) and semantic + const auth = data.results.find((r) => r.name === 'authenticate'); + expect(auth).toBeDefined(); + expect(auth.rrf).toBeGreaterThan(0); + }); + + test('a result in BM25-only still appears', async () => { + // "formatDate" text matches BM25 for 
"format" but may not match semantically + const data = await hybridSearchData('formatDate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + const fd = data.results.find((r) => r.name === 'formatDate'); + expect(fd).toBeDefined(); + }); + + test('respects rrfK parameter', async () => { + const d60 = await hybridSearchData('authenticate', dbPath, { minScore: 0.01, rrfK: 60 }); + const d10 = await hybridSearchData('authenticate', dbPath, { minScore: 0.01, rrfK: 10 }); + expect(d60).not.toBeNull(); + expect(d10).not.toBeNull(); + // Lower k = higher RRF scores + expect(d10.results[0].rrf).toBeGreaterThan(d60.results[0].rrf); + }); + + test('falls back to null when no FTS5 index', async () => { + const data = await hybridSearchData('hello', noFtsDbPath, { minScore: 0.01 }); + expect(data).toBeNull(); + }); + + test('result shape has hybrid fields', async () => { + const data = await hybridSearchData('authenticate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + const r = data.results[0]; + expect(r).toHaveProperty('rrf'); + expect(r).toHaveProperty('name'); + expect(r).toHaveProperty('kind'); + expect(r).toHaveProperty('file'); + expect(r).toHaveProperty('line'); + // At least one of these should be non-null + expect(r.bm25Rank !== null || r.semanticRank !== null).toBe(true); + }); + + test('multi-query with semicolons works', async () => { + const data = await hybridSearchData('auth ; jwt', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + }); +}); + +// ─── search CLI wrapper tests ────────────────────────────────────────── + describe('search (CLI wrapper)', () => { - /** Capture console.log calls and return joined output. 
*/ - function captureLog(fn) { - const lines = []; - const spy = vi - .spyOn(console, 'log') - .mockImplementation((...args) => lines.push(args.join(' '))); - return fn().then(() => { - spy.mockRestore(); - return lines.join('\n'); - }); - } - - test('single query prints similarity format', async () => { - const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2 })); + test('mode: semantic — single query prints similarity format', async () => { + const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2, mode: 'semantic' })); expect(out).toContain('Semantic search: "auth"'); expect(out).toContain('%'); expect(out).toContain('authenticate'); }); - test('semicolons trigger multi-query RRF format', async () => { - const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.2 })); + test('mode: semantic — semicolons trigger multi-query RRF format', async () => { + const out = await captureLog(() => + search('auth ; jwt', dbPath, { minScore: 0.2, mode: 'semantic' }), + ); expect(out).toContain('Multi-query semantic search'); expect(out).toContain('RRF'); expect(out).toContain('[1] "auth"'); expect(out).toContain('[2] "jwt"'); }); - test('trailing semicolons fall back to single-query', async () => { - const out = await captureLog(() => search('auth ;', dbPath, { minScore: 0.2 })); + test('mode: semantic — trailing semicolons fall back to single-query', async () => { + const out = await captureLog(() => + search('auth ;', dbPath, { minScore: 0.2, mode: 'semantic' }), + ); expect(out).toContain('Semantic search: "auth"'); expect(out).not.toContain('Multi-query'); }); - test('single query with json: true outputs valid JSON with results array', async () => { - const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2, json: true })); + test('mode: semantic — json output has similarity', async () => { + const out = await captureLog(() => + search('auth', dbPath, { minScore: 0.2, json: true, mode: 'semantic' 
}), + ); const parsed = JSON.parse(out); expect(parsed.results).toBeInstanceOf(Array); expect(parsed.results.length).toBeGreaterThan(0); @@ -281,12 +484,68 @@ describe('search (CLI wrapper)', () => { expect(parsed.results[0]).toHaveProperty('name'); }); - test('multi query with json: true outputs valid JSON with rrf and queryScores', async () => { - const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.2, json: true })); + test('mode: semantic — multi query json has rrf and queryScores', async () => { + const out = await captureLog(() => + search('auth ; jwt', dbPath, { minScore: 0.2, json: true, mode: 'semantic' }), + ); const parsed = JSON.parse(out); expect(parsed.results).toBeInstanceOf(Array); expect(parsed.results.length).toBeGreaterThan(0); expect(parsed.results[0]).toHaveProperty('rrf'); expect(parsed.results[0]).toHaveProperty('queryScores'); }); + + test('mode: keyword — prints BM25 format', async () => { + const out = await captureLog(() => search('authenticate', dbPath, { mode: 'keyword' })); + expect(out).toContain('Keyword search'); + expect(out).toContain('BM25'); + expect(out).toContain('authenticate'); + }); + + test('mode: keyword — json output has bm25Score', async () => { + const out = await captureLog(() => + search('authenticate', dbPath, { json: true, mode: 'keyword' }), + ); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('bm25Score'); + }); + + test('mode: keyword — no FTS5 index prints error', async () => { + const out = await captureLog(() => search('hello', noFtsDbPath, { mode: 'keyword' })); + expect(out).toContain('No FTS5 index found'); + }); + + test('default mode (hybrid) — prints hybrid format', async () => { + const out = await captureLog(() => search('authenticate', dbPath, { minScore: 0.01 })); + expect(out).toContain('Hybrid search'); + expect(out).toContain('BM25 + semantic'); 
+ }); + + test('default mode (hybrid) — json output has rrf and bm25/semantic breakdown', async () => { + const out = await captureLog(() => + search('authenticate', dbPath, { minScore: 0.01, json: true }), + ); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('rrf'); + }); + + test('hybrid mode falls back to semantic when no FTS5 index', async () => { + const spy = vi.spyOn(process.stderr, 'write').mockImplementation(() => {}); + const out = await captureLog(() => search('hello', noFtsDbPath, { minScore: 0.01 })); + const stderrOutput = spy.mock.calls.map((c) => c[0]).join(''); + spy.mockRestore(); + // Should fall back gracefully and still produce output + expect(stderrOutput).toContain('FTS5 index not found'); + expect(out).toContain('Semantic search'); + }); + + test('multi-query hybrid works', async () => { + const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.01 })); + expect(out).toContain('Hybrid'); + expect(out).toContain('RRF'); + }); }); diff --git a/tests/search/embedding-strategy.test.js b/tests/search/embedding-strategy.test.js index 5db82bb9..e1553678 100644 --- a/tests/search/embedding-strategy.test.js +++ b/tests/search/embedding-strategy.test.js @@ -234,6 +234,61 @@ describe('buildEmbeddings defaults to structured', () => { }); }); +describe('FTS5 index built alongside embeddings', () => { + test('full_text column is populated in embeddings table', async () => { + EMBEDDED_TEXTS.length = 0; + await buildEmbeddings(tmpDir, 'minilm', dbPath, { strategy: 'structured' }); + + const db = new Database(dbPath, { readonly: true }); + const rows = db.prepare('SELECT full_text FROM embeddings WHERE full_text IS NOT NULL').all(); + db.close(); + expect(rows.length).toBeGreaterThan(0); + // Each full_text should contain structured text content + for (const row of rows) { + 
expect(row.full_text.length).toBeGreaterThan(0); + } + }); + + test('FTS5 row count matches embedding count', async () => { + const db = new Database(dbPath, { readonly: true }); + const embCount = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c; + const ftsCount = db.prepare('SELECT COUNT(*) as c FROM fts_index').get().c; + db.close(); + expect(ftsCount).toBe(embCount); + }); + + test('FTS5 content matches the structured/source text', async () => { + const db = new Database(dbPath, { readonly: true }); + // FTS5 rowid matches embeddings.node_id + const emb = db.prepare('SELECT node_id, full_text FROM embeddings').all(); + for (const row of emb) { + const fts = db.prepare('SELECT content FROM fts_index WHERE rowid = ?').get(row.node_id); + expect(fts).toBeDefined(); + expect(fts.content).toBe(row.full_text); + } + db.close(); + }); + + test('fts_count is stored in metadata', async () => { + const db = new Database(dbPath, { readonly: true }); + const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'fts_count'").get(); + db.close(); + expect(row).toBeDefined(); + expect(Number(row.value)).toBeGreaterThan(0); + }); + + test('FTS5 name column contains symbol names', async () => { + const db = new Database(dbPath, { readonly: true }); + const results = db + .prepare("SELECT rowid, name FROM fts_index WHERE fts_index MATCH 'add'") + .all(); + db.close(); + expect(results.length).toBeGreaterThan(0); + const names = results.map((r) => r.name); + expect(names).toContain('add'); + }); +}); + describe('context window overflow detection', () => { let bigDir, bigDbPath; From b9f597270ca9cdcd1be5b1762e5ad8fb51da3dc4 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:53:24 -0700 Subject: [PATCH 2/5] fix: update MCP test tool list and remove premature owners.js export Add code_owners and branch_compare to ALL_TOOL_NAMES in mcp.test.js to match the tools added in the previous commit. 
Remove owners.js re-export from index.js since that module is not yet on this branch. --- src/index.js | 2 -- tests/unit/mcp.test.js | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index 3cef9655..73fcb01c 100644 --- a/src/index.js +++ b/src/index.js @@ -72,8 +72,6 @@ export { setVerbose } from './logger.js'; export { manifesto, manifestoData, RULE_DEFS } from './manifesto.js'; // Native engine export { isNativeAvailable } from './native.js'; -// Ownership (CODEOWNERS) -export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index a221fafb..1c082085 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -33,6 +33,8 @@ const ALL_TOOL_NAMES = [ 'complexity', 'manifesto', 'communities', + 'code_owners', + 'branch_compare', 'list_repos', ]; From d1eaf8be90497560c5992a99b97bc2627bada967 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:42:30 -0700 Subject: [PATCH 3/5] fix(ci): escape dots in version grep patterns and add duplicate-skip guards Escape dots in VERSION before using as PCRE pattern to prevent false matches (e.g. 2.4.0 matching 2x4x0). Also add "Check for existing benchmark" steps to skip re-running benchmarks for already-reported versions, and detect untracked files in change detection. 
--- .github/workflows/benchmark.yml | 136 ++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 24 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 4e952b56..864ee594 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -56,8 +56,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/BUILD-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in BUILD-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -73,6 +87,7 @@ jobs: exit 1 - name: Run build benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -81,25 +96,33 @@ jobs: node scripts/benchmark.js $ARGS 2>/dev/null > benchmark-result.json - name: Update build report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-benchmark-report.js benchmark-result.json - name: Upload build result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: build-benchmark-result path: benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -164,8 +187,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping" + echo "skip=true" >> 
"$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -181,6 +218,7 @@ jobs: exit 1 - name: Cache HuggingFace models + if: steps.existing.outputs.skip != 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface @@ -188,9 +226,11 @@ jobs: restore-keys: hf-models-${{ runner.os }}- - name: Build graph + if: steps.existing.outputs.skip != 'true' run: node src/cli.js build . - name: Run embedding benchmark + if: steps.existing.outputs.skip != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -201,25 +241,31 @@ jobs: node scripts/embedding-benchmark.js $ARGS 2>/dev/null > embedding-benchmark-result.json - name: Update embedding report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-embedding-report.js embedding-benchmark-result.json - name: Upload embedding result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: embedding-benchmark-result path: embedding-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! 
git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -284,8 +330,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/QUERY-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in QUERY-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -301,6 +361,7 @@ jobs: exit 1 - name: Run query benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -309,25 +370,31 @@ jobs: node scripts/query-benchmark.js $ARGS 2>/dev/null > query-benchmark-result.json - name: Update query report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-query-report.js query-benchmark-result.json - name: Upload query result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: query-benchmark-result path: query-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -392,8 +459,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in INCREMENTAL-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + 
- name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -409,6 +490,7 @@ jobs: exit 1 - name: Run incremental benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -417,25 +499,31 @@ jobs: node scripts/incremental-benchmark.js $ARGS 2>/dev/null > incremental-benchmark-result.json - name: Update incremental report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-incremental-report.js incremental-benchmark-result.json - name: Upload incremental result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: incremental-benchmark-result path: incremental-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! 
git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} From 8ea86afc6a1aad8d9e2a8943e1fb42a7d642f6f7 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:49:33 -0700 Subject: [PATCH 4/5] Revert "fix(ci): escape dots in version grep patterns and add duplicate-skip guards" This reverts commit d1eaf8be90497560c5992a99b97bc2627bada967. --- .github/workflows/benchmark.yml | 136 ++++++-------------------------- 1 file changed, 24 insertions(+), 112 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 864ee594..4e952b56 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -56,22 +56,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/BUILD-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in BUILD-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on 
npm..." @@ -87,7 +73,6 @@ jobs: exit 1 - name: Run build benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -96,33 +81,25 @@ jobs: node scripts/benchmark.js $ARGS 2>/dev/null > benchmark-result.json - name: Update build report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-benchmark-report.js benchmark-result.json - name: Upload build result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: build-benchmark-result path: benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - # Detect modified tracked files - if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then - CHANGED=true - fi - # Detect newly created (untracked) files - if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -187,22 +164,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping" - echo 
"skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -218,7 +181,6 @@ jobs: exit 1 - name: Cache HuggingFace models - if: steps.existing.outputs.skip != 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface @@ -226,11 +188,9 @@ jobs: restore-keys: hf-models-${{ runner.os }}- - name: Build graph - if: steps.existing.outputs.skip != 'true' run: node src/cli.js build . - name: Run embedding benchmark - if: steps.existing.outputs.skip != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -241,31 +201,25 @@ jobs: node scripts/embedding-benchmark.js $ARGS 2>/dev/null > embedding-benchmark-result.json - name: Update embedding report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-embedding-report.js embedding-benchmark-result.json - name: Upload embedding result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: embedding-benchmark-result path: embedding-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! 
git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -330,22 +284,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/QUERY-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in QUERY-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -361,7 +301,6 @@ jobs: exit 1 - name: Run query benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -370,31 +309,25 @@ jobs: node scripts/query-benchmark.js $ARGS 2>/dev/null > query-benchmark-result.json - name: Update query report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-query-report.js query-benchmark-result.json - name: Upload query result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: query-benchmark-result path: query-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -459,22 +392,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in INCREMENTAL-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - 
- name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -490,7 +409,6 @@ jobs: exit 1 - name: Run incremental benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -499,31 +417,25 @@ jobs: node scripts/incremental-benchmark.js $ARGS 2>/dev/null > incremental-benchmark-result.json - name: Update incremental report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-incremental-report.js incremental-benchmark-result.json - name: Upload incremental result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: incremental-benchmark-result path: incremental-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! 
git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} From dc9ca9f0e7e501990a74c19bf5ce4d2e2bb0de6d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 18:22:06 -0700 Subject: [PATCH 5/5] feat: universal pagination, NDJSON streaming, and generator APIs (#207) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: universal pagination, NDJSON streaming, and generator APIs Extend limit/offset pagination to all 21 MCP tools that return arrays, add --ndjson streaming to ~14 CLI commands, and introduce generator functions for memory-efficient iteration on large codebases. 
Phase 1 — Universal Pagination: - Add PAGINATION_PROPS to 15 additional MCP tool schemas - Wire limit/offset through all MCP handlers with per-tool defaults - Apply paginateResult in data functions across queries, complexity, communities, manifesto, flow, cochange, and structure modules - Expand MCP_DEFAULTS with sensible caps for each tool Phase 2 — NDJSON Streaming: - Add printNdjson helper to paginate.js - Add --ndjson/--limit/--offset options to CLI commands - Refactor existing NDJSON handlers to use shared helper Phase 3 — Generator/Iterator APIs: - Add iterListFunctions, iterRoles, iterWhere generators (queries.js) - Add iterComplexity generator (complexity.js) - All use better-sqlite3 .iterate() with try/finally cleanup - Export from index.js public API Impact: 30 functions changed, 19 affected * perf(queries): hoist prepared statements out of iterWhere loop Move crossFileCallers and uses prepared statements above the iteration loop so they are created once and reused per row, instead of being re-prepared on every iteration. 
--- src/cli.js | 98 +++++++++- src/cochange.js | 7 +- src/communities.js | 8 +- src/complexity.js | 93 ++++++++- src/flow.js | 8 +- src/index.js | 6 +- src/manifesto.js | 9 +- src/mcp.js | 67 ++++++- src/paginate.js | 34 ++++ src/queries.js | 191 ++++++++++++++++-- src/structure.js | 7 +- tests/integration/context.test.js | 16 +- tests/integration/pagination.test.js | 276 ++++++++++++++++++++++++++- tests/unit/mcp.test.js | 10 + 14 files changed, 777 insertions(+), 53 deletions(-) diff --git a/src/cli.js b/src/cli.js index 41f14272..c6d72cf7 100644 --- a/src/cli.js +++ b/src/cli.js @@ -16,6 +16,7 @@ import { } from './embedder.js'; import { exportDOT, exportJSON, exportMermaid } from './export.js'; import { setVerbose } from './logger.js'; +import { printNdjson } from './paginate.js'; import { ALL_SYMBOL_KINDS, context, @@ -127,8 +128,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((file, opts) => { - impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json }); + impactAnalysis(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); }); program @@ -164,8 +174,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((file, opts) => { - fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json }); + fileDeps(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); }); program @@ -178,6 +197,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -189,6 +211,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -202,6 +227,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -213,6 +241,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -258,6 +289,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -271,6 +305,9 @@ program noTests: resolveNoTests(opts), includeTests: opts.withTestSource, json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -282,11 +319,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((target, opts) => { explain(target, opts.db, { depth: parseInt(opts.depth, 10), noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -327,6 +370,9 @@ program .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') .option('-f, --format ', 'Output format: text, mermaid, json', 'text') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((ref, opts) => { diffImpact(opts.db, { ref, @@ -335,6 +381,9 @@ program noTests: resolveNoTests(opts), json: opts.json, format: opts.format, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -640,6 +689,8 @@ program .option('--rrf-k ', 'RRF k parameter for multi-query ranking', '60') .option('--mode ', 'Search mode: hybrid, semantic, keyword (default: hybrid)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (query, opts) => { const validModes = ['hybrid', 'semantic', 'keyword']; if (opts.mode && !validModes.includes(opts.mode)) { @@ -671,6 +722,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (dir, opts) => { const { structureData, formatStructure } = await import('./structure.js'); const data = structureData(opts.db, { @@ -679,8 +733,12 @@ program sort: opts.sort, full: opts.full, noTests: resolveNoTests(opts), + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, }); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'directories'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatStructure(data)); @@ -699,15 +757,20 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { const { hotspotsData, formatHotspots } = await import('./structure.js'); const data = hotspotsData(opts.db, { metric: opts.metric, level: opts.level, limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, noTests: resolveNoTests(opts), }); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'hotspots'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatHotspots(data)); @@ -757,6 +820,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (file, opts) => { const { analyzeCoChanges, coChangeData, coChangeTopData, formatCoChange, formatCoChangeTop } = await import('./cochange.js'); @@ -783,20 +848,25 @@ program const queryOpts = { limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, minJaccard: opts.minJaccard ? 
parseFloat(opts.minJaccard) : config.coChange?.minJaccard, noTests: resolveNoTests(opts), }; if (file) { const data = coChangeData(file, opts.db, queryOpts); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'partners'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatCoChange(data)); } } else { const data = coChangeTopData(opts.db, queryOpts); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'pairs'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatCoChangeTop(data)); @@ -860,6 +930,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (target, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -869,6 +941,7 @@ program complexity(opts.db, { target, limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, sort: opts.sort, aboveThreshold: opts.aboveThreshold, health: opts.health, @@ -876,6 +949,7 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + ndjson: opts.ndjson, }); }); @@ -888,6 +962,9 @@ program .option('-f, --file ', 'Scope to file (partial match)') .option('-k, --kind ', 'Filter by symbol kind') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". 
Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -899,6 +976,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -912,6 +992,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { const { communities } = await import('./communities.js'); communities(opts.db, { @@ -920,6 +1003,9 @@ program drift: opts.drift, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); diff --git a/src/cochange.js b/src/cochange.js index b08ce8db..d1fb2ed3 100644 --- a/src/cochange.js +++ b/src/cochange.js @@ -11,6 +11,7 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; import { closeDb, findDbPath, initSchema, openDb, openReadonlyOrFail } from './db.js'; import { warn } from './logger.js'; +import { paginateResult } from './paginate.js'; import { isTestFile } from './queries.js'; /** @@ -313,7 +314,8 @@ export function coChangeData(file, customDbPath, opts = {}) { const meta = getCoChangeMeta(db); closeDb(db); - return { file: resolvedFile, partners, meta }; + const base = { file: resolvedFile, partners, meta }; + return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); } /** @@ -365,7 +367,8 @@ export function coChangeTopData(customDbPath, opts = {}) { const meta = getCoChangeMeta(db); closeDb(db); - return { pairs, meta }; + const base = { pairs, meta }; + return paginateResult(base, 
'pairs', { limit: opts.limit, offset: opts.offset }); } /** diff --git a/src/communities.js b/src/communities.js index 7eba4071..926b611b 100644 --- a/src/communities.js +++ b/src/communities.js @@ -2,6 +2,7 @@ import path from 'node:path'; import Graph from 'graphology'; import louvain from 'graphology-communities-louvain'; import { openReadonlyOrFail } from './db.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { isTestFile } from './queries.js'; // ─── Graph Construction ─────────────────────────────────────────────── @@ -201,7 +202,7 @@ export function communitiesData(customDbPath, opts = {}) { const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100); - return { + const base = { communities: opts.drift ? [] : communities, modularity: +modularity.toFixed(4), drift: { splitCandidates, mergeCandidates }, @@ -212,6 +213,7 @@ export function communitiesData(customDbPath, opts = {}) { driftScore, }, }; + return paginateResult(base, 'communities', { limit: opts.limit, offset: opts.offset }); } /** @@ -238,6 +240,10 @@ export function communitySummaryForStats(customDbPath, opts = {}) { export function communities(customDbPath, opts = {}) { const data = communitiesData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'communities'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/complexity.js b/src/complexity.js index 01ffee18..30bee701 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -3,6 +3,7 @@ import path from 'node:path'; import { loadConfig } from './config.js'; import { openReadonlyOrFail } from './db.js'; import { info } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { LANGUAGE_REGISTRY } from './parser.js'; import { isTestFile } from './queries.js'; @@ -1887,10 +1888,9 @@ export function complexityData(customDbPath, opts = {}) { FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id ${where} ${having} - 
ORDER BY ${orderBy} - LIMIT ?`, + ORDER BY ${orderBy}`, ) - .all(...params, limit); + .all(...params); } catch { db.close(); return { functions: [], summary: null, thresholds }; @@ -1980,7 +1980,88 @@ export function complexityData(customDbPath, opts = {}) { } db.close(); - return { functions, summary, thresholds }; + const base = { functions, summary, thresholds }; + return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); +} + +/** + * Generator: stream complexity rows one-by-one using .iterate() for memory efficiency. + * @param {string} [customDbPath] + * @param {object} [opts] + * @param {boolean} [opts.noTests] + * @param {string} [opts.file] + * @param {string} [opts.target] + * @param {string} [opts.kind] + * @param {string} [opts.sort] + * @yields {{ name: string, kind: string, file: string, line: number, cognitive: number, cyclomatic: number, maxNesting: number, loc: number, sloc: number }} + */ +export function* iterComplexity(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const sort = opts.sort || 'cognitive'; + + let where = "WHERE n.kind IN ('function','method')"; + const params = []; + + if (noTests) { + where += ` AND n.file NOT LIKE '%.test.%' + AND n.file NOT LIKE '%.spec.%' + AND n.file NOT LIKE '%__test__%' + AND n.file NOT LIKE '%__tests__%' + AND n.file NOT LIKE '%.stories.%'`; + } + if (opts.target) { + where += ' AND n.name LIKE ?'; + params.push(`%${opts.target}%`); + } + if (opts.file) { + where += ' AND n.file LIKE ?'; + params.push(`%${opts.file}%`); + } + if (opts.kind) { + where += ' AND n.kind = ?'; + params.push(opts.kind); + } + + const orderMap = { + cognitive: 'fc.cognitive DESC', + cyclomatic: 'fc.cyclomatic DESC', + nesting: 'fc.max_nesting DESC', + mi: 'fc.maintainability_index ASC', + volume: 'fc.halstead_volume DESC', + effort: 'fc.halstead_effort DESC', + bugs: 'fc.halstead_bugs DESC', + loc: 'fc.loc DESC', + }; + const 
orderBy = orderMap[sort] || 'fc.cognitive DESC'; + + const stmt = db.prepare( + `SELECT n.name, n.kind, n.file, n.line, n.end_line, + fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.loc, fc.sloc + FROM function_complexity fc + JOIN nodes n ON fc.node_id = n.id + ${where} + ORDER BY ${orderBy}`, + ); + for (const r of stmt.iterate(...params)) { + if (noTests && isTestFile(r.file)) continue; + yield { + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + endLine: r.end_line || null, + cognitive: r.cognitive, + cyclomatic: r.cyclomatic, + maxNesting: r.max_nesting, + loc: r.loc || 0, + sloc: r.sloc || 0, + }; + } + } finally { + db.close(); + } } /** @@ -1989,6 +2070,10 @@ export function complexityData(customDbPath, opts = {}) { export function complexity(customDbPath, opts = {}) { const data = complexityData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'functions'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/flow.js b/src/flow.js index 93381652..ab59fe45 100644 --- a/src/flow.js +++ b/src/flow.js @@ -6,7 +6,7 @@ */ import { openReadonlyOrFail } from './db.js'; -import { paginateResult } from './paginate.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { isTestFile, kindIcon } from './queries.js'; import { FRAMEWORK_ENTRY_PREFIXES } from './structure.js'; @@ -204,7 +204,7 @@ export function flowData(name, dbPath, opts = {}) { } db.close(); - return { + const base = { entry, depth: maxDepth, steps, @@ -213,6 +213,7 @@ export function flowData(name, dbPath, opts = {}) { totalReached: visited.size - 1, // exclude the entry node itself truncated, }; + return paginateResult(base, 'steps', { limit: opts.limit, offset: opts.offset }); } /** @@ -293,8 +294,7 @@ export function flow(name, dbPath, opts = {}) { offset: opts.offset, }); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const e of data.entries) 
console.log(JSON.stringify(e)); + printNdjson(data, 'entries'); return; } if (opts.json) { diff --git a/src/index.js b/src/index.js index b195d8c6..c9f5f862 100644 --- a/src/index.js +++ b/src/index.js @@ -30,6 +30,7 @@ export { computeLOCMetrics, computeMaintainabilityIndex, HALSTEAD_RULES, + iterComplexity, } from './complexity.js'; // Configuration export { loadConfig } from './config.js'; @@ -75,7 +76,7 @@ export { isNativeAvailable } from './native.js'; // Ownership (CODEOWNERS) export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities -export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; +export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult, printNdjson } from './paginate.js'; // Unified parser API export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; @@ -92,6 +93,9 @@ export { fnDepsData, fnImpactData, impactAnalysisData, + iterListFunctions, + iterRoles, + iterWhere, kindIcon, moduleMapData, pathData, diff --git a/src/manifesto.js b/src/manifesto.js index 8fc907ff..3549860a 100644 --- a/src/manifesto.js +++ b/src/manifesto.js @@ -2,6 +2,7 @@ import { loadConfig } from './config.js'; import { findCycles } from './cycles.js'; import { openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; // ─── Rule Definitions ───────────────────────────────────────────────── @@ -354,12 +355,13 @@ export function manifestoData(customDbPath, opts = {}) { violationCount: violations.length, }; - return { + const base = { rules: ruleResults, violations, summary, passed: failViolations.length === 0, }; + return paginateResult(base, 'violations', { limit: opts.limit, offset: opts.offset }); } finally { db.close(); } @@ -371,6 +373,11 @@ export function manifestoData(customDbPath, opts = {}) { export function manifesto(customDbPath, opts = {}) { const data = 
manifestoData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'violations'); + if (!data.passed) process.exit(1); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); if (!data.passed) process.exit(1); diff --git a/src/mcp.js b/src/mcp.js index ee11bb3c..19732931 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -50,6 +50,7 @@ const BASE_TOOLS = [ properties: { file: { type: 'string', description: 'File path (partial match supported)' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['file'], }, @@ -62,6 +63,7 @@ const BASE_TOOLS = [ properties: { file: { type: 'string', description: 'File path to analyze' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['file'], }, @@ -103,6 +105,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -126,6 +129,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -190,6 +194,7 @@ const BASE_TOOLS = [ description: 'Include test file source code', default: false, }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -203,6 +208,7 @@ const BASE_TOOLS = [ properties: { target: { type: 'string', description: 'File path or function name' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['target'], }, @@ -241,6 +247,7 @@ const BASE_TOOLS = [ enum: ['json', 'mermaid'], description: 'Output format (default: json)', }, + ...PAGINATION_PROPS, }, }, }, @@ -260,6 +267,7 @@ const BASE_TOOLS = [ description: 'Search mode: hybrid (BM25 + semantic, default), semantic (embeddings only), keyword 
(BM25 only)', }, + ...PAGINATION_PROPS, }, required: ['query'], }, @@ -318,6 +326,7 @@ const BASE_TOOLS = [ description: 'Return all files without limit', default: false, }, + ...PAGINATION_PROPS, }, }, }, @@ -358,6 +367,7 @@ const BASE_TOOLS = [ }, limit: { type: 'number', description: 'Number of results to return', default: 10 }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -379,6 +389,7 @@ const BASE_TOOLS = [ default: 0.3, }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -405,6 +416,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -452,6 +464,7 @@ const BASE_TOOLS = [ type: 'string', description: 'Filter by symbol kind (function, method, class, etc.)', }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -468,6 +481,7 @@ const BASE_TOOLS = [ type: 'string', description: 'Filter by symbol kind (function, method, class, etc.)', }, + ...PAGINATION_PROPS, }, }, }, @@ -494,6 +508,7 @@ const BASE_TOOLS = [ default: false, }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, }, }, @@ -671,10 +686,18 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; case 'file_deps': - result = fileDepsData(args.file, dbPath, { noTests: args.no_tests }); + result = fileDepsData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.file_deps, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); break; case 'impact_analysis': - result = impactAnalysisData(args.file, dbPath, { noTests: args.no_tests }); + result = impactAnalysisData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.impact_analysis, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); break; case 'find_cycles': { const db = new Database(findDbPath(dbPath), { readonly: true }); @@ -692,6 +715,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.fn_deps, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'fn_impact': @@ -700,6 +725,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.fn_impact, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'symbol_path': @@ -721,10 +748,16 @@ export async function startMCPServer(customDbPath, options = {}) { noSource: args.no_source, noTests: args.no_tests, includeTests: args.include_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.context, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'explain': - result = explainData(args.target, dbPath, { noTests: args.no_tests }); + result = explainData(args.target, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.explain, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); break; case 'where': result = whereData(args.target, dbPath, { @@ -748,12 +781,18 @@ export async function startMCPServer(customDbPath, options = {}) { ref: args.ref, depth: args.depth, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.diff_impact, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, }); } break; case 'semantic_search': { const mode = args.mode || 'hybrid'; - const searchOpts = { limit: args.limit, minScore: args.min_score }; + const searchOpts = { + limit: Math.min(args.limit ?? MCP_DEFAULTS.semantic_search, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + minScore: args.min_score, + }; if (mode === 'keyword') { const { ftsSearchData } = await import('./embedder.js'); @@ -864,6 +903,8 @@ export async function startMCPServer(customDbPath, options = {}) { depth: args.depth, sort: args.sort, full: args.full, + limit: Math.min(args.limit ?? MCP_DEFAULTS.structure, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } @@ -872,7 +913,8 @@ export async function startMCPServer(customDbPath, options = {}) { result = hotspotsData(dbPath, { metric: args.metric, level: args.level, - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.hotspots, MCP_MAX_LIMIT), + offset: args.offset ?? 0, noTests: args.no_tests, }); break; @@ -881,12 +923,14 @@ export async function startMCPServer(customDbPath, options = {}) { const { coChangeData, coChangeTopData } = await import('./cochange.js'); result = args.file ? coChangeData(args.file, dbPath, { - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.co_changes, MCP_MAX_LIMIT), + offset: args.offset ?? 0, minJaccard: args.min_jaccard, noTests: args.no_tests, }) : coChangeTopData(dbPath, { - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.co_changes, MCP_MAX_LIMIT), + offset: args.offset ?? 0, minJaccard: args.min_jaccard, noTests: args.no_tests, }); @@ -899,6 +943,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.execution_flow, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, }); break; } @@ -916,7 +962,8 @@ export async function startMCPServer(customDbPath, options = {}) { result = complexityData(dbPath, { target: args.name, file: args.file, - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.complexity, MCP_MAX_LIMIT), + offset: args.offset ?? 0, sort: args.sort, aboveThreshold: args.above_threshold, health: args.health, @@ -931,6 +978,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, noTests: args.no_tests, kind: args.kind, + limit: Math.min(args.limit ?? MCP_DEFAULTS.manifesto, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } @@ -941,6 +990,8 @@ export async function startMCPServer(customDbPath, options = {}) { resolution: args.resolution, drift: args.drift, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.communities, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } diff --git a/src/paginate.js b/src/paginate.js index 7109f0bc..a93ec1da 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -7,12 +7,29 @@ /** Default limits applied by MCP tool handlers (not by the programmatic API). */ export const MCP_DEFAULTS = { + // Existing list_functions: 100, query_function: 50, where: 50, node_roles: 100, list_entry_points: 100, export_graph: 500, + // Smaller defaults for rich/nested results + fn_deps: 10, + fn_impact: 5, + context: 5, + explain: 10, + file_deps: 20, + diff_impact: 30, + impact_analysis: 20, + semantic_search: 20, + execution_flow: 50, + hotspots: 20, + co_changes: 20, + complexity: 30, + manifesto: 50, + communities: 20, + structure: 30, }; /** Hard cap to prevent abuse via MCP. */ @@ -68,3 +85,20 @@ export function paginateResult(result, field, { limit, offset } = {}) { const { items, pagination } = paginate(arr, { limit, offset }); return { ...result, [field]: items, _pagination: pagination }; } + +/** + * Print data as newline-delimited JSON (NDJSON). 
+ * + * Emits a `_meta` line with pagination info (if present), then one JSON + * line per item in the named array field. + * + * @param {object} data - Result object (may contain `_pagination`) + * @param {string} field - Array field name to stream (e.g. `'results'`) + */ +export function printNdjson(data, field) { + if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); + const items = data[field]; + if (Array.isArray(items)) { + for (const item of items) console.log(JSON.stringify(item)); + } +} diff --git a/src/queries.js b/src/queries.js index 9b1929ab..2a8df478 100644 --- a/src/queries.js +++ b/src/queries.js @@ -6,7 +6,7 @@ import { findCycles } from './cycles.js'; import { findDbPath, openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; import { ownersForFiles } from './owners.js'; -import { paginateResult } from './paginate.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { LANGUAGE_REGISTRY } from './parser.js'; /** @@ -392,7 +392,8 @@ export function fileDepsData(file, customDbPath, opts = {}) { }); db.close(); - return { file, results }; + const base = { file, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function fnDepsData(name, customDbPath, opts = {}) { @@ -512,7 +513,8 @@ export function fnDepsData(name, customDbPath, opts = {}) { }); db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function fnImpactData(name, customDbPath, opts = {}) { @@ -526,7 +528,7 @@ export function fnImpactData(name, customDbPath, opts = {}) { return { name, results: [] }; } - const results = nodes.slice(0, 3).map((node) => { + const results = nodes.map((node) => { const visited = new Set([node.id]); const levels = {}; let frontier = [node.id]; @@ -565,7 +567,8 @@ export function fnImpactData(name, customDbPath, opts = {}) { }); 
db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function pathData(from, to, customDbPath, opts = {}) { @@ -1016,7 +1019,7 @@ export function diffImpactData(customDbPath, opts = {}) { } db.close(); - return { + const base = { changedFiles: changedRanges.size, newFiles: [...newFiles], affectedFunctions: functionResults, @@ -1031,6 +1034,7 @@ export function diffImpactData(customDbPath, opts = {}) { ownersAffected: ownership ? ownership.affectedOwners.length : 0, }, }; + return paginateResult(base, 'affectedFunctions', { limit: opts.limit, offset: opts.offset }); } export function diffImpactMermaid(customDbPath, opts = {}) { @@ -1178,6 +1182,131 @@ export function listFunctionsData(customDbPath, opts = {}) { return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } +/** + * Generator: stream functions one-by-one using .iterate() for memory efficiency. 
+ * @param {string} [customDbPath] + * @param {object} [opts] + * @param {boolean} [opts.noTests] + * @param {string} [opts.file] + * @param {string} [opts.pattern] + * @yields {{ name: string, kind: string, file: string, line: number, role: string|null }} + */ +export function* iterListFunctions(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const kinds = ['function', 'method', 'class']; + const placeholders = kinds.map(() => '?').join(', '); + + const conditions = [`kind IN (${placeholders})`]; + const params = [...kinds]; + + if (opts.file) { + conditions.push('file LIKE ?'); + params.push(`%${opts.file}%`); + } + if (opts.pattern) { + conditions.push('name LIKE ?'); + params.push(`%${opts.pattern}%`); + } + + const stmt = db.prepare( + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + ); + for (const row of stmt.iterate(...params)) { + if (noTests && isTestFile(row.file)) continue; + yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role }; + } + } finally { + db.close(); + } +} + +/** + * Generator: stream role-classified symbols one-by-one. 
+ * @param {string} [customDbPath]
+ * @param {object} [opts]
+ * @param {boolean} [opts.noTests]
+ * @param {string} [opts.role]
+ * @param {string} [opts.file]
+ * @yields {{ name: string, kind: string, file: string, line: number, role: string }}
+ */
+export function* iterRoles(customDbPath, opts = {}) {
+  const db = openReadonlyOrFail(customDbPath);
+  try {
+    const noTests = opts.noTests || false;
+    const conditions = ['role IS NOT NULL'];
+    const params = [];
+
+    if (opts.role) {
+      conditions.push('role = ?');
+      params.push(opts.role);
+    }
+    if (opts.file) {
+      conditions.push('file LIKE ?');
+      params.push(`%${opts.file}%`);
+    }
+
+    const stmt = db.prepare(
+      `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`,
+    );
+    for (const row of stmt.iterate(...params)) {
+      if (noTests && isTestFile(row.file)) continue;
+      yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role };
+    }
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Generator: stream symbol lookup results one-by-one. "exported" is a heuristic: true when the symbol has at least one caller in another file. NOTE(review): under noTests, symbols in test files are skipped and test callers are filtered from "uses", but test-file callers still count toward "exported" — confirm this asymmetry is intended.
+ * @param {string} target - Symbol name to search for (partial match)
+ * @param {string} [customDbPath]
+ * @param {object} [opts]
+ * @param {boolean} [opts.noTests]
+ * @yields {{ name: string, kind: string, file: string, line: number, role: string|null, exported: boolean, uses: object[] }}
+ */
+export function* iterWhere(target, customDbPath, opts = {}) {
+  const db = openReadonlyOrFail(customDbPath);
+  try {
+    const noTests = opts.noTests || false;
+    const placeholders = ALL_SYMBOL_KINDS.map(() => '?').join(', ');
+    const stmt = db.prepare(
+      `SELECT * FROM nodes WHERE name LIKE ? AND kind IN (${placeholders}) ORDER BY file, line`,
+    );
+    const crossFileCallersStmt = db.prepare(
+      `SELECT COUNT(*) as cnt FROM edges e JOIN nodes n ON e.source_id = n.id
+       WHERE e.target_id = ? 
AND e.kind = 'calls' AND n.file != ?`, + ); + const usesStmt = db.prepare( + `SELECT n.name, n.file, n.line FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls'`, + ); + for (const node of stmt.iterate(`%${target}%`, ...ALL_SYMBOL_KINDS)) { + if (noTests && isTestFile(node.file)) continue; + + const crossFileCallers = crossFileCallersStmt.get(node.id, node.file); + const exported = crossFileCallers.cnt > 0; + + let uses = usesStmt.all(node.id); + if (noTests) uses = uses.filter((u) => !isTestFile(u.file)); + + yield { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + role: node.role || null, + exported, + uses: uses.map((u) => ({ name: u.name, file: u.file, line: u.line })), + }; + } + } finally { + db.close(); + } +} + export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; @@ -1572,8 +1701,7 @@ export function queryName(name, customDbPath, opts = {}) { offset: opts.offset, }); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const r of data.results) console.log(JSON.stringify(r)); + printNdjson(data, 'results'); return; } if (opts.json) { @@ -1605,7 +1733,11 @@ export function queryName(name, customDbPath, opts = {}) { } export function impactAnalysis(file, customDbPath, opts = {}) { - const data = impactAnalysisData(file, customDbPath, { noTests: opts.noTests }); + const data = impactAnalysisData(file, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'sources'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1664,7 +1796,11 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) { } export function fileDeps(file, customDbPath, opts = {}) { - const data = fileDepsData(file, customDbPath, { noTests: opts.noTests }); + const data = fileDepsData(file, customDbPath, opts); + if (opts.ndjson) { + 
printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1695,6 +1831,10 @@ export function fileDeps(file, customDbPath, opts = {}) { export function fnDeps(name, customDbPath, opts = {}) { const data = fnDepsData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1863,8 +2003,7 @@ export function contextData(name, customDbPath, opts = {}) { return { name, results: [] }; } - // Limit to first 5 results - nodes = nodes.slice(0, 5); + // No hardcoded slice — pagination handles bounding via limit/offset // File-lines cache to avoid re-reading the same file const fileCache = new Map(); @@ -2069,11 +2208,16 @@ export function contextData(name, customDbPath, opts = {}) { }); db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function context(name, customDbPath, opts = {}) { const data = contextData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2429,11 +2573,16 @@ export function explainData(target, customDbPath, opts = {}) { } db.close(); - return { target, kind, results }; + const base = { target, kind, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function explain(target, customDbPath, opts = {}) { const data = explainData(target, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2664,8 +2813,7 @@ export function whereData(target, customDbPath, opts = {}) { export function where(target, customDbPath, opts = {}) { const data = whereData(target, customDbPath, opts); if (opts.ndjson) { - if 
(data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const r of data.results) console.log(JSON.stringify(r)); + printNdjson(data, 'results'); return; } if (opts.json) { @@ -2756,8 +2904,7 @@ export function rolesData(customDbPath, opts = {}) { export function roles(customDbPath, opts = {}) { const data = rolesData(customDbPath, opts); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const s of data.symbols) console.log(JSON.stringify(s)); + printNdjson(data, 'symbols'); return; } if (opts.json) { @@ -2798,6 +2945,10 @@ export function roles(customDbPath, opts = {}) { export function fnImpact(name, customDbPath, opts = {}) { const data = fnImpactData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2830,6 +2981,10 @@ export function diffImpact(customDbPath, opts = {}) { return; } const data = diffImpactData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'affectedFunctions'); + return; + } if (opts.json || opts.format === 'json') { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/structure.js b/src/structure.js index ca92ed51..a4c28f41 100644 --- a/src/structure.js +++ b/src/structure.js @@ -2,6 +2,7 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; import { openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; +import { paginateResult } from './paginate.js'; import { isTestFile } from './queries.js'; // ─── Build-time: insert directory nodes, contains edges, and metrics ──── @@ -463,7 +464,8 @@ export function structureData(customDbPath, opts = {}) { } } - return { directories: result, count: result.length }; + const base = { directories: result, count: result.length }; + return paginateResult(base, 'directories', { limit: opts.limit, offset: opts.offset }); } /** @@ -534,7 
+536,8 @@ export function hotspotsData(customDbPath, opts = {}) { })); db.close(); - return { metric, level, limit, hotspots }; + const base = { metric, level, limit, hotspots }; + return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset }); } /** diff --git a/tests/integration/context.test.js b/tests/integration/context.test.js index 39070576..fd2779a7 100644 --- a/tests/integration/context.test.js +++ b/tests/integration/context.test.js @@ -229,10 +229,18 @@ describe('contextData', () => { expect(pfResult).toBeDefined(); }); - test('limits results to 5', () => { - // We only have a few functions, so this mainly checks the cap logic doesn't crash - const data = contextData('', dbPath); // empty name matches everything via LIKE '%%' - expect(data.results.length).toBeLessThanOrEqual(5); + test('limits results with pagination', () => { + // Without limit, all matches are returned (no hardcoded cap) + const all = contextData('', dbPath); // empty name matches everything via LIKE '%%' + expect(all.results.length).toBeGreaterThan(0); + + // With limit, results are capped and pagination metadata is present + const data = contextData('', dbPath, { limit: 2, offset: 0 }); + expect(data.results.length).toBeLessThanOrEqual(2); + if (all.results.length > 2) { + expect(data._pagination).toBeDefined(); + expect(data._pagination.hasMore).toBe(true); + } }); test('includeTests includes test source', () => { diff --git a/tests/integration/pagination.test.js b/tests/integration/pagination.test.js index 4bf652f8..46824881 100644 --- a/tests/integration/pagination.test.js +++ b/tests/integration/pagination.test.js @@ -21,8 +21,27 @@ import { afterAll, beforeAll, describe, expect, test } from 'vitest'; import { initSchema } from '../../src/db.js'; import { exportDOT, exportJSON, exportMermaid } from '../../src/export.js'; import { listEntryPointsData } from '../../src/flow.js'; -import { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from 
'../../src/paginate.js'; -import { listFunctionsData, queryNameData, rolesData, whereData } from '../../src/queries.js'; +import { + MCP_DEFAULTS, + MCP_MAX_LIMIT, + paginate, + paginateResult, + printNdjson, +} from '../../src/paginate.js'; +import { + contextData, + explainData, + fileDepsData, + fnDepsData, + fnImpactData, + iterListFunctions, + iterRoles, + iterWhere, + listFunctionsData, + queryNameData, + rolesData, + whereData, +} from '../../src/queries.js'; // ─── Helpers ─────────────────────────────────────────────────────────── @@ -297,6 +316,259 @@ describe('listEntryPointsData with pagination', () => { }); }); +// ─── fileDepsData with pagination ───────────────────────────────────── + +describe('fileDepsData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fileDepsData('a.js', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fileDepsData('', dbPath); + if (full.results.length > 1) { + const paginated = fileDepsData('', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + expect(paginated._pagination.hasMore).toBe(true); + } + }); +}); + +// ─── fnDepsData with pagination ────────────────────────────────────── + +describe('fnDepsData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fnDepsData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fnDepsData('a', dbPath); + if (full.results.length > 1) { + const paginated = fnDepsData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + expect(paginated._pagination.hasMore).toBe(true); + } + }); +}); + +// ─── fnImpactData with pagination 
──────────────────────────────────── + +describe('fnImpactData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fnImpactData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fnImpactData('a', dbPath); + if (full.results.length > 1) { + const paginated = fnImpactData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── contextData with pagination ───────────────────────────────────── + +describe('contextData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = contextData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = contextData('a', dbPath); + if (full.results.length > 1) { + const paginated = contextData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── explainData with pagination ───────────────────────────────────── + +describe('explainData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = explainData('a.js', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = explainData('', dbPath); + if (full.results.length > 1) { + const paginated = explainData('', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── MCP new defaults ──────────────────────────────────────────────── + +describe('MCP new defaults', () => { + test('MCP_DEFAULTS has new pagination keys', () => { + expect(MCP_DEFAULTS.fn_deps).toBe(10); + 
expect(MCP_DEFAULTS.fn_impact).toBe(5); + expect(MCP_DEFAULTS.context).toBe(5); + expect(MCP_DEFAULTS.explain).toBe(10); + expect(MCP_DEFAULTS.file_deps).toBe(20); + expect(MCP_DEFAULTS.diff_impact).toBe(30); + expect(MCP_DEFAULTS.semantic_search).toBe(20); + expect(MCP_DEFAULTS.execution_flow).toBe(50); + expect(MCP_DEFAULTS.hotspots).toBe(20); + expect(MCP_DEFAULTS.co_changes).toBe(20); + expect(MCP_DEFAULTS.complexity).toBe(30); + expect(MCP_DEFAULTS.manifesto).toBe(50); + expect(MCP_DEFAULTS.communities).toBe(20); + expect(MCP_DEFAULTS.structure).toBe(30); + }); +}); + +// ─── Iterator/Generator APIs ───────────────────────────────────────── + +describe('iterListFunctions', () => { + test('yields all functions matching listFunctionsData', () => { + const full = listFunctionsData(dbPath); + const iter = [...iterListFunctions(dbPath)]; + expect(iter.length).toBe(full.functions.length); + for (const item of iter) { + expect(item).toHaveProperty('name'); + expect(item).toHaveProperty('kind'); + expect(item).toHaveProperty('file'); + expect(item).toHaveProperty('line'); + } + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterListFunctions(dbPath)) { + count++; + if (count >= 2) break; + } + expect(count).toBe(2); + // If the DB leaked, subsequent operations would fail + const data = listFunctionsData(dbPath); + expect(data.functions.length).toBeGreaterThan(0); + }); + + test('noTests filtering works', () => { + const all = [...iterListFunctions(dbPath)]; + const noTests = [...iterListFunctions(dbPath, { noTests: true })]; + // Should not include test files (fixture has none, so counts equal) + expect(noTests.length).toBeLessThanOrEqual(all.length); + }); +}); + +describe('iterRoles', () => { + test('yields all role-classified symbols', () => { + const full = rolesData(dbPath); + const iter = [...iterRoles(dbPath)]; + expect(iter.length).toBe(full.count); + for (const item of iter) { + expect(item.role).toBeTruthy(); + 
} + }); + + test('role filter works', () => { + const coreOnly = [...iterRoles(dbPath, { role: 'core' })]; + for (const item of coreOnly) { + expect(item.role).toBe('core'); + } + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterRoles(dbPath)) { + count++; + if (count >= 1) break; + } + expect(count).toBe(1); + const data = rolesData(dbPath); + expect(data.count).toBeGreaterThan(0); + }); +}); + +describe('iterWhere', () => { + test('yields matching symbols with uses', () => { + const iter = [...iterWhere('alpha', dbPath)]; + expect(iter.length).toBeGreaterThan(0); + const alpha = iter.find((r) => r.name === 'alpha'); + expect(alpha).toBeDefined(); + expect(alpha).toHaveProperty('exported'); + expect(alpha).toHaveProperty('uses'); + expect(Array.isArray(alpha.uses)).toBe(true); + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterWhere('a', dbPath)) { + count++; + if (count >= 1) break; + } + expect(count).toBe(1); + const data = whereData('alpha', dbPath); + expect(data.results.length).toBeGreaterThan(0); + }); +}); + +// ─── printNdjson utility ───────────────────────────────────────────── + +describe('printNdjson', () => { + test('outputs JSON lines for array field', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson({ items: [{ a: 1 }, { b: 2 }] }, 'items'); + expect(logs).toHaveLength(2); + expect(JSON.parse(logs[0])).toEqual({ a: 1 }); + expect(JSON.parse(logs[1])).toEqual({ b: 2 }); + } finally { + console.log = origLog; + } + }); + + test('emits _meta when _pagination exists', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson( + { items: [{ x: 1 }], _pagination: { total: 10, offset: 0, limit: 1, hasMore: true } }, + 'items', + ); + expect(logs).toHaveLength(2); + const meta = 
JSON.parse(logs[0]); + expect(meta._meta).toBeDefined(); + expect(meta._meta.total).toBe(10); + } finally { + console.log = origLog; + } + }); + + test('handles empty array', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson({ items: [] }, 'items'); + expect(logs).toHaveLength(0); + } finally { + console.log = origLog; + } + }); +}); + // ─── MCP default limits ────────────────────────────────────────────── describe('MCP defaults', () => { diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 1c082085..3df9d876 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -340,6 +340,8 @@ describe('startMCPServer handler dispatch', () => { file: 'src/app.js', kind: 'function', noTests: true, + limit: 10, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -392,7 +394,11 @@ describe('startMCPServer handler dispatch', () => { expect(result.isError).toBeUndefined(); expect(fnImpactMock).toHaveBeenCalledWith('handleClick', '/tmp/test.db', { depth: undefined, + file: undefined, + kind: undefined, noTests: undefined, + limit: 5, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -448,6 +454,8 @@ describe('startMCPServer handler dispatch', () => { ref: undefined, depth: undefined, noTests: undefined, + limit: 30, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -1067,8 +1075,10 @@ describe('startMCPServer handler dispatch', () => { target: 'buildGraph', file: 'src/builder.js', limit: 10, + offset: 0, sort: 'cyclomatic', aboveThreshold: true, + health: undefined, noTests: true, kind: 'function', });