From 1f4c1df2c780a9bd0b7a9773c80d433534bb3c90 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:45:23 -0700 Subject: [PATCH 1/5] feat: add hybrid BM25 + semantic search via FTS5 Add FTS5 full-text index alongside embeddings for BM25 keyword search. The `search` command now defaults to hybrid mode (BM25 + semantic fused via RRF), with `--mode semantic` and `--mode keyword` alternatives. Falls back gracefully to semantic-only on older DBs without FTS5. Impact: 9 functions changed, 6 affected --- src/cli.js | 30 ++ src/embedder.js | 388 +++++++++++++++++++++--- src/index.js | 4 + src/mcp.js | 131 +++++++- tests/search/embedder-search.test.js | 321 ++++++++++++++++++-- tests/search/embedding-strategy.test.js | 55 ++++ 6 files changed, 847 insertions(+), 82 deletions(-) diff --git a/src/cli.js b/src/cli.js index f63f96bb..bf957889 100644 --- a/src/cli.js +++ b/src/cli.js @@ -556,8 +556,14 @@ program .option('-k, --kind ', 'Filter by kind: function, method, class') .option('--file ', 'Filter by file path pattern') .option('--rrf-k ', 'RRF k parameter for multi-query ranking', '60') + .option('--mode ', 'Search mode: hybrid, semantic, keyword (default: hybrid)') .option('-j, --json', 'Output as JSON') .action(async (query, opts) => { + const validModes = ['hybrid', 'semantic', 'keyword']; + if (opts.mode && !validModes.includes(opts.mode)) { + console.error(`Invalid mode "${opts.mode}". 
Valid: ${validModes.join(', ')}`); + process.exit(1); + } await search(query, opts.db, { limit: parseInt(opts.limit, 10), noTests: resolveNoTests(opts), @@ -566,6 +572,7 @@ program kind: opts.kind, filePattern: opts.file, rrfK: parseInt(opts.rrfK, 10), + mode: opts.mode, json: opts.json, }); }); @@ -834,6 +841,29 @@ program }); }); +program + .command('owners [target]') + .description('Show CODEOWNERS mapping for files and functions') + .option('-d, --db ', 'Path to graph.db') + .option('--owner ', 'Filter to a specific owner') + .option('--boundary', 'Show cross-owner boundary edges') + .option('-f, --file ', 'Scope to a specific file') + .option('-k, --kind ', 'Filter by symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .action(async (target, opts) => { + const { owners } = await import('./owners.js'); + owners(opts.db, { + owner: opts.owner, + boundary: opts.boundary, + file: opts.file || target, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + }); + }); + program .command('branch-compare ') .description('Compare code structure between two branches/refs') diff --git a/src/embedder.js b/src/embedder.js index 4b9d43f0..265f12a6 100644 --- a/src/embedder.js +++ b/src/embedder.js @@ -384,6 +384,22 @@ function initEmbeddingsSchema(db) { value TEXT ); `); + + // Add full_text column (idempotent — ignore if already exists) + try { + db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT'); + } catch { + /* column already exists */ + } + + // FTS5 virtual table for BM25 keyword search + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5( + name, + content, + tokenize='unicode61' + ); + `); } /** @@ -411,6 +427,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = db.exec('DELETE FROM embeddings'); db.exec('DELETE FROM embedding_meta'); + 
db.exec('DELETE FROM fts_index'); const nodes = db .prepare( @@ -445,6 +462,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = const texts = []; const nodeIds = []; + const nodeNames = []; const previews = []; const config = getModelConfig(modelKey); const contextWindow = config.contextWindow; @@ -476,6 +494,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = texts.push(text); nodeIds.push(node.id); + nodeNames.push(node.name); previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`); } } @@ -490,16 +509,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = const { vectors, dim } = await embed(texts, modelKey); const insert = db.prepare( - 'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)', + 'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)', ); + const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)'); const insertMeta = db.prepare('INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)'); const insertAll = db.transaction(() => { for (let i = 0; i < vectors.length; i++) { - insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]); + insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i], texts[i]); + insertFts.run(nodeIds[i], nodeNames[i], texts[i]); } insertMeta.run('model', config.name); insertMeta.run('dim', String(dim)); insertMeta.run('count', String(vectors.length)); + insertMeta.run('fts_count', String(vectors.length)); insertMeta.run('strategy', strategy); insertMeta.run('built_at', new Date().toISOString()); if (overflowCount > 0) { @@ -731,71 +753,361 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { } /** - * Semantic search with pre-filter support — CLI wrapper with multi-query detection. + * Sanitize a user query for FTS5 MATCH syntax. 
+ * Wraps each token as an implicit OR and escapes special FTS5 characters. + */ +function sanitizeFtsQuery(query) { + // Remove FTS5 special chars that could cause syntax errors + const cleaned = query.replace(/[*"():^{}~<>]/g, ' ').trim(); + if (!cleaned) return null; + // Split into tokens, wrap with OR for multi-token queries + const tokens = cleaned.split(/\s+/).filter((t) => t.length > 0); + if (tokens.length === 0) return null; + if (tokens.length === 1) return `"${tokens[0]}"`; + return tokens.map((t) => `"${t}"`).join(' OR '); +} + +/** + * Check if the FTS5 index exists in the database. + * Returns true if fts_index table exists and has rows, false otherwise. + */ +function hasFtsIndex(db) { + try { + const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get(); + return row.c > 0; + } catch { + return false; + } +} + +/** + * BM25 keyword search via FTS5. + * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index. + */ +export function ftsSearchData(query, customDbPath, opts = {}) { + const limit = opts.limit || 15; + const noTests = opts.noTests || false; + const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; + + const db = openReadonlyOrFail(customDbPath); + + if (!hasFtsIndex(db)) { + db.close(); + return null; + } + + const ftsQuery = sanitizeFtsQuery(query); + if (!ftsQuery) { + db.close(); + return { results: [] }; + } + + let sql = ` + SELECT f.rowid AS node_id, rank AS bm25_score, + n.name, n.kind, n.file, n.line + FROM fts_index f + JOIN nodes n ON f.rowid = n.id + WHERE fts_index MATCH ? 
+ `; + const params = [ftsQuery]; + + if (opts.kind) { + sql += ' AND n.kind = ?'; + params.push(opts.kind); + } + + const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern); + if (opts.filePattern && !isGlob) { + sql += ' AND n.file LIKE ?'; + params.push(`%${opts.filePattern}%`); + } + + sql += ' ORDER BY rank LIMIT ?'; + params.push(limit * 5); // fetch generous set for post-filtering + + let rows; + try { + rows = db.prepare(sql).all(...params); + } catch { + // Invalid FTS5 query syntax — return empty + db.close(); + return { results: [] }; + } + + if (isGlob) { + rows = rows.filter((row) => globMatch(row.file, opts.filePattern)); + } + if (noTests) { + rows = rows.filter((row) => !TEST_PATTERN.test(row.file)); + } + + db.close(); + + const results = rows.slice(0, limit).map((row) => ({ + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display + })); + + return { results }; +} + +/** + * Hybrid BM25 + semantic search with RRF fusion. + * Returns { results: [{ name, kind, file, line, rrf, bm25Score, bm25Rank, similarity, semanticRank }] } + * or null if no FTS5 index (caller should fall back to semantic-only). + */ +export async function hybridSearchData(query, customDbPath, opts = {}) { + const limit = opts.limit || 15; + const k = opts.rrfK || 60; + const topK = (opts.limit || 15) * 5; + + // Split semicolons for multi-query support + const queries = + typeof query === 'string' + ? 
query + .split(';') + .map((q) => q.trim()) + .filter((q) => q.length > 0) + : [query]; + + // Check FTS5 availability first (sync, cheap) + const checkDb = openReadonlyOrFail(customDbPath); + const ftsAvailable = hasFtsIndex(checkDb); + checkDb.close(); + if (!ftsAvailable) return null; + + // Collect ranked lists: for each query, one BM25 list + one semantic list + const rankedLists = []; + + for (const q of queries) { + // BM25 ranked list (sync) + const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK }); + if (bm25Data?.results) { + rankedLists.push( + bm25Data.results.map((r, idx) => ({ + key: `${r.name}:${r.file}:${r.line}`, + rank: idx + 1, + source: 'bm25', + ...r, + })), + ); + } + + // Semantic ranked list (async) + const semData = await searchData(q, customDbPath, { + ...opts, + limit: topK, + minScore: opts.minScore || 0.2, + }); + if (semData?.results) { + rankedLists.push( + semData.results.map((r, idx) => ({ + key: `${r.name}:${r.file}:${r.line}`, + rank: idx + 1, + source: 'semantic', + ...r, + })), + ); + } + } + + // RRF fusion across all ranked lists + const fusionMap = new Map(); + for (const list of rankedLists) { + for (const item of list) { + if (!fusionMap.has(item.key)) { + fusionMap.set(item.key, { + name: item.name, + kind: item.kind, + file: item.file, + line: item.line, + rrfScore: 0, + bm25Score: null, + bm25Rank: null, + similarity: null, + semanticRank: null, + }); + } + const entry = fusionMap.get(item.key); + entry.rrfScore += 1 / (k + item.rank); + if (item.source === 'bm25') { + if (entry.bm25Rank === null || item.rank < entry.bm25Rank) { + entry.bm25Score = item.bm25Score; + entry.bm25Rank = item.rank; + } + } else { + if (entry.semanticRank === null || item.rank < entry.semanticRank) { + entry.similarity = item.similarity; + entry.semanticRank = item.rank; + } + } + } + } + + const results = [...fusionMap.values()] + .sort((a, b) => b.rrfScore - a.rrfScore) + .slice(0, limit) + .map((e) => ({ + name: e.name, + 
kind: e.kind, + file: e.file, + line: e.line, + rrf: e.rrfScore, + bm25Score: e.bm25Score, + bm25Rank: e.bm25Rank, + similarity: e.similarity, + semanticRank: e.semanticRank, + })); + + return { results }; +} + +/** + * Search with mode support — CLI wrapper with multi-query detection. + * Modes: 'hybrid' (default), 'semantic', 'keyword' */ export async function search(query, customDbPath, opts = {}) { + const mode = opts.mode || 'hybrid'; + // Split by semicolons, trim, filter empties const queries = query .split(';') .map((q) => q.trim()) .filter((q) => q.length > 0); - if (queries.length <= 1) { - // Single-query path — preserve original output format - const singleQuery = queries[0] || query; - const data = await searchData(singleQuery, customDbPath, opts); - if (!data) return; + const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'); + + // ─── Keyword-only mode ────────────────────────────────────────────── + if (mode === 'keyword') { + const singleQuery = queries.length === 1 ? queries[0] : query; + const data = ftsSearchData(singleQuery, customDbPath, opts); + if (!data) { + console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nSemantic search: "${singleQuery}"\n`); - + console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); if (data.results.length === 0) { - console.log(' No results above threshold.'); + console.log(' No results found.'); } else { for (const r of data.results) { - const bar = '#'.repeat(Math.round(r.similarity * 20)); - const kindIcon = r.kind === 'function' ? 'f' : r.kind === 'class' ? 
'*' : 'o'; - console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); - console.log(` ${kindIcon} ${r.name} -- ${r.file}:${r.line}`); + console.log( + ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); } } - console.log(`\n ${data.results.length} results shown\n`); - } else { - // Multi-query path — RRF ranking - const data = await multiSearchData(queries, customDbPath, opts); - if (!data) return; + return; + } - if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - return; - } + // ─── Semantic-only mode ───────────────────────────────────────────── + if (mode === 'semantic') { + if (queries.length <= 1) { + const singleQuery = queries[0] || query; + const data = await searchData(singleQuery, customDbPath, opts); + if (!data) return; - console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); - queries.forEach((q, i) => { - console.log(` [${i + 1}] "${q}"`); - }); - console.log(); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } - if (data.results.length === 0) { - console.log(' No results above threshold.'); + console.log(`\nSemantic search: "${singleQuery}"\n`); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { + const bar = '#'.repeat(Math.round(r.similarity * 20)); + console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`); + console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`); + } + } + console.log(`\n ${data.results.length} results shown\n`); } else { - for (const r of data.results) { - const kindIcon = r.kind === 'function' ? 'f' : r.kind === 'class' ? 
'*' : 'o'; - console.log(` RRF ${r.rrf.toFixed(4)} ${kindIcon} ${r.name} -- ${r.file}:${r.line}`); - for (const qs of r.queryScores) { - const bar = '#'.repeat(Math.round(qs.similarity * 20)); + const data = await multiSearchData(queries, customDbPath, opts); + if (!data) return; + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); + for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); + console.log(); + if (data.results.length === 0) { + console.log(' No results above threshold.'); + } else { + for (const r of data.results) { console.log( - ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, + ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, ); + for (const qs of r.queryScores) { + const bar = '#'.repeat(Math.round(qs.similarity * 20)); + console.log( + ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`, + ); + } } } + console.log(`\n ${data.results.length} results shown\n`); } + return; + } - console.log(`\n ${data.results.length} results shown\n`); + // ─── Hybrid mode (default) ────────────────────────────────────────── + const data = await hybridSearchData(query, customDbPath, opts); + + if (!data) { + // No FTS5 index — fall back to semantic-only + warn( + 'FTS5 index not found — using semantic search only. 
Re-run `codegraph embed` to enable hybrid mode.', + ); + return search(query, customDbPath, { ...opts, mode: 'semantic' }); } + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + const rrfK = opts.rrfK || 60; + if (queries.length <= 1) { + const singleQuery = queries[0] || query; + console.log(`\nHybrid search: "${singleQuery}" (BM25 + semantic, RRF k=${rrfK})\n`); + } else { + console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`); + for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); + console.log(); + } + + if (data.results.length === 0) { + console.log(' No results found.'); + } else { + for (const r of data.results) { + console.log( + ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, + ); + const parts = []; + if (r.bm25Rank != null) { + parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`); + } + if (r.semanticRank != null) { + parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`); + } + if (parts.length > 0) { + console.log(` ${parts.join(' | ')}`); + } + } + } + + console.log(`\n ${data.results.length} results shown\n`); } diff --git a/src/index.js b/src/index.js index 2b539e12..3cef9655 100644 --- a/src/index.js +++ b/src/index.js @@ -55,6 +55,8 @@ export { EMBEDDING_STRATEGIES, embed, estimateTokens, + ftsSearchData, + hybridSearchData, MODELS, multiSearchData, search, @@ -70,6 +72,8 @@ export { setVerbose } from './logger.js'; export { manifesto, manifestoData, RULE_DEFS } from './manifesto.js'; // Native engine export { isNativeAvailable } from './native.js'; +// Ownership (CODEOWNERS) +export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; diff --git a/src/mcp.js b/src/mcp.js index abd41893..ee11bb3c 100644 --- a/src/mcp.js +++ 
b/src/mcp.js @@ -247,13 +247,19 @@ const BASE_TOOLS = [ { name: 'semantic_search', description: - 'Search code symbols by meaning using embeddings (requires prior `codegraph embed`)', + 'Search code symbols by meaning using embeddings and/or keyword matching (requires prior `codegraph embed`). Default hybrid mode combines BM25 keyword + semantic search for best results.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Natural language search query' }, limit: { type: 'number', description: 'Max results to return', default: 15 }, min_score: { type: 'number', description: 'Minimum similarity score (0-1)', default: 0.2 }, + mode: { + type: 'string', + enum: ['hybrid', 'semantic', 'keyword'], + description: + 'Search mode: hybrid (BM25 + semantic, default), semantic (embeddings only), keyword (BM25 only)', + }, }, required: ['query'], }, @@ -491,6 +497,48 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'code_owners', + description: + 'Show CODEOWNERS mapping for files and functions. Shows ownership coverage, per-owner breakdown, and cross-owner boundary edges.', + inputSchema: { + type: 'object', + properties: { + file: { type: 'string', description: 'Scope to a specific file (partial match)' }, + owner: { type: 'string', description: 'Filter to a specific owner (e.g. @team-name)' }, + boundary: { + type: 'boolean', + description: 'Show cross-owner boundary edges', + default: false, + }, + kind: { + type: 'string', + description: 'Filter by symbol kind (function, method, class, etc.)', + }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + }, + }, + }, + { + name: 'branch_compare', + description: + 'Compare code structure between two git refs (branches, tags, commits). 
Shows added/removed/changed symbols and transitive caller impact using temporary git worktrees.', + inputSchema: { + type: 'object', + properties: { + base: { type: 'string', description: 'Base git ref (branch, tag, or commit SHA)' }, + target: { type: 'string', description: 'Target git ref to compare against base' }, + depth: { type: 'number', description: 'Max transitive caller depth', default: 3 }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + format: { + type: 'string', + enum: ['json', 'mermaid'], + description: 'Output format (default: json)', + }, + }, + required: ['base', 'target'], + }, + }, ]; const LIST_REPOS_TOOL = { @@ -704,18 +752,55 @@ export async function startMCPServer(customDbPath, options = {}) { } break; case 'semantic_search': { - const { searchData } = await import('./embedder.js'); - result = await searchData(args.query, dbPath, { - limit: args.limit, - minScore: args.min_score, - }); - if (result === null) { - return { - content: [ - { type: 'text', text: 'Semantic search unavailable. Run `codegraph embed` first.' }, - ], - isError: true, - }; + const mode = args.mode || 'hybrid'; + const searchOpts = { limit: args.limit, minScore: args.min_score }; + + if (mode === 'keyword') { + const { ftsSearchData } = await import('./embedder.js'); + result = ftsSearchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'No FTS5 index found. Run `codegraph embed` to build the keyword index.', + }, + ], + isError: true, + }; + } + } else if (mode === 'semantic') { + const { searchData } = await import('./embedder.js'); + result = await searchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'Semantic search unavailable. 
Run `codegraph embed` first.', + }, + ], + isError: true, + }; + } + } else { + // hybrid (default) — falls back to semantic if no FTS5 + const { hybridSearchData, searchData } = await import('./embedder.js'); + result = await hybridSearchData(args.query, dbPath, searchOpts); + if (result === null) { + result = await searchData(args.query, dbPath, searchOpts); + if (result === null) { + return { + content: [ + { + type: 'text', + text: 'Semantic search unavailable. Run `codegraph embed` first.', + }, + ], + isError: true, + }; + } + } } break; } @@ -859,6 +944,26 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; } + case 'code_owners': { + const { ownersData } = await import('./owners.js'); + result = ownersData(dbPath, { + file: args.file, + owner: args.owner, + boundary: args.boundary, + kind: args.kind, + noTests: args.no_tests, + }); + break; + } + case 'branch_compare': { + const { branchCompareData, branchCompareMermaid } = await import('./branch-compare.js'); + const bcData = await branchCompareData(args.base, args.target, { + depth: args.depth, + noTests: args.no_tests, + }); + result = args.format === 'mermaid' ? 
branchCompareMermaid(bcData) : bcData; + break; + } case 'list_repos': { const { listRepos, pruneRegistry } = await import('./registry.js'); pruneRegistry(); diff --git a/tests/search/embedder-search.test.js b/tests/search/embedder-search.test.js index bbe57b67..93ea518c 100644 --- a/tests/search/embedder-search.test.js +++ b/tests/search/embedder-search.test.js @@ -31,7 +31,14 @@ vi.mock('@huggingface/transformers', () => ({ cos_sim: () => 0, })); -import { cosineSim, multiSearchData, search, searchData } from '../../src/embedder.js'; +import { + cosineSim, + ftsSearchData, + hybridSearchData, + multiSearchData, + search, + searchData, +} from '../../src/embedder.js'; // ─── Helpers ─────────────────────────────────────────────────────────── @@ -48,14 +55,30 @@ function insertNode(db, name, kind, file, line) { .run(name, kind, file, line).lastInsertRowid; } -function insertEmbedding(db, nodeId, vec, preview) { - db.prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)').run( +function insertEmbedding(db, nodeId, vec, preview, fullText) { + db.prepare( + 'INSERT INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)', + ).run(nodeId, Buffer.from(vec.buffer), preview, fullText || preview); +} + +function insertFts(db, nodeId, name, content) { + db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)').run( nodeId, - Buffer.from(vec.buffer), - preview, + name, + content, ); } +/** Capture console.log calls and return joined output. 
*/ +function captureLog(fn) { + const lines = []; + const spy = vi.spyOn(console, 'log').mockImplementation((...args) => lines.push(args.join(' '))); + return fn().then(() => { + spy.mockRestore(); + return lines.join('\n'); + }); +} + // ─── Fixture DB ──────────────────────────────────────────────────────── // // Nodes & vectors: @@ -67,8 +90,15 @@ function insertEmbedding(db, nodeId, vec, preview) { // Query vectors: // "auth" → [1, 0, 0] (cosine: A=1.0, C≈0.707) // "jwt" → [0, 1, 0] (cosine: B=1.0, C≈0.707) +// +// FTS5 content: +// A: "function authenticate (authenticate) in src/auth.js" +// B: "function validateJWT (validate JWT) in src/jwt.js" +// C: "function authMiddleware (auth Middleware) in src/middleware.js" +// D: "function formatDate (format Date) in src/utils.js" let tmpDir, dbPath; +let noFtsDir, noFtsDbPath; beforeAll(() => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-test-')); @@ -83,12 +113,18 @@ beforeAll(() => { node_id INTEGER PRIMARY KEY, vector BLOB NOT NULL, text_preview TEXT, + full_text TEXT, FOREIGN KEY(node_id) REFERENCES nodes(id) ); CREATE TABLE IF NOT EXISTS embedding_meta ( key TEXT PRIMARY KEY, value TEXT ); + CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5( + name, + content, + tokenize='unicode61' + ); `); const idA = insertNode(db, 'authenticate', 'function', 'src/auth.js', 10); @@ -97,26 +133,80 @@ beforeAll(() => { const idD = insertNode(db, 'formatDate', 'function', 'src/utils.js', 1); const S = Math.SQRT1_2; // ≈ 0.7071 - insertEmbedding(db, idA, makeVec([1, 0, 0]), 'authenticate (function) -- src/auth.js:10'); - insertEmbedding(db, idB, makeVec([0, 1, 0]), 'validateJWT (function) -- src/jwt.js:20'); - insertEmbedding(db, idC, makeVec([S, S, 0]), 'authMiddleware (function) -- src/middleware.js:5'); - insertEmbedding(db, idD, makeVec([0, 0, 1]), 'formatDate (function) -- src/utils.js:1'); + const textA = 'function authenticate (authenticate) in src/auth.js\nValidate user credentials'; + const textB = 
'function validateJWT (validate JWT) in src/jwt.js\nCheck JWT token validity'; + const textC = + 'function authMiddleware (auth Middleware) in src/middleware.js\nExpress auth middleware'; + const textD = 'function formatDate (format Date) in src/utils.js\nFormat a date object'; + + insertEmbedding(db, idA, makeVec([1, 0, 0]), 'authenticate (function) -- src/auth.js:10', textA); + insertEmbedding(db, idB, makeVec([0, 1, 0]), 'validateJWT (function) -- src/jwt.js:20', textB); + insertEmbedding( + db, + idC, + makeVec([S, S, 0]), + 'authMiddleware (function) -- src/middleware.js:5', + textC, + ); + insertEmbedding(db, idD, makeVec([0, 0, 1]), 'formatDate (function) -- src/utils.js:1', textD); + + // Populate FTS5 index + insertFts(db, idA, 'authenticate', textA); + insertFts(db, idB, 'validateJWT', textB); + insertFts(db, idC, 'authMiddleware', textC); + insertFts(db, idD, 'formatDate', textD); db.prepare( "INSERT INTO embedding_meta (key, value) VALUES ('model', 'Xenova/all-MiniLM-L6-v2')", ).run(); db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('dim', '384')").run(); db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('count', '4')").run(); + db.prepare("INSERT INTO embedding_meta (key, value) VALUES ('fts_count', '4')").run(); db.close(); // Query vectors used by the mocked embed() QUERY_VECTORS.set('auth', makeVec([1, 0, 0])); QUERY_VECTORS.set('jwt', makeVec([0, 1, 0])); QUERY_VECTORS.set('authenticate', makeVec([0.99, 0.1, 0])); // very similar to 'auth' + QUERY_VECTORS.set('"authenticate"', makeVec([0.99, 0.1, 0])); + QUERY_VECTORS.set('"formatDate"', makeVec([0, 0, 1])); + QUERY_VECTORS.set('buildGraph', makeVec([0.2, 0.2, 0.2])); + QUERY_VECTORS.set('"buildGraph"', makeVec([0.2, 0.2, 0.2])); + + // ─── Second DB without FTS5 (for fallback tests) ──────────────────── + noFtsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-nofts-')); + noFtsDbPath = path.join(noFtsDir, 'graph.db'); + const db2 = new Database(noFtsDbPath); + 
db2.pragma('journal_mode = WAL'); + initSchema(db2); + db2.exec(` + CREATE TABLE IF NOT EXISTS embeddings ( + node_id INTEGER PRIMARY KEY, + vector BLOB NOT NULL, + text_preview TEXT, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE TABLE IF NOT EXISTS embedding_meta ( + key TEXT PRIMARY KEY, + value TEXT + ); + `); + const nfIdA = insertNode(db2, 'hello', 'function', 'src/hello.js', 1); + db2 + .prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)') + .run(nfIdA, Buffer.from(makeVec([1, 0, 0]).buffer), 'hello (function) -- src/hello.js:1'); + db2 + .prepare("INSERT INTO embedding_meta (key, value) VALUES ('model', 'Xenova/all-MiniLM-L6-v2')") + .run(); + db2.prepare("INSERT INTO embedding_meta (key, value) VALUES ('dim', '384')").run(); + db2.prepare("INSERT INTO embedding_meta (key, value) VALUES ('count', '1')").run(); + db2.close(); + QUERY_VECTORS.set('hello', makeVec([1, 0, 0])); }); afterAll(() => { fs.rmSync(tmpDir, { recursive: true, force: true }); + fs.rmSync(noFtsDir, { recursive: true, force: true }); }); // ─── Tests ───────────────────────────────────────────────────────────── @@ -238,42 +328,155 @@ describe('searchData file pattern', () => { }); }); +// ─── FTS5 keyword search tests ───────────────────────────────────────── + +describe('ftsSearchData', () => { + test('returns results sorted by BM25 score', () => { + const data = ftsSearchData('authenticate', dbPath); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + expect(data.results[0].name).toBe('authenticate'); + expect(data.results[0].bm25Score).toBeGreaterThan(0); + }); + + test('respects limit parameter', () => { + const data = ftsSearchData('function', dbPath, { limit: 2 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeLessThanOrEqual(2); + }); + + test('respects kind filter', () => { + const data = ftsSearchData('authenticate', dbPath, { kind: 'function' }); + expect(data).not.toBeNull(); + for (const 
r of data.results) { + expect(r.kind).toBe('function'); + } + }); + + test('respects noTests filter', () => { + // Our fixture has no test files, so all results should pass + const data = ftsSearchData('authenticate', dbPath, { noTests: true }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('returns null when FTS5 table does not exist (old DB)', () => { + const data = ftsSearchData('hello', noFtsDbPath); + expect(data).toBeNull(); + }); + + test('handles special characters in query without crashing', () => { + const data = ftsSearchData('auth*"()', dbPath); + expect(data).not.toBeNull(); + // Should return results or empty array, not throw + expect(data.results).toBeInstanceOf(Array); + }); + + test('result shape has expected fields', () => { + const data = ftsSearchData('authenticate', dbPath); + const r = data.results[0]; + expect(r).toHaveProperty('name'); + expect(r).toHaveProperty('kind'); + expect(r).toHaveProperty('file'); + expect(r).toHaveProperty('line'); + expect(r).toHaveProperty('bm25Score'); + }); + + test('returns empty results for query with no matches', () => { + const data = ftsSearchData('zzzznonexistent', dbPath); + expect(data).not.toBeNull(); + expect(data.results).toHaveLength(0); + }); +}); + +// ─── Hybrid search tests ─────────────────────────────────────────────── + +describe('hybridSearchData', () => { + test('RRF fusion produces higher scores for results in both BM25 and semantic', async () => { + const data = await hybridSearchData('authenticate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + + // authenticate should rank high — it appears in both BM25 (exact match) and semantic + const auth = data.results.find((r) => r.name === 'authenticate'); + expect(auth).toBeDefined(); + expect(auth.rrf).toBeGreaterThan(0); + }); + + test('a result in BM25-only still appears', async () => { + // "formatDate" text matches BM25 for 
"format" but may not match semantically + const data = await hybridSearchData('formatDate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + const fd = data.results.find((r) => r.name === 'formatDate'); + expect(fd).toBeDefined(); + }); + + test('respects rrfK parameter', async () => { + const d60 = await hybridSearchData('authenticate', dbPath, { minScore: 0.01, rrfK: 60 }); + const d10 = await hybridSearchData('authenticate', dbPath, { minScore: 0.01, rrfK: 10 }); + expect(d60).not.toBeNull(); + expect(d10).not.toBeNull(); + // Lower k = higher RRF scores + expect(d10.results[0].rrf).toBeGreaterThan(d60.results[0].rrf); + }); + + test('falls back to null when no FTS5 index', async () => { + const data = await hybridSearchData('hello', noFtsDbPath, { minScore: 0.01 }); + expect(data).toBeNull(); + }); + + test('result shape has hybrid fields', async () => { + const data = await hybridSearchData('authenticate', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + const r = data.results[0]; + expect(r).toHaveProperty('rrf'); + expect(r).toHaveProperty('name'); + expect(r).toHaveProperty('kind'); + expect(r).toHaveProperty('file'); + expect(r).toHaveProperty('line'); + // At least one of these should be non-null + expect(r.bm25Rank !== null || r.semanticRank !== null).toBe(true); + }); + + test('multi-query with semicolons works', async () => { + const data = await hybridSearchData('auth ; jwt', dbPath, { minScore: 0.01 }); + expect(data).not.toBeNull(); + expect(data.results.length).toBeGreaterThan(0); + }); +}); + +// ─── search CLI wrapper tests ────────────────────────────────────────── + describe('search (CLI wrapper)', () => { - /** Capture console.log calls and return joined output. 
*/ - function captureLog(fn) { - const lines = []; - const spy = vi - .spyOn(console, 'log') - .mockImplementation((...args) => lines.push(args.join(' '))); - return fn().then(() => { - spy.mockRestore(); - return lines.join('\n'); - }); - } - - test('single query prints similarity format', async () => { - const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2 })); + test('mode: semantic — single query prints similarity format', async () => { + const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2, mode: 'semantic' })); expect(out).toContain('Semantic search: "auth"'); expect(out).toContain('%'); expect(out).toContain('authenticate'); }); - test('semicolons trigger multi-query RRF format', async () => { - const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.2 })); + test('mode: semantic — semicolons trigger multi-query RRF format', async () => { + const out = await captureLog(() => + search('auth ; jwt', dbPath, { minScore: 0.2, mode: 'semantic' }), + ); expect(out).toContain('Multi-query semantic search'); expect(out).toContain('RRF'); expect(out).toContain('[1] "auth"'); expect(out).toContain('[2] "jwt"'); }); - test('trailing semicolons fall back to single-query', async () => { - const out = await captureLog(() => search('auth ;', dbPath, { minScore: 0.2 })); + test('mode: semantic — trailing semicolons fall back to single-query', async () => { + const out = await captureLog(() => + search('auth ;', dbPath, { minScore: 0.2, mode: 'semantic' }), + ); expect(out).toContain('Semantic search: "auth"'); expect(out).not.toContain('Multi-query'); }); - test('single query with json: true outputs valid JSON with results array', async () => { - const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2, json: true })); + test('mode: semantic — json output has similarity', async () => { + const out = await captureLog(() => + search('auth', dbPath, { minScore: 0.2, json: true, mode: 'semantic' 
}), + ); const parsed = JSON.parse(out); expect(parsed.results).toBeInstanceOf(Array); expect(parsed.results.length).toBeGreaterThan(0); @@ -281,12 +484,68 @@ describe('search (CLI wrapper)', () => { expect(parsed.results[0]).toHaveProperty('name'); }); - test('multi query with json: true outputs valid JSON with rrf and queryScores', async () => { - const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.2, json: true })); + test('mode: semantic — multi query json has rrf and queryScores', async () => { + const out = await captureLog(() => + search('auth ; jwt', dbPath, { minScore: 0.2, json: true, mode: 'semantic' }), + ); const parsed = JSON.parse(out); expect(parsed.results).toBeInstanceOf(Array); expect(parsed.results.length).toBeGreaterThan(0); expect(parsed.results[0]).toHaveProperty('rrf'); expect(parsed.results[0]).toHaveProperty('queryScores'); }); + + test('mode: keyword — prints BM25 format', async () => { + const out = await captureLog(() => search('authenticate', dbPath, { mode: 'keyword' })); + expect(out).toContain('Keyword search'); + expect(out).toContain('BM25'); + expect(out).toContain('authenticate'); + }); + + test('mode: keyword — json output has bm25Score', async () => { + const out = await captureLog(() => + search('authenticate', dbPath, { json: true, mode: 'keyword' }), + ); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('bm25Score'); + }); + + test('mode: keyword — no FTS5 index prints error', async () => { + const out = await captureLog(() => search('hello', noFtsDbPath, { mode: 'keyword' })); + expect(out).toContain('No FTS5 index found'); + }); + + test('default mode (hybrid) — prints hybrid format', async () => { + const out = await captureLog(() => search('authenticate', dbPath, { minScore: 0.01 })); + expect(out).toContain('Hybrid search'); + expect(out).toContain('BM25 + semantic'); 
+ }); + + test('default mode (hybrid) — json output has rrf and bm25/semantic breakdown', async () => { + const out = await captureLog(() => + search('authenticate', dbPath, { minScore: 0.01, json: true }), + ); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('rrf'); + }); + + test('hybrid mode falls back to semantic when no FTS5 index', async () => { + const spy = vi.spyOn(process.stderr, 'write').mockImplementation(() => {}); + const out = await captureLog(() => search('hello', noFtsDbPath, { minScore: 0.01 })); + const stderrOutput = spy.mock.calls.map((c) => c[0]).join(''); + spy.mockRestore(); + // Should fall back gracefully and still produce output + expect(stderrOutput).toContain('FTS5 index not found'); + expect(out).toContain('Semantic search'); + }); + + test('multi-query hybrid works', async () => { + const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.01 })); + expect(out).toContain('Hybrid'); + expect(out).toContain('RRF'); + }); }); diff --git a/tests/search/embedding-strategy.test.js b/tests/search/embedding-strategy.test.js index 5db82bb9..e1553678 100644 --- a/tests/search/embedding-strategy.test.js +++ b/tests/search/embedding-strategy.test.js @@ -234,6 +234,61 @@ describe('buildEmbeddings defaults to structured', () => { }); }); +describe('FTS5 index built alongside embeddings', () => { + test('full_text column is populated in embeddings table', async () => { + EMBEDDED_TEXTS.length = 0; + await buildEmbeddings(tmpDir, 'minilm', dbPath, { strategy: 'structured' }); + + const db = new Database(dbPath, { readonly: true }); + const rows = db.prepare('SELECT full_text FROM embeddings WHERE full_text IS NOT NULL').all(); + db.close(); + expect(rows.length).toBeGreaterThan(0); + // Each full_text should contain structured text content + for (const row of rows) { + 
expect(row.full_text.length).toBeGreaterThan(0); + } + }); + + test('FTS5 row count matches embedding count', async () => { + const db = new Database(dbPath, { readonly: true }); + const embCount = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c; + const ftsCount = db.prepare('SELECT COUNT(*) as c FROM fts_index').get().c; + db.close(); + expect(ftsCount).toBe(embCount); + }); + + test('FTS5 content matches the structured/source text', async () => { + const db = new Database(dbPath, { readonly: true }); + // FTS5 rowid matches embeddings.node_id + const emb = db.prepare('SELECT node_id, full_text FROM embeddings').all(); + for (const row of emb) { + const fts = db.prepare('SELECT content FROM fts_index WHERE rowid = ?').get(row.node_id); + expect(fts).toBeDefined(); + expect(fts.content).toBe(row.full_text); + } + db.close(); + }); + + test('fts_count is stored in metadata', async () => { + const db = new Database(dbPath, { readonly: true }); + const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'fts_count'").get(); + db.close(); + expect(row).toBeDefined(); + expect(Number(row.value)).toBeGreaterThan(0); + }); + + test('FTS5 name column contains symbol names', async () => { + const db = new Database(dbPath, { readonly: true }); + const results = db + .prepare("SELECT rowid, name FROM fts_index WHERE fts_index MATCH 'add'") + .all(); + db.close(); + expect(results.length).toBeGreaterThan(0); + const names = results.map((r) => r.name); + expect(names).toContain('add'); + }); +}); + describe('context window overflow detection', () => { let bigDir, bigDbPath; From b9f597270ca9cdcd1be5b1762e5ad8fb51da3dc4 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:53:24 -0700 Subject: [PATCH 2/5] fix: update MCP test tool list and remove premature owners.js export Add code_owners and branch_compare to ALL_TOOL_NAMES in mcp.test.js to match the tools added in the previous commit. 
Remove owners.js re-export from index.js since that module is not yet on this branch. --- src/index.js | 2 -- tests/unit/mcp.test.js | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index 3cef9655..73fcb01c 100644 --- a/src/index.js +++ b/src/index.js @@ -72,8 +72,6 @@ export { setVerbose } from './logger.js'; export { manifesto, manifestoData, RULE_DEFS } from './manifesto.js'; // Native engine export { isNativeAvailable } from './native.js'; -// Ownership (CODEOWNERS) -export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index a221fafb..1c082085 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -33,6 +33,8 @@ const ALL_TOOL_NAMES = [ 'complexity', 'manifesto', 'communities', + 'code_owners', + 'branch_compare', 'list_repos', ]; From d1eaf8be90497560c5992a99b97bc2627bada967 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:42:30 -0700 Subject: [PATCH 3/5] fix(ci): escape dots in version grep patterns and add duplicate-skip guards Escape dots in VERSION before using as PCRE pattern to prevent false matches (e.g. 2.4.0 matching 2x4x0). Also add "Check for existing benchmark" steps to skip re-running benchmarks for already-reported versions, and detect untracked files in change detection. 
--- .github/workflows/benchmark.yml | 136 ++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 24 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 4e952b56..864ee594 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -56,8 +56,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/BUILD-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in BUILD-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -73,6 +87,7 @@ jobs: exit 1 - name: Run build benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -81,25 +96,33 @@ jobs: node scripts/benchmark.js $ARGS 2>/dev/null > benchmark-result.json - name: Update build report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-benchmark-report.js benchmark-result.json - name: Upload build result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: build-benchmark-result path: benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -164,8 +187,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping" + echo "skip=true" >> 
"$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -181,6 +218,7 @@ jobs: exit 1 - name: Cache HuggingFace models + if: steps.existing.outputs.skip != 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface @@ -188,9 +226,11 @@ jobs: restore-keys: hf-models-${{ runner.os }}- - name: Build graph + if: steps.existing.outputs.skip != 'true' run: node src/cli.js build . - name: Run embedding benchmark + if: steps.existing.outputs.skip != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -201,25 +241,31 @@ jobs: node scripts/embedding-benchmark.js $ARGS 2>/dev/null > embedding-benchmark-result.json - name: Update embedding report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-embedding-report.js embedding-benchmark-result.json - name: Upload embedding result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: embedding-benchmark-result path: embedding-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! 
git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -284,8 +330,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/QUERY-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in QUERY-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + - name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -301,6 +361,7 @@ jobs: exit 1 - name: Run query benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -309,25 +370,31 @@ jobs: node scripts/query-benchmark.js $ARGS 2>/dev/null > query-benchmark-result.json - name: Update query report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-query-report.js query-benchmark-result.json - name: Upload query result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: query-benchmark-result path: query-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -392,8 +459,22 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi + - name: Check for existing benchmark + id: existing + run: | + VERSION="${{ steps.mode.outputs.version }}" + VERSION_RE="${VERSION//./\\.}" + if [ "$VERSION" = "dev" ]; then + echo "skip=false" >> "$GITHUB_OUTPUT" + elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then + echo "Benchmark for $VERSION already exists in INCREMENTAL-BENCHMARKS.md — skipping" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + 
- name: Wait for npm propagation - if: steps.mode.outputs.source == 'npm' + if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -409,6 +490,7 @@ jobs: exit 1 - name: Run incremental benchmark + if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -417,25 +499,31 @@ jobs: node scripts/incremental-benchmark.js $ARGS 2>/dev/null > incremental-benchmark-result.json - name: Update incremental report + if: steps.existing.outputs.skip != 'true' run: node scripts/update-incremental-report.js incremental-benchmark-result.json - name: Upload incremental result + if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: incremental-benchmark-result path: incremental-benchmark-result.json - name: Check for changes + if: steps.existing.outputs.skip != 'true' id: changes run: | - if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + if ! 
git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.changes.outputs.changed == 'true' + if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} From 8ea86afc6a1aad8d9e2a8943e1fb42a7d642f6f7 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:49:33 -0700 Subject: [PATCH 4/5] Revert "fix(ci): escape dots in version grep patterns and add duplicate-skip guards" This reverts commit d1eaf8be90497560c5992a99b97bc2627bada967. --- .github/workflows/benchmark.yml | 136 ++++++-------------------------- 1 file changed, 24 insertions(+), 112 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 864ee594..4e952b56 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -56,22 +56,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/BUILD-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in BUILD-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on 
npm..." @@ -87,7 +73,6 @@ jobs: exit 1 - name: Run build benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -96,33 +81,25 @@ jobs: node scripts/benchmark.js $ARGS 2>/dev/null > benchmark-result.json - name: Update build report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-benchmark-report.js benchmark-result.json - name: Upload build result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: build-benchmark-result path: benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - # Detect modified tracked files - if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then - CHANGED=true - fi - # Detect newly created (untracked) files - if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -187,22 +164,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping" - echo 
"skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -218,7 +181,6 @@ jobs: exit 1 - name: Cache HuggingFace models - if: steps.existing.outputs.skip != 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface @@ -226,11 +188,9 @@ jobs: restore-keys: hf-models-${{ runner.os }}- - name: Build graph - if: steps.existing.outputs.skip != 'true' run: node src/cli.js build . - name: Run embedding benchmark - if: steps.existing.outputs.skip != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -241,31 +201,25 @@ jobs: node scripts/embedding-benchmark.js $ARGS 2>/dev/null > embedding-benchmark-result.json - name: Update embedding report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-embedding-report.js embedding-benchmark-result.json - name: Upload embedding result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: embedding-benchmark-result path: embedding-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! 
git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -330,22 +284,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/QUERY-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in QUERY-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." 
@@ -361,7 +301,6 @@ jobs: exit 1 - name: Run query benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -370,31 +309,25 @@ jobs: node scripts/query-benchmark.js $ARGS 2>/dev/null > query-benchmark-result.json - name: Update query report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-query-report.js query-benchmark-result.json - name: Upload query result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: query-benchmark-result path: query-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} @@ -459,22 +392,8 @@ jobs: echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT" fi - - name: Check for existing benchmark - id: existing - run: | - VERSION="${{ steps.mode.outputs.version }}" - VERSION_RE="${VERSION//./\\.}" - if [ "$VERSION" = "dev" ]; then - echo "skip=false" >> "$GITHUB_OUTPUT" - elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then - echo "Benchmark for $VERSION already exists in INCREMENTAL-BENCHMARKS.md — skipping" - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - 
- name: Wait for npm propagation - if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' + if: steps.mode.outputs.source == 'npm' run: | VERSION="${{ steps.mode.outputs.version }}" echo "Waiting for @optave/codegraph@${VERSION} on npm..." @@ -490,7 +409,6 @@ jobs: exit 1 - name: Run incremental benchmark - if: steps.existing.outputs.skip != 'true' run: | ARGS="--version ${{ steps.mode.outputs.version }}" if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then @@ -499,31 +417,25 @@ jobs: node scripts/incremental-benchmark.js $ARGS 2>/dev/null > incremental-benchmark-result.json - name: Update incremental report - if: steps.existing.outputs.skip != 'true' run: node scripts/update-incremental-report.js incremental-benchmark-result.json - name: Upload incremental result - if: steps.existing.outputs.skip != 'true' uses: actions/upload-artifact@v7 with: name: incremental-benchmark-result path: incremental-benchmark-result.json - name: Check for changes - if: steps.existing.outputs.skip != 'true' id: changes run: | - CHANGED=false - if ! 
git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then - CHANGED=true - fi - if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then - CHANGED=true + if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" fi - echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR - if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' + if: steps.changes.outputs.changed == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} VERSION: ${{ steps.mode.outputs.version }} From dc9ca9f0e7e501990a74c19bf5ce4d2e2bb0de6d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 1 Mar 2026 18:22:06 -0700 Subject: [PATCH 5/5] feat: universal pagination, NDJSON streaming, and generator APIs (#207) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: universal pagination, NDJSON streaming, and generator APIs Extend limit/offset pagination to all 21 MCP tools that return arrays, add --ndjson streaming to ~14 CLI commands, and introduce generator functions for memory-efficient iteration on large codebases. 
Phase 1 — Universal Pagination: - Add PAGINATION_PROPS to 15 additional MCP tool schemas - Wire limit/offset through all MCP handlers with per-tool defaults - Apply paginateResult in data functions across queries, complexity, communities, manifesto, flow, cochange, and structure modules - Expand MCP_DEFAULTS with sensible caps for each tool Phase 2 — NDJSON Streaming: - Add printNdjson helper to paginate.js - Add --ndjson/--limit/--offset options to CLI commands - Refactor existing NDJSON handlers to use shared helper Phase 3 — Generator/Iterator APIs: - Add iterListFunctions, iterRoles, iterWhere generators (queries.js) - Add iterComplexity generator (complexity.js) - All use better-sqlite3 .iterate() with try/finally cleanup - Export from index.js public API Impact: 30 functions changed, 19 affected * perf(queries): hoist prepared statements out of iterWhere loop Move crossFileCallers and uses prepared statements above the iteration loop so they are created once and reused per row, instead of being re-prepared on every iteration. 
--- src/cli.js | 98 +++++++++- src/cochange.js | 7 +- src/communities.js | 8 +- src/complexity.js | 93 ++++++++- src/flow.js | 8 +- src/index.js | 6 +- src/manifesto.js | 9 +- src/mcp.js | 67 ++++++- src/paginate.js | 34 ++++ src/queries.js | 191 ++++++++++++++++-- src/structure.js | 7 +- tests/integration/context.test.js | 16 +- tests/integration/pagination.test.js | 276 ++++++++++++++++++++++++++- tests/unit/mcp.test.js | 10 + 14 files changed, 777 insertions(+), 53 deletions(-) diff --git a/src/cli.js b/src/cli.js index 41f14272..c6d72cf7 100644 --- a/src/cli.js +++ b/src/cli.js @@ -16,6 +16,7 @@ import { } from './embedder.js'; import { exportDOT, exportJSON, exportMermaid } from './export.js'; import { setVerbose } from './logger.js'; +import { printNdjson } from './paginate.js'; import { ALL_SYMBOL_KINDS, context, @@ -127,8 +128,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((file, opts) => { - impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json }); + impactAnalysis(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); }); program @@ -164,8 +174,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((file, opts) => { - fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json }); + fileDeps(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); }); program @@ -178,6 +197,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -189,6 +211,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -202,6 +227,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -213,6 +241,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -258,6 +289,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -271,6 +305,9 @@ program noTests: resolveNoTests(opts), includeTests: opts.withTestSource, json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -282,11 +319,17 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((target, opts) => { explain(target, opts.db, { depth: parseInt(opts.depth, 10), noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -327,6 +370,9 @@ program .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') .option('-f, --format ', 'Output format: text, mermaid, json', 'text') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action((ref, opts) => { diffImpact(opts.db, { ref, @@ -335,6 +381,9 @@ program noTests: resolveNoTests(opts), json: opts.json, format: opts.format, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -640,6 +689,8 @@ program .option('--rrf-k ', 'RRF k parameter for multi-query ranking', '60') .option('--mode ', 'Search mode: hybrid, semantic, keyword (default: hybrid)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (query, opts) => { const validModes = ['hybrid', 'semantic', 'keyword']; if (opts.mode && !validModes.includes(opts.mode)) { @@ -671,6 +722,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (dir, opts) => { const { structureData, formatStructure } = await import('./structure.js'); const data = structureData(opts.db, { @@ -679,8 +733,12 @@ program sort: opts.sort, full: opts.full, noTests: resolveNoTests(opts), + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, }); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'directories'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatStructure(data)); @@ -699,15 +757,20 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { const { hotspotsData, formatHotspots } = await import('./structure.js'); const data = hotspotsData(opts.db, { metric: opts.metric, level: opts.level, limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, noTests: resolveNoTests(opts), }); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'hotspots'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatHotspots(data)); @@ -757,6 +820,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (file, opts) => { const { analyzeCoChanges, coChangeData, coChangeTopData, formatCoChange, formatCoChangeTop } = await import('./cochange.js'); @@ -783,20 +848,25 @@ program const queryOpts = { limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, minJaccard: opts.minJaccard ? 
parseFloat(opts.minJaccard) : config.coChange?.minJaccard, noTests: resolveNoTests(opts), }; if (file) { const data = coChangeData(file, opts.db, queryOpts); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'partners'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatCoChange(data)); } } else { const data = coChangeTopData(opts.db, queryOpts); - if (opts.json) { + if (opts.ndjson) { + printNdjson(data, 'pairs'); + } else if (opts.json) { console.log(JSON.stringify(data, null, 2)); } else { console.log(formatCoChangeTop(data)); @@ -860,6 +930,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (target, opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -869,6 +941,7 @@ program complexity(opts.db, { target, limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, sort: opts.sort, aboveThreshold: opts.aboveThreshold, health: opts.health, @@ -876,6 +949,7 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + ndjson: opts.ndjson, }); }); @@ -888,6 +962,9 @@ program .option('-f, --file ', 'Scope to file (partial match)') .option('-k, --kind ', 'Filter by symbol kind') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". 
Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); @@ -899,6 +976,9 @@ program kind: opts.kind, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); @@ -912,6 +992,9 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { const { communities } = await import('./communities.js'); communities(opts.db, { @@ -920,6 +1003,9 @@ program drift: opts.drift, noTests: resolveNoTests(opts), json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, }); }); diff --git a/src/cochange.js b/src/cochange.js index b08ce8db..d1fb2ed3 100644 --- a/src/cochange.js +++ b/src/cochange.js @@ -11,6 +11,7 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; import { closeDb, findDbPath, initSchema, openDb, openReadonlyOrFail } from './db.js'; import { warn } from './logger.js'; +import { paginateResult } from './paginate.js'; import { isTestFile } from './queries.js'; /** @@ -313,7 +314,8 @@ export function coChangeData(file, customDbPath, opts = {}) { const meta = getCoChangeMeta(db); closeDb(db); - return { file: resolvedFile, partners, meta }; + const base = { file: resolvedFile, partners, meta }; + return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); } /** @@ -365,7 +367,8 @@ export function coChangeTopData(customDbPath, opts = {}) { const meta = getCoChangeMeta(db); closeDb(db); - return { pairs, meta }; + const base = { pairs, meta }; + return paginateResult(base, 
'pairs', { limit: opts.limit, offset: opts.offset }); } /** diff --git a/src/communities.js b/src/communities.js index 7eba4071..926b611b 100644 --- a/src/communities.js +++ b/src/communities.js @@ -2,6 +2,7 @@ import path from 'node:path'; import Graph from 'graphology'; import louvain from 'graphology-communities-louvain'; import { openReadonlyOrFail } from './db.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { isTestFile } from './queries.js'; // ─── Graph Construction ─────────────────────────────────────────────── @@ -201,7 +202,7 @@ export function communitiesData(customDbPath, opts = {}) { const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100); - return { + const base = { communities: opts.drift ? [] : communities, modularity: +modularity.toFixed(4), drift: { splitCandidates, mergeCandidates }, @@ -212,6 +213,7 @@ export function communitiesData(customDbPath, opts = {}) { driftScore, }, }; + return paginateResult(base, 'communities', { limit: opts.limit, offset: opts.offset }); } /** @@ -238,6 +240,10 @@ export function communitySummaryForStats(customDbPath, opts = {}) { export function communities(customDbPath, opts = {}) { const data = communitiesData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'communities'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/complexity.js b/src/complexity.js index 01ffee18..30bee701 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -3,6 +3,7 @@ import path from 'node:path'; import { loadConfig } from './config.js'; import { openReadonlyOrFail } from './db.js'; import { info } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { LANGUAGE_REGISTRY } from './parser.js'; import { isTestFile } from './queries.js'; @@ -1887,10 +1888,9 @@ export function complexityData(customDbPath, opts = {}) { FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id ${where} ${having} - 
ORDER BY ${orderBy} - LIMIT ?`, + ORDER BY ${orderBy}`, ) - .all(...params, limit); + .all(...params); } catch { db.close(); return { functions: [], summary: null, thresholds }; @@ -1980,7 +1980,88 @@ export function complexityData(customDbPath, opts = {}) { } db.close(); - return { functions, summary, thresholds }; + const base = { functions, summary, thresholds }; + return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); +} + +/** + * Generator: stream complexity rows one-by-one using .iterate() for memory efficiency. + * @param {string} [customDbPath] + * @param {object} [opts] + * @param {boolean} [opts.noTests] + * @param {string} [opts.file] + * @param {string} [opts.target] + * @param {string} [opts.kind] + * @param {string} [opts.sort] + * @yields {{ name: string, kind: string, file: string, line: number, cognitive: number, cyclomatic: number, maxNesting: number, loc: number, sloc: number }} + */ +export function* iterComplexity(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const sort = opts.sort || 'cognitive'; + + let where = "WHERE n.kind IN ('function','method')"; + const params = []; + + if (noTests) { + where += ` AND n.file NOT LIKE '%.test.%' + AND n.file NOT LIKE '%.spec.%' + AND n.file NOT LIKE '%__test__%' + AND n.file NOT LIKE '%__tests__%' + AND n.file NOT LIKE '%.stories.%'`; + } + if (opts.target) { + where += ' AND n.name LIKE ?'; + params.push(`%${opts.target}%`); + } + if (opts.file) { + where += ' AND n.file LIKE ?'; + params.push(`%${opts.file}%`); + } + if (opts.kind) { + where += ' AND n.kind = ?'; + params.push(opts.kind); + } + + const orderMap = { + cognitive: 'fc.cognitive DESC', + cyclomatic: 'fc.cyclomatic DESC', + nesting: 'fc.max_nesting DESC', + mi: 'fc.maintainability_index ASC', + volume: 'fc.halstead_volume DESC', + effort: 'fc.halstead_effort DESC', + bugs: 'fc.halstead_bugs DESC', + loc: 'fc.loc DESC', + }; + const 
orderBy = orderMap[sort] || 'fc.cognitive DESC'; + + const stmt = db.prepare( + `SELECT n.name, n.kind, n.file, n.line, n.end_line, + fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.loc, fc.sloc + FROM function_complexity fc + JOIN nodes n ON fc.node_id = n.id + ${where} + ORDER BY ${orderBy}`, + ); + for (const r of stmt.iterate(...params)) { + if (noTests && isTestFile(r.file)) continue; + yield { + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + endLine: r.end_line || null, + cognitive: r.cognitive, + cyclomatic: r.cyclomatic, + maxNesting: r.max_nesting, + loc: r.loc || 0, + sloc: r.sloc || 0, + }; + } + } finally { + db.close(); + } } /** @@ -1989,6 +2070,10 @@ export function complexityData(customDbPath, opts = {}) { export function complexity(customDbPath, opts = {}) { const data = complexityData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'functions'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/flow.js b/src/flow.js index 93381652..ab59fe45 100644 --- a/src/flow.js +++ b/src/flow.js @@ -6,7 +6,7 @@ */ import { openReadonlyOrFail } from './db.js'; -import { paginateResult } from './paginate.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { isTestFile, kindIcon } from './queries.js'; import { FRAMEWORK_ENTRY_PREFIXES } from './structure.js'; @@ -204,7 +204,7 @@ export function flowData(name, dbPath, opts = {}) { } db.close(); - return { + const base = { entry, depth: maxDepth, steps, @@ -213,6 +213,7 @@ export function flowData(name, dbPath, opts = {}) { totalReached: visited.size - 1, // exclude the entry node itself truncated, }; + return paginateResult(base, 'steps', { limit: opts.limit, offset: opts.offset }); } /** @@ -293,8 +294,7 @@ export function flow(name, dbPath, opts = {}) { offset: opts.offset, }); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const e of data.entries) 
console.log(JSON.stringify(e)); + printNdjson(data, 'entries'); return; } if (opts.json) { diff --git a/src/index.js b/src/index.js index b195d8c6..c9f5f862 100644 --- a/src/index.js +++ b/src/index.js @@ -30,6 +30,7 @@ export { computeLOCMetrics, computeMaintainabilityIndex, HALSTEAD_RULES, + iterComplexity, } from './complexity.js'; // Configuration export { loadConfig } from './config.js'; @@ -75,7 +76,7 @@ export { isNativeAvailable } from './native.js'; // Ownership (CODEOWNERS) export { matchOwners, owners, ownersData, ownersForFiles, parseCodeowners } from './owners.js'; // Pagination utilities -export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from './paginate.js'; +export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult, printNdjson } from './paginate.js'; // Unified parser API export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; @@ -92,6 +93,9 @@ export { fnDepsData, fnImpactData, impactAnalysisData, + iterListFunctions, + iterRoles, + iterWhere, kindIcon, moduleMapData, pathData, diff --git a/src/manifesto.js b/src/manifesto.js index 8fc907ff..3549860a 100644 --- a/src/manifesto.js +++ b/src/manifesto.js @@ -2,6 +2,7 @@ import { loadConfig } from './config.js'; import { findCycles } from './cycles.js'; import { openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; // ─── Rule Definitions ───────────────────────────────────────────────── @@ -354,12 +355,13 @@ export function manifestoData(customDbPath, opts = {}) { violationCount: violations.length, }; - return { + const base = { rules: ruleResults, violations, summary, passed: failViolations.length === 0, }; + return paginateResult(base, 'violations', { limit: opts.limit, offset: opts.offset }); } finally { db.close(); } @@ -371,6 +373,11 @@ export function manifestoData(customDbPath, opts = {}) { export function manifesto(customDbPath, opts = {}) { const data = 
manifestoData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'violations'); + if (!data.passed) process.exit(1); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); if (!data.passed) process.exit(1); diff --git a/src/mcp.js b/src/mcp.js index ee11bb3c..19732931 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -50,6 +50,7 @@ const BASE_TOOLS = [ properties: { file: { type: 'string', description: 'File path (partial match supported)' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['file'], }, @@ -62,6 +63,7 @@ const BASE_TOOLS = [ properties: { file: { type: 'string', description: 'File path to analyze' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['file'], }, @@ -103,6 +105,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -126,6 +129,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -190,6 +194,7 @@ const BASE_TOOLS = [ description: 'Include test file source code', default: false, }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -203,6 +208,7 @@ const BASE_TOOLS = [ properties: { target: { type: 'string', description: 'File path or function name' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['target'], }, @@ -241,6 +247,7 @@ const BASE_TOOLS = [ enum: ['json', 'mermaid'], description: 'Output format (default: json)', }, + ...PAGINATION_PROPS, }, }, }, @@ -260,6 +267,7 @@ const BASE_TOOLS = [ description: 'Search mode: hybrid (BM25 + semantic, default), semantic (embeddings only), keyword 
(BM25 only)', }, + ...PAGINATION_PROPS, }, required: ['query'], }, @@ -318,6 +326,7 @@ const BASE_TOOLS = [ description: 'Return all files without limit', default: false, }, + ...PAGINATION_PROPS, }, }, }, @@ -358,6 +367,7 @@ const BASE_TOOLS = [ }, limit: { type: 'number', description: 'Number of results to return', default: 10 }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -379,6 +389,7 @@ const BASE_TOOLS = [ default: 0.3, }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -405,6 +416,7 @@ const BASE_TOOLS = [ description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, required: ['name'], }, @@ -452,6 +464,7 @@ const BASE_TOOLS = [ type: 'string', description: 'Filter by symbol kind (function, method, class, etc.)', }, + offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' }, }, }, }, @@ -468,6 +481,7 @@ const BASE_TOOLS = [ type: 'string', description: 'Filter by symbol kind (function, method, class, etc.)', }, + ...PAGINATION_PROPS, }, }, }, @@ -494,6 +508,7 @@ const BASE_TOOLS = [ default: false, }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, }, }, }, @@ -671,10 +686,18 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; case 'file_deps': - result = fileDepsData(args.file, dbPath, { noTests: args.no_tests }); + result = fileDepsData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.file_deps, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); break; case 'impact_analysis': - result = impactAnalysisData(args.file, dbPath, { noTests: args.no_tests }); + result = impactAnalysisData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.impact_analysis, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); break; case 'find_cycles': { const db = new Database(findDbPath(dbPath), { readonly: true }); @@ -692,6 +715,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.fn_deps, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'fn_impact': @@ -700,6 +725,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.fn_impact, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'symbol_path': @@ -721,10 +748,16 @@ export async function startMCPServer(customDbPath, options = {}) { noSource: args.no_source, noTests: args.no_tests, includeTests: args.include_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.context, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; case 'explain': - result = explainData(args.target, dbPath, { noTests: args.no_tests }); + result = explainData(args.target, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.explain, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); break; case 'where': result = whereData(args.target, dbPath, { @@ -748,12 +781,18 @@ export async function startMCPServer(customDbPath, options = {}) { ref: args.ref, depth: args.depth, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.diff_impact, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, }); } break; case 'semantic_search': { const mode = args.mode || 'hybrid'; - const searchOpts = { limit: args.limit, minScore: args.min_score }; + const searchOpts = { + limit: Math.min(args.limit ?? MCP_DEFAULTS.semantic_search, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + minScore: args.min_score, + }; if (mode === 'keyword') { const { ftsSearchData } = await import('./embedder.js'); @@ -864,6 +903,8 @@ export async function startMCPServer(customDbPath, options = {}) { depth: args.depth, sort: args.sort, full: args.full, + limit: Math.min(args.limit ?? MCP_DEFAULTS.structure, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } @@ -872,7 +913,8 @@ export async function startMCPServer(customDbPath, options = {}) { result = hotspotsData(dbPath, { metric: args.metric, level: args.level, - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.hotspots, MCP_MAX_LIMIT), + offset: args.offset ?? 0, noTests: args.no_tests, }); break; @@ -881,12 +923,14 @@ export async function startMCPServer(customDbPath, options = {}) { const { coChangeData, coChangeTopData } = await import('./cochange.js'); result = args.file ? coChangeData(args.file, dbPath, { - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.co_changes, MCP_MAX_LIMIT), + offset: args.offset ?? 0, minJaccard: args.min_jaccard, noTests: args.no_tests, }) : coChangeTopData(dbPath, { - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.co_changes, MCP_MAX_LIMIT), + offset: args.offset ?? 0, minJaccard: args.min_jaccard, noTests: args.no_tests, }); @@ -899,6 +943,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, kind: args.kind, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.execution_flow, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, }); break; } @@ -916,7 +962,8 @@ export async function startMCPServer(customDbPath, options = {}) { result = complexityData(dbPath, { target: args.name, file: args.file, - limit: args.limit, + limit: Math.min(args.limit ?? MCP_DEFAULTS.complexity, MCP_MAX_LIMIT), + offset: args.offset ?? 0, sort: args.sort, aboveThreshold: args.above_threshold, health: args.health, @@ -931,6 +978,8 @@ export async function startMCPServer(customDbPath, options = {}) { file: args.file, noTests: args.no_tests, kind: args.kind, + limit: Math.min(args.limit ?? MCP_DEFAULTS.manifesto, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } @@ -941,6 +990,8 @@ export async function startMCPServer(customDbPath, options = {}) { resolution: args.resolution, drift: args.drift, noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.communities, MCP_MAX_LIMIT), + offset: args.offset ?? 0, }); break; } diff --git a/src/paginate.js b/src/paginate.js index 7109f0bc..a93ec1da 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -7,12 +7,29 @@ /** Default limits applied by MCP tool handlers (not by the programmatic API). */ export const MCP_DEFAULTS = { + // Existing list_functions: 100, query_function: 50, where: 50, node_roles: 100, list_entry_points: 100, export_graph: 500, + // Smaller defaults for rich/nested results + fn_deps: 10, + fn_impact: 5, + context: 5, + explain: 10, + file_deps: 20, + diff_impact: 30, + impact_analysis: 20, + semantic_search: 20, + execution_flow: 50, + hotspots: 20, + co_changes: 20, + complexity: 30, + manifesto: 50, + communities: 20, + structure: 30, }; /** Hard cap to prevent abuse via MCP. */ @@ -68,3 +85,20 @@ export function paginateResult(result, field, { limit, offset } = {}) { const { items, pagination } = paginate(arr, { limit, offset }); return { ...result, [field]: items, _pagination: pagination }; } + +/** + * Print data as newline-delimited JSON (NDJSON). 
+ * + * Emits a `_meta` line with pagination info (if present), then one JSON + * line per item in the named array field. + * + * @param {object} data - Result object (may contain `_pagination`) + * @param {string} field - Array field name to stream (e.g. `'results'`) + */ +export function printNdjson(data, field) { + if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); + const items = data[field]; + if (Array.isArray(items)) { + for (const item of items) console.log(JSON.stringify(item)); + } +} diff --git a/src/queries.js b/src/queries.js index 9b1929ab..2a8df478 100644 --- a/src/queries.js +++ b/src/queries.js @@ -6,7 +6,7 @@ import { findCycles } from './cycles.js'; import { findDbPath, openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; import { ownersForFiles } from './owners.js'; -import { paginateResult } from './paginate.js'; +import { paginateResult, printNdjson } from './paginate.js'; import { LANGUAGE_REGISTRY } from './parser.js'; /** @@ -392,7 +392,8 @@ export function fileDepsData(file, customDbPath, opts = {}) { }); db.close(); - return { file, results }; + const base = { file, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function fnDepsData(name, customDbPath, opts = {}) { @@ -512,7 +513,8 @@ export function fnDepsData(name, customDbPath, opts = {}) { }); db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function fnImpactData(name, customDbPath, opts = {}) { @@ -526,7 +528,7 @@ export function fnImpactData(name, customDbPath, opts = {}) { return { name, results: [] }; } - const results = nodes.slice(0, 3).map((node) => { + const results = nodes.map((node) => { const visited = new Set([node.id]); const levels = {}; let frontier = [node.id]; @@ -565,7 +567,8 @@ export function fnImpactData(name, customDbPath, opts = {}) { }); 
db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function pathData(from, to, customDbPath, opts = {}) { @@ -1016,7 +1019,7 @@ export function diffImpactData(customDbPath, opts = {}) { } db.close(); - return { + const base = { changedFiles: changedRanges.size, newFiles: [...newFiles], affectedFunctions: functionResults, @@ -1031,6 +1034,7 @@ export function diffImpactData(customDbPath, opts = {}) { ownersAffected: ownership ? ownership.affectedOwners.length : 0, }, }; + return paginateResult(base, 'affectedFunctions', { limit: opts.limit, offset: opts.offset }); } export function diffImpactMermaid(customDbPath, opts = {}) { @@ -1178,6 +1182,131 @@ export function listFunctionsData(customDbPath, opts = {}) { return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } +/** + * Generator: stream functions one-by-one using .iterate() for memory efficiency. 
+ * @param {string} [customDbPath] + * @param {object} [opts] + * @param {boolean} [opts.noTests] + * @param {string} [opts.file] + * @param {string} [opts.pattern] + * @yields {{ name: string, kind: string, file: string, line: number, role: string|null }} + */ +export function* iterListFunctions(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const kinds = ['function', 'method', 'class']; + const placeholders = kinds.map(() => '?').join(', '); + + const conditions = [`kind IN (${placeholders})`]; + const params = [...kinds]; + + if (opts.file) { + conditions.push('file LIKE ?'); + params.push(`%${opts.file}%`); + } + if (opts.pattern) { + conditions.push('name LIKE ?'); + params.push(`%${opts.pattern}%`); + } + + const stmt = db.prepare( + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + ); + for (const row of stmt.iterate(...params)) { + if (noTests && isTestFile(row.file)) continue; + yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role }; + } + } finally { + db.close(); + } +} + +/** + * Generator: stream role-classified symbols one-by-one. 
+ * @param {string} [customDbPath]
+ * @param {object} [opts]
+ * @param {boolean} [opts.noTests]
+ * @param {string} [opts.role]
+ * @param {string} [opts.file]
+ * @yields {{ name: string, kind: string, file: string, line: number, role: string }}
+ */
+export function* iterRoles(customDbPath, opts = {}) {
+  const db = openReadonlyOrFail(customDbPath);
+  try {
+    const noTests = opts.noTests || false;
+    const conditions = ['role IS NOT NULL'];
+    const params = [];
+
+    if (opts.role) {
+      conditions.push('role = ?');
+      params.push(opts.role);
+    }
+    if (opts.file) {
+      conditions.push('file LIKE ?');
+      params.push(`%${opts.file}%`);
+    }
+
+    const stmt = db.prepare(
+      `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`,
+    );
+    for (const row of stmt.iterate(...params)) {
+      if (noTests && isTestFile(row.file)) continue;
+      yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role };
+    }
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Generator: stream symbol lookup results one-by-one. "exported" is a heuristic: true when the symbol has at least one caller in another file. NOTE(review): under noTests, symbols in test files are skipped and test callers are filtered from "uses", but test-file callers still count toward "exported" — confirm this asymmetry is intended.
+ * @param {string} target - Symbol name to search for (partial match)
+ * @param {string} [customDbPath]
+ * @param {object} [opts]
+ * @param {boolean} [opts.noTests]
+ * @yields {{ name: string, kind: string, file: string, line: number, role: string|null, exported: boolean, uses: object[] }}
+ */
+export function* iterWhere(target, customDbPath, opts = {}) {
+  const db = openReadonlyOrFail(customDbPath);
+  try {
+    const noTests = opts.noTests || false;
+    const placeholders = ALL_SYMBOL_KINDS.map(() => '?').join(', ');
+    const stmt = db.prepare(
+      `SELECT * FROM nodes WHERE name LIKE ? AND kind IN (${placeholders}) ORDER BY file, line`,
+    );
+    const crossFileCallersStmt = db.prepare(
+      `SELECT COUNT(*) as cnt FROM edges e JOIN nodes n ON e.source_id = n.id
+       WHERE e.target_id = ? 
AND e.kind = 'calls' AND n.file != ?`, + ); + const usesStmt = db.prepare( + `SELECT n.name, n.file, n.line FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls'`, + ); + for (const node of stmt.iterate(`%${target}%`, ...ALL_SYMBOL_KINDS)) { + if (noTests && isTestFile(node.file)) continue; + + const crossFileCallers = crossFileCallersStmt.get(node.id, node.file); + const exported = crossFileCallers.cnt > 0; + + let uses = usesStmt.all(node.id); + if (noTests) uses = uses.filter((u) => !isTestFile(u.file)); + + yield { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + role: node.role || null, + exported, + uses: uses.map((u) => ({ name: u.name, file: u.file, line: u.line })), + }; + } + } finally { + db.close(); + } +} + export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; @@ -1572,8 +1701,7 @@ export function queryName(name, customDbPath, opts = {}) { offset: opts.offset, }); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const r of data.results) console.log(JSON.stringify(r)); + printNdjson(data, 'results'); return; } if (opts.json) { @@ -1605,7 +1733,11 @@ export function queryName(name, customDbPath, opts = {}) { } export function impactAnalysis(file, customDbPath, opts = {}) { - const data = impactAnalysisData(file, customDbPath, { noTests: opts.noTests }); + const data = impactAnalysisData(file, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'sources'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1664,7 +1796,11 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) { } export function fileDeps(file, customDbPath, opts = {}) { - const data = fileDepsData(file, customDbPath, { noTests: opts.noTests }); + const data = fileDepsData(file, customDbPath, opts); + if (opts.ndjson) { + 
printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1695,6 +1831,10 @@ export function fileDeps(file, customDbPath, opts = {}) { export function fnDeps(name, customDbPath, opts = {}) { const data = fnDepsData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -1863,8 +2003,7 @@ export function contextData(name, customDbPath, opts = {}) { return { name, results: [] }; } - // Limit to first 5 results - nodes = nodes.slice(0, 5); + // No hardcoded slice — pagination handles bounding via limit/offset // File-lines cache to avoid re-reading the same file const fileCache = new Map(); @@ -2069,11 +2208,16 @@ export function contextData(name, customDbPath, opts = {}) { }); db.close(); - return { name, results }; + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function context(name, customDbPath, opts = {}) { const data = contextData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2429,11 +2573,16 @@ export function explainData(target, customDbPath, opts = {}) { } db.close(); - return { target, kind, results }; + const base = { target, kind, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); } export function explain(target, customDbPath, opts = {}) { const data = explainData(target, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2664,8 +2813,7 @@ export function whereData(target, customDbPath, opts = {}) { export function where(target, customDbPath, opts = {}) { const data = whereData(target, customDbPath, opts); if (opts.ndjson) { - if 
(data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const r of data.results) console.log(JSON.stringify(r)); + printNdjson(data, 'results'); return; } if (opts.json) { @@ -2756,8 +2904,7 @@ export function rolesData(customDbPath, opts = {}) { export function roles(customDbPath, opts = {}) { const data = rolesData(customDbPath, opts); if (opts.ndjson) { - if (data._pagination) console.log(JSON.stringify({ _meta: data._pagination })); - for (const s of data.symbols) console.log(JSON.stringify(s)); + printNdjson(data, 'symbols'); return; } if (opts.json) { @@ -2798,6 +2945,10 @@ export function roles(customDbPath, opts = {}) { export function fnImpact(name, customDbPath, opts = {}) { const data = fnImpactData(name, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; @@ -2830,6 +2981,10 @@ export function diffImpact(customDbPath, opts = {}) { return; } const data = diffImpactData(customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'affectedFunctions'); + return; + } if (opts.json || opts.format === 'json') { console.log(JSON.stringify(data, null, 2)); return; diff --git a/src/structure.js b/src/structure.js index ca92ed51..a4c28f41 100644 --- a/src/structure.js +++ b/src/structure.js @@ -2,6 +2,7 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; import { openReadonlyOrFail } from './db.js'; import { debug } from './logger.js'; +import { paginateResult } from './paginate.js'; import { isTestFile } from './queries.js'; // ─── Build-time: insert directory nodes, contains edges, and metrics ──── @@ -463,7 +464,8 @@ export function structureData(customDbPath, opts = {}) { } } - return { directories: result, count: result.length }; + const base = { directories: result, count: result.length }; + return paginateResult(base, 'directories', { limit: opts.limit, offset: opts.offset }); } /** @@ -534,7 
+536,8 @@ export function hotspotsData(customDbPath, opts = {}) { })); db.close(); - return { metric, level, limit, hotspots }; + const base = { metric, level, limit, hotspots }; + return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset }); } /** diff --git a/tests/integration/context.test.js b/tests/integration/context.test.js index 39070576..fd2779a7 100644 --- a/tests/integration/context.test.js +++ b/tests/integration/context.test.js @@ -229,10 +229,18 @@ describe('contextData', () => { expect(pfResult).toBeDefined(); }); - test('limits results to 5', () => { - // We only have a few functions, so this mainly checks the cap logic doesn't crash - const data = contextData('', dbPath); // empty name matches everything via LIKE '%%' - expect(data.results.length).toBeLessThanOrEqual(5); + test('limits results with pagination', () => { + // Without limit, all matches are returned (no hardcoded cap) + const all = contextData('', dbPath); // empty name matches everything via LIKE '%%' + expect(all.results.length).toBeGreaterThan(0); + + // With limit, results are capped and pagination metadata is present + const data = contextData('', dbPath, { limit: 2, offset: 0 }); + expect(data.results.length).toBeLessThanOrEqual(2); + if (all.results.length > 2) { + expect(data._pagination).toBeDefined(); + expect(data._pagination.hasMore).toBe(true); + } }); test('includeTests includes test source', () => { diff --git a/tests/integration/pagination.test.js b/tests/integration/pagination.test.js index 4bf652f8..46824881 100644 --- a/tests/integration/pagination.test.js +++ b/tests/integration/pagination.test.js @@ -21,8 +21,27 @@ import { afterAll, beforeAll, describe, expect, test } from 'vitest'; import { initSchema } from '../../src/db.js'; import { exportDOT, exportJSON, exportMermaid } from '../../src/export.js'; import { listEntryPointsData } from '../../src/flow.js'; -import { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult } from 
'../../src/paginate.js'; -import { listFunctionsData, queryNameData, rolesData, whereData } from '../../src/queries.js'; +import { + MCP_DEFAULTS, + MCP_MAX_LIMIT, + paginate, + paginateResult, + printNdjson, +} from '../../src/paginate.js'; +import { + contextData, + explainData, + fileDepsData, + fnDepsData, + fnImpactData, + iterListFunctions, + iterRoles, + iterWhere, + listFunctionsData, + queryNameData, + rolesData, + whereData, +} from '../../src/queries.js'; // ─── Helpers ─────────────────────────────────────────────────────────── @@ -297,6 +316,259 @@ describe('listEntryPointsData with pagination', () => { }); }); +// ─── fileDepsData with pagination ───────────────────────────────────── + +describe('fileDepsData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fileDepsData('a.js', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fileDepsData('', dbPath); + if (full.results.length > 1) { + const paginated = fileDepsData('', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + expect(paginated._pagination.hasMore).toBe(true); + } + }); +}); + +// ─── fnDepsData with pagination ────────────────────────────────────── + +describe('fnDepsData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fnDepsData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fnDepsData('a', dbPath); + if (full.results.length > 1) { + const paginated = fnDepsData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + expect(paginated._pagination.hasMore).toBe(true); + } + }); +}); + +// ─── fnImpactData with pagination 
──────────────────────────────────── + +describe('fnImpactData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = fnImpactData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = fnImpactData('a', dbPath); + if (full.results.length > 1) { + const paginated = fnImpactData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── contextData with pagination ───────────────────────────────────── + +describe('contextData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = contextData('alpha', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = contextData('a', dbPath); + if (full.results.length > 1) { + const paginated = contextData('a', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── explainData with pagination ───────────────────────────────────── + +describe('explainData with pagination', () => { + test('backward compat: no limit returns all', () => { + const data = explainData('a.js', dbPath); + expect(data._pagination).toBeUndefined(); + expect(data.results.length).toBeGreaterThan(0); + }); + + test('paginated results', () => { + const full = explainData('', dbPath); + if (full.results.length > 1) { + const paginated = explainData('', dbPath, { limit: 1 }); + expect(paginated.results).toHaveLength(1); + expect(paginated._pagination).toBeDefined(); + } + }); +}); + +// ─── MCP new defaults ──────────────────────────────────────────────── + +describe('MCP new defaults', () => { + test('MCP_DEFAULTS has new pagination keys', () => { + expect(MCP_DEFAULTS.fn_deps).toBe(10); + 
expect(MCP_DEFAULTS.fn_impact).toBe(5); + expect(MCP_DEFAULTS.context).toBe(5); + expect(MCP_DEFAULTS.explain).toBe(10); + expect(MCP_DEFAULTS.file_deps).toBe(20); + expect(MCP_DEFAULTS.diff_impact).toBe(30); + expect(MCP_DEFAULTS.semantic_search).toBe(20); + expect(MCP_DEFAULTS.execution_flow).toBe(50); + expect(MCP_DEFAULTS.hotspots).toBe(20); + expect(MCP_DEFAULTS.co_changes).toBe(20); + expect(MCP_DEFAULTS.complexity).toBe(30); + expect(MCP_DEFAULTS.manifesto).toBe(50); + expect(MCP_DEFAULTS.communities).toBe(20); + expect(MCP_DEFAULTS.structure).toBe(30); + }); +}); + +// ─── Iterator/Generator APIs ───────────────────────────────────────── + +describe('iterListFunctions', () => { + test('yields all functions matching listFunctionsData', () => { + const full = listFunctionsData(dbPath); + const iter = [...iterListFunctions(dbPath)]; + expect(iter.length).toBe(full.functions.length); + for (const item of iter) { + expect(item).toHaveProperty('name'); + expect(item).toHaveProperty('kind'); + expect(item).toHaveProperty('file'); + expect(item).toHaveProperty('line'); + } + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterListFunctions(dbPath)) { + count++; + if (count >= 2) break; + } + expect(count).toBe(2); + // If the DB leaked, subsequent operations would fail + const data = listFunctionsData(dbPath); + expect(data.functions.length).toBeGreaterThan(0); + }); + + test('noTests filtering works', () => { + const all = [...iterListFunctions(dbPath)]; + const noTests = [...iterListFunctions(dbPath, { noTests: true })]; + // Should not include test files (fixture has none, so counts equal) + expect(noTests.length).toBeLessThanOrEqual(all.length); + }); +}); + +describe('iterRoles', () => { + test('yields all role-classified symbols', () => { + const full = rolesData(dbPath); + const iter = [...iterRoles(dbPath)]; + expect(iter.length).toBe(full.count); + for (const item of iter) { + expect(item.role).toBeTruthy(); + 
} + }); + + test('role filter works', () => { + const coreOnly = [...iterRoles(dbPath, { role: 'core' })]; + for (const item of coreOnly) { + expect(item.role).toBe('core'); + } + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterRoles(dbPath)) { + count++; + if (count >= 1) break; + } + expect(count).toBe(1); + const data = rolesData(dbPath); + expect(data.count).toBeGreaterThan(0); + }); +}); + +describe('iterWhere', () => { + test('yields matching symbols with uses', () => { + const iter = [...iterWhere('alpha', dbPath)]; + expect(iter.length).toBeGreaterThan(0); + const alpha = iter.find((r) => r.name === 'alpha'); + expect(alpha).toBeDefined(); + expect(alpha).toHaveProperty('exported'); + expect(alpha).toHaveProperty('uses'); + expect(Array.isArray(alpha.uses)).toBe(true); + }); + + test('early break closes DB (no leak)', () => { + let count = 0; + for (const _item of iterWhere('a', dbPath)) { + count++; + if (count >= 1) break; + } + expect(count).toBe(1); + const data = whereData('alpha', dbPath); + expect(data.results.length).toBeGreaterThan(0); + }); +}); + +// ─── printNdjson utility ───────────────────────────────────────────── + +describe('printNdjson', () => { + test('outputs JSON lines for array field', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson({ items: [{ a: 1 }, { b: 2 }] }, 'items'); + expect(logs).toHaveLength(2); + expect(JSON.parse(logs[0])).toEqual({ a: 1 }); + expect(JSON.parse(logs[1])).toEqual({ b: 2 }); + } finally { + console.log = origLog; + } + }); + + test('emits _meta when _pagination exists', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson( + { items: [{ x: 1 }], _pagination: { total: 10, offset: 0, limit: 1, hasMore: true } }, + 'items', + ); + expect(logs).toHaveLength(2); + const meta = 
JSON.parse(logs[0]); + expect(meta._meta).toBeDefined(); + expect(meta._meta.total).toBe(10); + } finally { + console.log = origLog; + } + }); + + test('handles empty array', () => { + const logs = []; + const origLog = console.log; + console.log = (...args) => logs.push(args.join(' ')); + try { + printNdjson({ items: [] }, 'items'); + expect(logs).toHaveLength(0); + } finally { + console.log = origLog; + } + }); +}); + // ─── MCP default limits ────────────────────────────────────────────── describe('MCP defaults', () => { diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 1c082085..3df9d876 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -340,6 +340,8 @@ describe('startMCPServer handler dispatch', () => { file: 'src/app.js', kind: 'function', noTests: true, + limit: 10, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -392,7 +394,11 @@ describe('startMCPServer handler dispatch', () => { expect(result.isError).toBeUndefined(); expect(fnImpactMock).toHaveBeenCalledWith('handleClick', '/tmp/test.db', { depth: undefined, + file: undefined, + kind: undefined, noTests: undefined, + limit: 5, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -448,6 +454,8 @@ describe('startMCPServer handler dispatch', () => { ref: undefined, depth: undefined, noTests: undefined, + limit: 30, + offset: 0, }); vi.doUnmock('@modelcontextprotocol/sdk/server/index.js'); @@ -1067,8 +1075,10 @@ describe('startMCPServer handler dispatch', () => { target: 'buildGraph', file: 'src/builder.js', limit: 10, + offset: 0, sort: 'cyclomatic', aboveThreshold: true, + health: undefined, noTests: true, kind: 'function', });