From f5ee3dee4d116fde881a072d98d111ded6f1d4fd Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:05:31 -0600 Subject: [PATCH 01/21] chore: remove dead exports and un-export internal constant - Remove dead `truncate` function from ast-analysis/shared.js (0 consumers) - Remove dead `truncStart` function from presentation/table.js (0 consumers) - Un-export `BATCH_CHUNK` in builder/helpers.js (only used internally) Skipped sync.json targets that were false positives: - BUILTIN_RECEIVERS: used by incremental.js + build-edges.js - TRANSIENT_CODES/RETRY_DELAY_MS: internal to readFileSafe - MAX_COL_WIDTH: internal to printAutoTable - findFunctionNode: re-exported from index.js, used in tests Impact: 1 functions changed, 32 affected --- src/ast-analysis/shared.js | 12 ------------ src/domain/graph/builder/helpers.js | 2 +- src/presentation/table.js | 8 -------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/ast-analysis/shared.js b/src/ast-analysis/shared.js index 964f9a06..e3f40bd0 100644 --- a/src/ast-analysis/shared.js +++ b/src/ast-analysis/shared.js @@ -176,18 +176,6 @@ export function findFunctionNode(rootNode, startLine, _endLine, rules) { return best; } -/** - * Truncate a string to a maximum length, appending an ellipsis if truncated. - * - * @param {string} str - Input string - * @param {number} [max=200] - Maximum length - * @returns {string} - */ -export function truncate(str, max = 200) { - if (!str) return ''; - return str.length > max ? 
`${str.slice(0, max)}…` : str; -} - // ─── Extension / Language Mapping ───────────────────────────────────────── /** diff --git a/src/domain/graph/builder/helpers.js b/src/domain/graph/builder/helpers.js index 038de4c2..b7916c84 100644 --- a/src/domain/graph/builder/helpers.js +++ b/src/domain/graph/builder/helpers.js @@ -179,7 +179,7 @@ export function purgeFilesFromGraph(db, files, options = {}) { } /** Batch INSERT chunk size for multi-value INSERTs. */ -export const BATCH_CHUNK = 200; +const BATCH_CHUNK = 200; /** * Batch-insert node rows via multi-value INSERT statements. diff --git a/src/presentation/table.js b/src/presentation/table.js index d5ef1903..4fdba379 100644 --- a/src/presentation/table.js +++ b/src/presentation/table.js @@ -37,11 +37,3 @@ export function truncEnd(str, maxLen) { if (str.length <= maxLen) return str; return `${str.slice(0, maxLen - 1)}\u2026`; } - -/** - * Truncate a string from the start, prepending '\u2026' if truncated. - */ -export function truncStart(str, maxLen) { - if (str.length <= maxLen) return str; - return `\u2026${str.slice(-(maxLen - 1))}`; -} From 17cdcb00984f582485f8582734a40e3df4211d10 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:14:13 -0600 Subject: [PATCH 02/21] refactor: extract shared findNodes utility from cfg and dataflow features Impact: 5 functions changed, 7 affected --- src/features/cfg.js | 31 +++++------------ src/features/dataflow.js | 55 +++++++++++++++---------------- src/features/shared/find-nodes.js | 32 ++++++++++++++++++ 3 files changed, 66 insertions(+), 52 deletions(-) create mode 100644 src/features/shared/find-nodes.js diff --git a/src/features/cfg.js b/src/features/cfg.js index e8728cab..eff08652 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -24,8 +24,8 @@ import { openReadonlyOrFail, } from '../db/index.js'; import { info } from '../infrastructure/logger.js'; -import { isTestFile } from 
'../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +import { findNodes } from './shared/find-nodes.js'; // Re-export for backward compatibility export { _makeCfgRules as makeCfgRules, CFG_RULES }; @@ -273,27 +273,7 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { // ─── Query-Time Functions ─────────────────────────────────────────────── -function findNodes(db, name, opts = {}) { - const kinds = opts.kind ? [opts.kind] : ['function', 'method']; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND n.file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line - FROM nodes n - WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition}`, - ) - .all(...params); - - return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; -} +const CFG_DEFAULT_KINDS = ['function', 'method']; /** * Load CFG data for a function from the database. 
@@ -317,7 +297,12 @@ export function cfgData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + CFG_DEFAULT_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 9d0c8bcc..0f500b8f 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -24,6 +24,7 @@ import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; import { info } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +import { findNodes } from './shared/find-nodes.js'; // Re-export for backward compatibility export { _makeDataflowRules as makeDataflowRules, DATAFLOW_RULES }; @@ -234,31 +235,7 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) // ── Query functions ───────────────────────────────────────────────────────── -/** - * Look up node(s) by name with optional file/kind/noTests filtering. - * Similar to findMatchingNodes in queries.js but operates on the dataflow table. - */ -function findNodes(db, name, opts = {}) { - const kinds = opts.kind ? [opts.kind] : ALL_SYMBOL_KINDS; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare( - `SELECT * FROM nodes - WHERE name LIKE ? AND kind IN (${placeholders})${fileCondition} - ORDER BY file, line`, - ) - .all(...params); - - return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; -} +// findNodes imported from ./shared/find-nodes.js /** * Return all dataflow edges for a symbol. 
@@ -282,7 +259,12 @@ export function dataflowData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } @@ -426,12 +408,22 @@ export function dataflowPathData(from, to, customDbPath, opts = {}) { }; } - const fromNodes = findNodes(db, from, { noTests, file: opts.fromFile, kind: opts.kind }); + const fromNodes = findNodes( + db, + from, + { noTests, file: opts.fromFile, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (fromNodes.length === 0) { return { from, to, found: false, error: `No symbol matching "${from}"` }; } - const toNodes = findNodes(db, to, { noTests, file: opts.toFile, kind: opts.kind }); + const toNodes = findNodes( + db, + to, + { noTests, file: opts.toFile, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (toNodes.length === 0) { return { from, to, found: false, error: `No symbol matching "${to}"` }; } @@ -554,7 +546,12 @@ export function dataflowImpactData(name, customDbPath, opts = {}) { }; } - const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + const nodes = findNodes( + db, + name, + { noTests, file: opts.file, kind: opts.kind }, + ALL_SYMBOL_KINDS, + ); if (nodes.length === 0) { return { name, results: [] }; } diff --git a/src/features/shared/find-nodes.js b/src/features/shared/find-nodes.js new file mode 100644 index 00000000..cc886d80 --- /dev/null +++ b/src/features/shared/find-nodes.js @@ -0,0 +1,32 @@ +import { isTestFile } from '../../infrastructure/test-filter.js'; + +/** + * Look up node(s) by name with optional file/kind/noTests filtering. 
+ * + * @param {object} db - open SQLite database handle + * @param {string} name - symbol name (partial LIKE match) + * @param {object} [opts] - { kind, file, noTests } + * @param {string[]} defaultKinds - fallback kinds when opts.kind is not set + * @returns {object[]} matching node rows + */ +export function findNodes(db, name, opts = {}, defaultKinds) { + const kinds = opts.kind ? [opts.kind] : defaultKinds; + const placeholders = kinds.map(() => '?').join(', '); + const params = [`%${name}%`, ...kinds]; + + let fileCondition = ''; + if (opts.file) { + fileCondition = ' AND file LIKE ?'; + params.push(`%${opts.file}%`); + } + + const rows = db + .prepare( + `SELECT * FROM nodes + WHERE name LIKE ? AND kind IN (${placeholders})${fileCondition} + ORDER BY file, line`, + ) + .all(...params); + + return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; +} From a09740d9184ea58b3cdcecfeebb964f8743594e8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:18:59 -0600 Subject: [PATCH 03/21] fix: replace empty catch blocks in db connection and migrations connection.js: add debug() logging to all 8 catch-with-fallback blocks so failures are observable without changing behavior. migrations.js: replace 14 try/catch blocks in initSchema with hasColumn() and hasTable() guards. CREATE INDEX calls use IF NOT EXISTS directly. getBuildMeta uses hasTable() check instead of try/catch. Impact: 10 functions changed, 19 affected --- src/db/connection.js | 30 +++++++++------- src/db/migrations.js | 86 +++++++++++++++----------------------------- 2 files changed, 46 insertions(+), 70 deletions(-) diff --git a/src/db/connection.js b/src/db/connection.js index 75ee4a6d..59114bbd 100644 --- a/src/db/connection.js +++ b/src/db/connection.js @@ -37,10 +37,12 @@ export function findRepoRoot(fromDir) { // matches the realpathSync'd dir in findDbPath. 
try { root = fs.realpathSync(raw); - } catch { + } catch (e) { + debug(`realpathSync failed for git root "${raw}", using resolve: ${e.message}`); root = path.resolve(raw); } - } catch { + } catch (e) { + debug(`git rev-parse failed for "${dir}": ${e.message}`); root = null; } if (!fromDir) { @@ -60,7 +62,8 @@ function isProcessAlive(pid) { try { process.kill(pid, 0); return true; - } catch { + } catch (e) { + debug(`PID ${pid} not alive: ${e.code || e.message}`); return false; } } @@ -75,13 +78,13 @@ function acquireAdvisoryLock(dbPath) { warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); } } - } catch { - /* ignore read errors */ + } catch (e) { + debug(`Advisory lock read failed: ${e.message}`); } try { fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); - } catch { - /* best-effort */ + } catch (e) { + debug(`Advisory lock write failed: ${e.message}`); } } @@ -91,8 +94,8 @@ function releaseAdvisoryLock(lockPath) { if (Number(content) === process.pid) { fs.unlinkSync(lockPath); } - } catch { - /* ignore */ + } catch (e) { + debug(`Advisory lock release failed for ${lockPath}: ${e.message}`); } } @@ -107,7 +110,8 @@ function isSameDirectory(a, b) { const sa = fs.statSync(a); const sb = fs.statSync(b); return sa.dev === sb.dev && sa.ino === sb.ino; - } catch { + } catch (e) { + debug(`isSameDirectory stat failed: ${e.message}`); return false; } } @@ -139,7 +143,8 @@ export function findDbPath(customPath) { if (rawCeiling) { try { ceiling = fs.realpathSync(rawCeiling); - } catch { + } catch (e) { + debug(`realpathSync failed for ceiling "${rawCeiling}": ${e.message}`); ceiling = rawCeiling; } } else { @@ -149,7 +154,8 @@ export function findDbPath(customPath) { let dir; try { dir = fs.realpathSync(process.cwd()); - } catch { + } catch (e) { + debug(`realpathSync failed for cwd: ${e.message}`); dir = process.cwd(); } while (true) { diff --git a/src/db/migrations.js b/src/db/migrations.js index 3b38feff..8a12bda2 100644 
--- a/src/db/migrations.js +++ b/src/db/migrations.js @@ -242,13 +242,20 @@ export const MIGRATIONS = [ }, ]; +function hasColumn(db, table, column) { + const cols = db.pragma(`table_info(${table})`); + return cols.some((c) => c.name === column); +} + +function hasTable(db, table) { + const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?").get(table); + return !!row; +} + export function getBuildMeta(db, key) { - try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); - return row ? row.value : null; - } catch { - return null; - } + if (!hasTable(db, 'build_meta')) return null; + const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + return row ? row.value : null; } export function setBuildMeta(db, entries) { @@ -280,74 +287,37 @@ export function initSchema(db) { } } - try { + // Legacy column compat — add columns that may be missing from pre-migration DBs + if (!hasColumn(db, 'nodes', 'end_line')) { db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'edges', 'confidence')) { db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'edges', 'dynamic')) { db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'role')) { db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); - } catch { - /* already exists */ } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); - } catch { - /* already exists */ - } - try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + if (!hasColumn(db, 'nodes', 'parent_id')) { db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); - } 
catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); - } catch { - /* already exists */ } - try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + if (!hasColumn(db, 'nodes', 'qualified_name')) { db.exec('ALTER TABLE nodes ADD COLUMN qualified_name TEXT'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'scope')) { db.exec('ALTER TABLE nodes ADD COLUMN scope TEXT'); - } catch { - /* already exists */ } - try { + if (!hasColumn(db, 'nodes', 'visibility')) { db.exec('ALTER TABLE nodes ADD COLUMN visibility TEXT'); - } catch { - /* already exists */ } - try { + if (hasTable(db, 'nodes')) { db.exec('UPDATE nodes SET qualified_name = name WHERE qualified_name IS NULL'); - } catch { - /* nodes table may not exist yet */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); - } catch { - /* already exists */ } + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)'); + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)'); } From b691fcc90b9cc9757997cfc076af00b5dd756473 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:21:22 -0600 Subject: [PATCH 04/21] fix: replace empty catch blocks in domain analysis layer Add debug() logging to 10 empty catch blocks across context.js, symbol-lookup.js, exports.js, impact.js, and module-map.js. All catches retain their fallback behavior but failures are now observable via debug logging. 
Impact: 6 functions changed, 18 affected --- src/domain/analysis/context.js | 13 +++++++------ src/domain/analysis/exports.js | 5 +++-- src/domain/analysis/impact.js | 13 +++++++------ src/domain/analysis/module-map.js | 9 +++++---- src/domain/analysis/symbol-lookup.js | 4 +++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/domain/analysis/context.js b/src/domain/analysis/context.js index e3409208..a97e5419 100644 --- a/src/domain/analysis/context.js +++ b/src/domain/analysis/context.js @@ -13,6 +13,7 @@ import { getComplexityForNode, openReadonlyOrFail, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { createFileLinesReader, @@ -142,8 +143,8 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { halsteadVolume: cRow.halstead_volume || 0, }; } - } catch { - /* table may not exist */ + } catch (e) { + debug(`complexity lookup failed for node ${node.id}: ${e.message}`); } return { @@ -311,8 +312,8 @@ export function contextData(name, customDbPath, opts = {}) { halsteadVolume: cRow.halstead_volume || 0, }; } - } catch { - /* table may not exist */ + } catch (e) { + debug(`complexity lookup failed for node ${node.id}: ${e.message}`); } // Children (parameters, properties, constants) @@ -324,8 +325,8 @@ export function contextData(name, customDbPath, opts = {}) { line: c.line, endLine: c.end_line || null, })); - } catch { - /* parent_id column may not exist */ + } catch (e) { + debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); } return { diff --git a/src/domain/analysis/exports.js b/src/domain/analysis/exports.js index 9af6b807..7bebac40 100644 --- a/src/domain/analysis/exports.js +++ b/src/domain/analysis/exports.js @@ -6,6 +6,7 @@ import { findNodesByFile, openReadonlyOrFail, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from 
'../../infrastructure/test-filter.js'; import { createFileLinesReader, @@ -60,8 +61,8 @@ function exportsFileImpl(db, target, noTests, getFileLines, unused) { try { db.prepare('SELECT exported FROM nodes LIMIT 0').raw(); hasExportedCol = true; - } catch { - /* old DB without exported column */ + } catch (e) { + debug(`exported column not available, using fallback: ${e.message}`); } return fileNodes.map((fn) => { diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index 736d76e0..bd3bbe1d 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -13,6 +13,7 @@ import { evaluateBoundaries } from '../../features/boundaries.js'; import { coChangeForFiles } from '../../features/cochange.js'; import { ownersForFiles } from '../../features/owners.js'; import { loadConfig } from '../../infrastructure/config.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; @@ -289,8 +290,8 @@ export function diffImpactData(customDbPath, opts = {}) { }); // Exclude files already found via static analysis historicallyCoupled = coResults.filter((r) => !affectedFiles.has(r.file)); - } catch { - /* co_changes table doesn't exist — skip silently */ + } catch (e) { + debug(`co_changes lookup skipped: ${e.message}`); } // Look up CODEOWNERS for changed + affected files @@ -305,8 +306,8 @@ export function diffImpactData(customDbPath, opts = {}) { suggestedReviewers: ownerResult.suggestedReviewers, }; } - } catch { - /* CODEOWNERS missing or unreadable — skip silently */ + } catch (e) { + debug(`CODEOWNERS lookup skipped: ${e.message}`); } // Check boundary violations scoped to changed files @@ -323,8 +324,8 @@ export function diffImpactData(customDbPath, opts = {}) { boundaryViolations = result.violations; boundaryViolationCount = result.violationCount; } - 
} catch { - /* boundary check failed — skip silently */ + } catch (e) { + debug(`boundary check skipped: ${e.message}`); } const base = { diff --git a/src/domain/analysis/module-map.js b/src/domain/analysis/module-map.js index e6aa0936..d2bc613b 100644 --- a/src/domain/analysis/module-map.js +++ b/src/domain/analysis/module-map.js @@ -1,5 +1,6 @@ import path from 'node:path'; import { openReadonlyOrFail, testFilterSQL } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { findCycles } from '../graph/cycles.js'; import { LANGUAGE_REGISTRY } from '../parser.js'; @@ -193,8 +194,8 @@ export function statsData(customDbPath, opts = {}) { builtAt: meta.built_at || null, }; } - } catch { - /* embeddings table may not exist */ + } catch (e) { + debug(`embeddings lookup skipped: ${e.message}`); } // Graph quality metrics @@ -301,8 +302,8 @@ export function statsData(customDbPath, opts = {}) { minMI: +Math.min(...miValues).toFixed(1), }; } - } catch { - /* table may not exist in older DBs */ + } catch (e) { + debug(`complexity summary skipped: ${e.message}`); } return { diff --git a/src/domain/analysis/symbol-lookup.js b/src/domain/analysis/symbol-lookup.js index b272004a..312581cc 100644 --- a/src/domain/analysis/symbol-lookup.js +++ b/src/domain/analysis/symbol-lookup.js @@ -14,6 +14,7 @@ import { openReadonlyOrFail, Repository, } from '../../db/index.js'; +import { debug } from '../../infrastructure/logger.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; import { ALL_SYMBOL_KINDS } from '../../shared/kinds.js'; import { getFileHash, normalizeSymbol } from '../../shared/normalize.js'; @@ -206,7 +207,8 @@ export function childrenData(name, customDbPath, opts = {}) { let children; try { children = findNodeChildren(db, node.id); - } catch { + } catch (e) { + debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); children = []; } if 
(noTests) children = children.filter((c) => !isTestFile(c.file || node.file)); From dadb383a8dea5b7be7ab7ea7ac7e705633de9314 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:22:28 -0600 Subject: [PATCH 05/21] fix: replace empty catch blocks in parser.js Add debug() logging to 6 empty catch blocks: 3 in disposeParsers() for WASM resource cleanup, 2 in ensureWasmTrees() for file read and parse failures, and 1 in getActiveEngine() for version lookup. Impact: 3 functions changed, 0 affected --- src/domain/parser.js | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/domain/parser.js b/src/domain/parser.js index fb41d473..476e6184 100644 --- a/src/domain/parser.js +++ b/src/domain/parser.js @@ -2,7 +2,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { Language, Parser, Query } from 'web-tree-sitter'; -import { warn } from '../infrastructure/logger.js'; +import { debug, warn } from '../infrastructure/logger.js'; import { getNative, getNativePackageVersion, loadNative } from '../infrastructure/native.js'; // Re-export all extractors for backward compatibility @@ -116,29 +116,35 @@ export async function createParsers() { */ export function disposeParsers() { if (_cachedParsers) { - for (const [, parser] of _cachedParsers) { + for (const [id, parser] of _cachedParsers) { if (parser && typeof parser.delete === 'function') { try { parser.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose parser ${id}: ${e.message}`); + } } } _cachedParsers = null; } - for (const [, query] of _queryCache) { + for (const [id, query] of _queryCache) { if (query && typeof query.delete === 'function') { try { query.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose query ${id}: ${e.message}`); + } } } _queryCache.clear(); if (_cachedLanguages) { - for (const [, lang] of _cachedLanguages) { + 
for (const [id, lang] of _cachedLanguages) { if (lang && typeof lang.delete === 'function') { try { lang.delete(); - } catch {} + } catch (e) { + debug(`Failed to dispose language ${id}: ${e.message}`); + } } } _cachedLanguages = null; @@ -189,14 +195,15 @@ export async function ensureWasmTrees(fileSymbols, rootDir) { let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`ensureWasmTrees: cannot read ${relPath}: ${e.message}`); continue; } try { symbols._tree = parser.parse(code); symbols._langId = entry.id; - } catch { - // skip files that fail to parse + } catch (e) { + debug(`ensureWasmTrees: parse failed for ${relPath}: ${e.message}`); } } } @@ -483,7 +490,9 @@ export function getActiveEngine(opts = {}) { if (native) { try { version = getNativePackageVersion() ?? version; - } catch {} + } catch (e) { + debug(`getNativePackageVersion failed: ${e.message}`); + } } return { name, version }; } From 22d94f4f70437a5c319431e0ee5a1e313ffdeef3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:29:16 -0600 Subject: [PATCH 06/21] fix: replace empty catch blocks in features layer Add debug() logging to 9 empty catch blocks across complexity.js (5), cfg.js (2), and dataflow.js (2). All catches for file read and parse failures now log the error message before continuing. 
Impact: 4 functions changed, 2 affected --- src/features/cfg.js | 8 +++++--- src/features/complexity.js | 23 +++++++++++++---------- src/features/dataflow.js | 8 +++++--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/features/cfg.js b/src/features/cfg.js index eff08652..ae1b8564 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -23,7 +23,7 @@ import { hasCfgTables, openReadonlyOrFail, } from '../db/index.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } from '../infrastructure/logger.js'; import { paginateResult } from '../shared/paginate.js'; import { findNodes } from './shared/find-nodes.js'; @@ -149,7 +149,8 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`cfg: cannot read ${relPath}: ${e.message}`); continue; } @@ -158,7 +159,8 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`cfg: parse failed for ${relPath}: ${e.message}`); continue; } } diff --git a/src/features/complexity.js b/src/features/complexity.js index c5cdf62e..12f5acf1 100644 --- a/src/features/complexity.js +++ b/src/features/complexity.js @@ -14,7 +14,7 @@ import { walkWithVisitors } from '../ast-analysis/visitor.js'; import { createComplexityVisitor } from '../ast-analysis/visitors/complexity-visitor.js'; import { getFunctionNodeId, openReadonlyOrFail } from '../db/index.js'; import { loadConfig } from '../infrastructure/config.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; @@ -401,7 +401,8 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp let code; try { code = fs.readFileSync(absPath, 
'utf-8'); - } catch { + } catch (e) { + debug(`complexity: cannot read ${relPath}: ${e.message}`); continue; } @@ -410,7 +411,8 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`complexity: parse failed for ${relPath}: ${e.message}`); continue; } } @@ -606,13 +608,14 @@ export function complexityData(customDbPath, opts = {}) { ORDER BY ${orderBy}`, ) .all(...params); - } catch { + } catch (e) { + debug(`complexity query failed (table may not exist): ${e.message}`); // Check if graph has nodes even though complexity table is missing/empty let hasGraph = false; try { hasGraph = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c > 0; - } catch { - /* ignore */ + } catch (e2) { + debug(`nodes table check failed: ${e2.message}`); } return { functions: [], summary: null, thresholds, hasGraph }; } @@ -701,8 +704,8 @@ export function complexityData(customDbPath, opts = {}) { ).length, }; } - } catch { - /* ignore */ + } catch (e) { + debug(`complexity summary query failed: ${e.message}`); } // When summary is null (no complexity rows), check if graph has nodes @@ -710,8 +713,8 @@ export function complexityData(customDbPath, opts = {}) { if (summary === null) { try { hasGraph = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c > 0; - } catch { - /* ignore */ + } catch (e) { + debug(`nodes table check failed: ${e.message}`); } } diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 0f500b8f..695afa95 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -21,7 +21,7 @@ import { walkWithVisitors } from '../ast-analysis/visitor.js'; import { createDataflowVisitor } from '../ast-analysis/visitors/dataflow-visitor.js'; import { hasDataflowTable, openReadonlyOrFail } from '../db/index.js'; import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; -import { info } from '../infrastructure/logger.js'; +import { debug, info } 
from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; import { findNodes } from './shared/find-nodes.js'; @@ -141,7 +141,8 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) let code; try { code = fs.readFileSync(absPath, 'utf-8'); - } catch { + } catch (e) { + debug(`dataflow: cannot read ${relPath}: ${e.message}`); continue; } @@ -150,7 +151,8 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) try { tree = parser.parse(code); - } catch { + } catch (e) { + debug(`dataflow: parse failed for ${relPath}: ${e.message}`); continue; } } From 3b365347a3e9e3ce39b652d674b69db8e7458278 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:39:16 -0600 Subject: [PATCH 07/21] refactor: decompose extractSymbolsWalk into per-category handlers Split the monolithic walkJavaScriptNode switch (13 cases, cognitive 228) into 11 focused handler functions. The dispatcher is now a thin switch that delegates to handleFunctionDecl, handleClassDecl, handleMethodDef, handleInterfaceDecl, handleTypeAliasDecl, handleVariableDecl, handleEnumDecl, handleCallExpr, handleImportStmt, handleExportStmt, and handleExpressionStmt. The expression_statement case now reuses the existing handleCommonJSAssignment helper, eliminating ~50 lines of duplication. Worst handler complexity: handleVariableDecl (cognitive 20), down from the original monolithic function (cognitive 279). 
Impact: 13 functions changed, 3 affected --- src/extractors/javascript.js | 578 +++++++++++++++++------------------ 1 file changed, 274 insertions(+), 304 deletions(-) diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index a2d9e7b1..997c8ea6 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -320,333 +320,303 @@ function handleCommonJSAssignment(left, right, node, imports) { // ── Manual tree walk (fallback when Query not available) ──────────────────── function extractSymbolsWalk(tree) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; - - function walkJavaScriptNode(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const fnChildren = extractParameters(node); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fnChildren.length > 0 ? fnChildren : undefined, - }); - } - break; - } + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; + + walkJavaScriptNode(tree.rootNode, ctx); + return ctx; +} - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const className = nameNode.text; - const startLine = node.startPosition.row + 1; - const clsChildren = extractClassProperties(node); - definitions.push({ - name: className, - kind: 'class', - line: startLine, - endLine: nodeEndLine(node), - children: clsChildren.length > 0 ? 
clsChildren : undefined, - }); - const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); - if (heritage) { - const superName = extractSuperclass(heritage); - if (superName) { - classes.push({ name: className, extends: superName, line: startLine }); - } - const implementsList = extractImplements(heritage); - for (const iface of implementsList) { - classes.push({ name: className, implements: iface, line: startLine }); - } - } - } - break; - } +function walkJavaScriptNode(node, ctx) { + switch (node.type) { + case 'function_declaration': + handleFunctionDecl(node, ctx); + break; + case 'class_declaration': + handleClassDecl(node, ctx); + break; + case 'method_definition': + handleMethodDef(node, ctx); + break; + case 'interface_declaration': + handleInterfaceDecl(node, ctx); + break; + case 'type_alias_declaration': + handleTypeAliasDecl(node, ctx); + break; + case 'lexical_declaration': + case 'variable_declaration': + handleVariableDecl(node, ctx); + break; + case 'enum_declaration': + handleEnumDecl(node, ctx); + break; + case 'call_expression': + handleCallExpr(node, ctx); + break; + case 'import_statement': + handleImportStmt(node, ctx); + break; + case 'export_statement': + handleExportStmt(node, ctx); + break; + case 'expression_statement': + handleExpressionStmt(node, ctx); + break; + } - case 'method_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const methChildren = extractParameters(node); - const methVis = extractVisibility(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: methChildren.length > 0 ? 
methChildren : undefined, - visibility: methVis, - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) { + walkJavaScriptNode(node.child(i), ctx); + } +} - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = - node.childForFieldName('body') || - findChild(node, 'interface_body') || - findChild(node, 'object_type'); - if (body) { - extractInterfaceMethods(body, nameNode.text, definitions); - } - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'type_alias_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } +function handleFunctionDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const fnChildren = extractParameters(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fnChildren.length > 0 ? 
fnChildren : undefined, + }); + } +} - case 'lexical_declaration': - case 'variable_declaration': { - const isConst = node.text.startsWith('const '); - for (let i = 0; i < node.childCount; i++) { - const declarator = node.child(i); - if (declarator && declarator.type === 'variable_declarator') { - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - if (nameN && valueN) { - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' - ) { - const varFnChildren = extractParameters(valueN); - definitions.push({ - name: nameN.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(valueN), - children: varFnChildren.length > 0 ? varFnChildren : undefined, - }); - } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { - definitions.push({ - name: nameN.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } else if (isConst && nameN && nameN.type === 'identifier' && !valueN) { - // const with no value (shouldn't happen but be safe) - } - } - } - break; - } +function handleClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const className = nameNode.text; + const startLine = node.startPosition.row + 1; + const clsChildren = extractClassProperties(node); + ctx.definitions.push({ + name: className, + kind: 'class', + line: startLine, + endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? 
clsChildren : undefined, + }); + const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); + if (heritage) { + const superName = extractSuperclass(heritage); + if (superName) { + ctx.classes.push({ name: className, extends: superName, line: startLine }); + } + const implementsList = extractImplements(heritage); + for (const iface of implementsList) { + ctx.classes.push({ name: className, implements: iface, line: startLine }); + } + } +} - case 'enum_declaration': { - // TypeScript enum - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = []; - const body = node.childForFieldName('body') || findChild(node, 'enum_body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member) continue; - if (member.type === 'enum_assignment' || member.type === 'property_identifier') { - const mName = member.childForFieldName('name') || member.child(0); - if (mName) { - enumChildren.push({ - name: mName.text, - kind: 'constant', - line: member.startPosition.row + 1, - }); - } - } - } - } - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } +function handleMethodDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const parentClass = findParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const methChildren = extractParameters(node); + const methVis = extractVisibility(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: methChildren.length > 0 ? 
methChildren : undefined, + visibility: methVis, + }); + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - // Dynamic import(): import('./foo.js') → extract as an import entry - if (fn.type === 'import') { - const args = node.childForFieldName('arguments') || findChild(node, 'arguments'); - if (args) { - const strArg = findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - // Extract destructured names from parent context: - // const { a, b } = await import('./foo.js') - // (standalone import('./foo.js').then(...) calls produce an edge with empty names) - const names = extractDynamicImportNames(node); - imports.push({ - source: modPath, - names, - line: node.startPosition.row + 1, - dynamicImport: true, - }); - } else { - debug( - `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`, - ); - } - } - } else { - const callInfo = extractCallInfo(fn, node); - if (callInfo) calls.push(callInfo); - if (fn.type === 'member_expression') { - const cbDef = extractCallbackDefinition(node, fn); - if (cbDef) definitions.push(cbDef); - } - } - } - break; - } +function handleInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = + node.childForFieldName('body') || + findChild(node, 'interface_body') || + findChild(node, 'object_type'); + if (body) { + extractInterfaceMethods(body, nameNode.text, ctx.definitions); + } +} - case 'import_statement': { - const isTypeOnly = node.text.startsWith('import type'); - const source = node.childForFieldName('source') || findChild(node, 'string'); - if (source) { - const modPath = source.text.replace(/['"]/g, ''); - const names = extractImportNames(node); - imports.push({ - source: modPath, - names, 
+function handleTypeAliasDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } +} + +function handleVariableDecl(node, ctx) { + const isConst = node.text.startsWith('const '); + for (let i = 0; i < node.childCount; i++) { + const declarator = node.child(i); + if (declarator && declarator.type === 'variable_declarator') { + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (nameN && valueN) { + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' + ) { + const varFnChildren = extractParameters(valueN); + ctx.definitions.push({ + name: nameN.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, + }); + } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', line: node.startPosition.row + 1, - typeOnly: isTypeOnly, + endLine: nodeEndLine(node), }); } - break; } + } + } +} - case 'export_statement': { - const exportLine = node.startPosition.row + 1; - const decl = node.childForFieldName('declaration'); - if (decl) { - const declType = decl.type; - const kindMap = { - function_declaration: 'function', - class_declaration: 'class', - interface_declaration: 'interface', - type_alias_declaration: 'type', - }; - const kind = kindMap[declType]; - if (kind) { - const n = decl.childForFieldName('name'); - if (n) exports.push({ name: n.text, kind, line: exportLine }); - } - } - const source = node.childForFieldName('source') || findChild(node, 'string'); - if (source && !decl) { - const modPath = source.text.replace(/['"]/g, ''); - const reexportNames = 
extractImportNames(node); - const nodeText = node.text; - const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); - imports.push({ - source: modPath, - names: reexportNames, - line: exportLine, - reexport: true, - wildcardReexport: isWildcard && reexportNames.length === 0, +function handleEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = []; + const body = node.childForFieldName('body') || findChild(node, 'enum_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 'enum_assignment' || member.type === 'property_identifier') { + const mName = member.childForFieldName('name') || member.child(0); + if (mName) { + enumChildren.push({ + name: mName.text, + kind: 'constant', + line: member.startPosition.row + 1, }); } - break; } + } + } + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'expression_statement': { - const expr = node.child(0); - if (expr && expr.type === 'assignment_expression') { - const left = expr.childForFieldName('left'); - const right = expr.childForFieldName('right'); - if (left && right) { - const leftText = left.text; - if (leftText.startsWith('module.exports') || leftText === 'exports') { - if (right.type === 'call_expression') { - const fn = right.childForFieldName('function'); - const args = right.childForFieldName('arguments') || findChild(right, 'arguments'); - if (fn && fn.text === 'require' && args) { - const strArg = findChild(args, 'string'); - if (strArg) { - imports.push({ - source: strArg.text.replace(/['"]/g, ''), - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - if (right.type === 'object') { - for (let ci = 0; ci < right.childCount; ci++) { - const child = right.child(ci); - if (child && child.type === 'spread_element') { - const spreadExpr = child.child(1) || child.childForFieldName('value'); - if (spreadExpr && spreadExpr.type === 'call_expression') { - const fn2 = spreadExpr.childForFieldName('function'); - const args2 = - spreadExpr.childForFieldName('arguments') || - findChild(spreadExpr, 'arguments'); - if (fn2 && fn2.text === 'require' && args2) { - const strArg2 = findChild(args2, 'string'); - if (strArg2) { - imports.push({ - source: strArg2.text.replace(/['"]/g, ''), - names: [], - line: node.startPosition.row + 1, - reexport: true, - wildcardReexport: true, - }); - } - } - } - } - } - } - } - } - } - break; +function handleCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'import') { + const args = node.childForFieldName('arguments') || findChild(node, 'arguments'); + if (args) { + const strArg = findChild(args, 'string'); + if (strArg) { + const modPath = strArg.text.replace(/['"]/g, ''); + const names = extractDynamicImportNames(node); 
+ ctx.imports.push({ + source: modPath, + names, + line: node.startPosition.row + 1, + dynamicImport: true, + }); + } else { + debug( + `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`, + ); } } + } else { + const callInfo = extractCallInfo(fn, node); + if (callInfo) ctx.calls.push(callInfo); + if (fn.type === 'member_expression') { + const cbDef = extractCallbackDefinition(node, fn); + if (cbDef) ctx.definitions.push(cbDef); + } + } +} - for (let i = 0; i < node.childCount; i++) { - walkJavaScriptNode(node.child(i)); +function handleImportStmt(node, ctx) { + const isTypeOnly = node.text.startsWith('import type'); + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source) { + const modPath = source.text.replace(/['"]/g, ''); + const names = extractImportNames(node); + ctx.imports.push({ + source: modPath, + names, + line: node.startPosition.row + 1, + typeOnly: isTypeOnly, + }); + } +} + +function handleExportStmt(node, ctx) { + const exportLine = node.startPosition.row + 1; + const decl = node.childForFieldName('declaration'); + if (decl) { + const declType = decl.type; + const kindMap = { + function_declaration: 'function', + class_declaration: 'class', + interface_declaration: 'interface', + type_alias_declaration: 'type', + }; + const kind = kindMap[declType]; + if (kind) { + const n = decl.childForFieldName('name'); + if (n) ctx.exports.push({ name: n.text, kind, line: exportLine }); } } + const source = node.childForFieldName('source') || findChild(node, 'string'); + if (source && !decl) { + const modPath = source.text.replace(/['"]/g, ''); + const reexportNames = extractImportNames(node); + const nodeText = node.text; + const isWildcard = nodeText.includes('export *') || nodeText.includes('export*'); + ctx.imports.push({ + source: modPath, + names: reexportNames, + line: exportLine, + reexport: true, + wildcardReexport: isWildcard && reexportNames.length === 0, + 
}); + } +} - walkJavaScriptNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleExpressionStmt(node, ctx) { + const expr = node.child(0); + if (expr && expr.type === 'assignment_expression') { + const left = expr.childForFieldName('left'); + const right = expr.childForFieldName('right'); + handleCommonJSAssignment(left, right, node, ctx.imports); + } } // ── Child extraction helpers ──────────────────────────────────────────────── From e1d7ee03846d70178fa75db4145482375687d12b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:41:20 -0600 Subject: [PATCH 08/21] refactor: decompose extractPythonSymbols into per-category handlers Split walkPythonNode switch into 7 focused handlers: handlePyFunctionDef, handlePyClassDef, handlePyCall, handlePyImport, handlePyExpressionStmt, handlePyImportFrom, plus the decorated_definition inline dispatch. Moved extractPythonParameters, extractPythonClassProperties, walkInitBody, and findPythonParentClass from closures to module-scope functions. Impact: 12 functions changed, 5 affected --- src/extractors/python.js | 502 ++++++++++++++++++++------------------- 1 file changed, 252 insertions(+), 250 deletions(-) diff --git a/src/extractors/python.js b/src/extractors/python.js index 968dbacb..053a07ca 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -4,292 +4,294 @@ import { findChild, nodeEndLine, pythonVisibility } from './helpers.js'; * Extract symbols from Python files. 
*/ export function extractPythonSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function walkPythonNode(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const decorators = []; - if (node.previousSibling && node.previousSibling.type === 'decorator') { - decorators.push(node.previousSibling.text); - } - const parentClass = findPythonParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const kind = parentClass ? 'method' : 'function'; - const fnChildren = extractPythonParameters(node); - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - decorators, - children: fnChildren.length > 0 ? fnChildren : undefined, - visibility: pythonVisibility(nameNode.text), - }); - } - break; - } + walkPythonNode(tree.rootNode, ctx); + return ctx; +} - case 'class_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const clsChildren = extractPythonClassProperties(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: clsChildren.length > 0 ? 
clsChildren : undefined, - }); - const superclasses = - node.childForFieldName('superclasses') || findChild(node, 'argument_list'); - if (superclasses) { - for (let i = 0; i < superclasses.childCount; i++) { - const child = superclasses.child(i); - if (child && child.type === 'identifier') { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - break; - } +function walkPythonNode(node, ctx) { + switch (node.type) { + case 'function_definition': + handlePyFunctionDef(node, ctx); + break; + case 'class_definition': + handlePyClassDef(node, ctx); + break; + case 'decorated_definition': + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i), ctx); + return; + case 'call': + handlePyCall(node, ctx); + break; + case 'import_statement': + handlePyImport(node, ctx); + break; + case 'expression_statement': + handlePyExpressionStmt(node, ctx); + break; + case 'import_from_statement': + handlePyImportFrom(node, ctx); + break; + } - case 'decorated_definition': { - for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); - return; - } + for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i), ctx); +} - case 'call': { - const fn = node.childForFieldName('function'); - if (fn) { - let callName = null; - let receiver; - if (fn.type === 'identifier') callName = fn.text; - else if (fn.type === 'attribute') { - const attr = fn.childForFieldName('attribute'); - if (attr) callName = attr.text; - const obj = fn.childForFieldName('object'); - if (obj) receiver = obj.text; - } - if (callName) { - const call = { name: callName, line: node.startPosition.row + 1 }; - if (receiver) call.receiver = receiver; - calls.push(call); - } - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'import_statement': { - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && 
(child.type === 'dotted_name' || child.type === 'aliased_import')) { - const name = - child.type === 'aliased_import' - ? (child.childForFieldName('alias') || child.childForFieldName('name'))?.text - : child.text; - if (name) names.push(name); - } - } - if (names.length > 0) - imports.push({ - source: names[0], - names, - line: node.startPosition.row + 1, - pythonImport: true, - }); - break; - } +function handlePyFunctionDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const decorators = []; + if (node.previousSibling && node.previousSibling.type === 'decorator') { + decorators.push(node.previousSibling.text); + } + const parentClass = findPythonParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const kind = parentClass ? 'method' : 'function'; + const fnChildren = extractPythonParameters(node); + ctx.definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + decorators, + children: fnChildren.length > 0 ? fnChildren : undefined, + visibility: pythonVisibility(nameNode.text), + }); +} - case 'expression_statement': { - // Module-level UPPER_CASE assignments → constants - if (node.parent && node.parent.type === 'module') { - const assignment = findChild(node, 'assignment'); - if (assignment) { - const left = assignment.childForFieldName('left'); - if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { - definitions.push({ - name: left.text, - kind: 'constant', - line: node.startPosition.row + 1, - }); - } - } - } - break; +function handlePyClassDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const clsChildren = extractPythonClassProperties(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? 
clsChildren : undefined, + }); + const superclasses = node.childForFieldName('superclasses') || findChild(node, 'argument_list'); + if (superclasses) { + for (let i = 0; i < superclasses.childCount; i++) { + const child = superclasses.child(i); + if (child && child.type === 'identifier') { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); } + } + } +} - case 'import_from_statement': { - let source = ''; - const names = []; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'dotted_name' || child.type === 'relative_import') { - if (!source) source = child.text; - else names.push(child.text); - } - if (child.type === 'aliased_import') { - const n = child.childForFieldName('name') || child.child(0); - if (n) names.push(n.text); - } - if (child.type === 'wildcard_import') names.push('*'); - } - if (source) - imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); - break; +function handlePyCall(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + let callName = null; + let receiver; + if (fn.type === 'identifier') callName = fn.text; + else if (fn.type === 'attribute') { + const attr = fn.childForFieldName('attribute'); + if (attr) callName = attr.text; + const obj = fn.childForFieldName('object'); + if (obj) receiver = obj.text; + } + if (callName) { + const call = { name: callName, line: node.startPosition.row + 1 }; + if (receiver) call.receiver = receiver; + ctx.calls.push(call); + } +} + +function handlePyImport(node, ctx) { + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'dotted_name' || child.type === 'aliased_import')) { + const name = + child.type === 'aliased_import' + ? 
(child.childForFieldName('alias') || child.childForFieldName('name'))?.text + : child.text; + if (name) names.push(name); + } + } + if (names.length > 0) + ctx.imports.push({ + source: names[0], + names, + line: node.startPosition.row + 1, + pythonImport: true, + }); +} + +function handlePyExpressionStmt(node, ctx) { + if (node.parent && node.parent.type === 'module') { + const assignment = findChild(node, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { + ctx.definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + }); } } + } +} - for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); +function handlePyImportFrom(node, ctx) { + let source = ''; + const names = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'dotted_name' || child.type === 'relative_import') { + if (!source) source = child.text; + else names.push(child.text); + } + if (child.type === 'aliased_import') { + const n = child.childForFieldName('name') || child.child(0); + if (n) names.push(n.text); + } + if (child.type === 'wildcard_import') names.push('*'); } + if (source) + ctx.imports.push({ source, names, line: node.startPosition.row + 1, pythonImport: true }); +} - function extractPythonParameters(fnNode) { - const params = []; - const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); - if (!paramsNode) return params; - for (let i = 0; i < paramsNode.childCount; i++) { - const child = paramsNode.child(i); - if (!child) continue; - const t = child.type; - if (t === 'identifier') { - params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); - } else if ( - t === 'typed_parameter' || - t === 'default_parameter' || - t === 'typed_default_parameter' - ) { - const nameNode = 
child.childForFieldName('name') || child.child(0); - if (nameNode && nameNode.type === 'identifier') { - params.push({ - name: nameNode.text, - kind: 'parameter', - line: child.startPosition.row + 1, - }); - } - } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { - // *args, **kwargs - for (let j = 0; j < child.childCount; j++) { - const inner = child.child(j); - if (inner && inner.type === 'identifier') { - params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); - break; - } +// ── Python-specific helpers ───────────────────────────────────────────────── + +function extractPythonParameters(fnNode) { + const params = []; + const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'typed_parameter' || + t === 'default_parameter' || + t === 'typed_default_parameter' + ) { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 'identifier') { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'identifier') { + params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); + break; } } } - return params; } + return params; +} - function extractPythonClassProperties(classNode) { - const props = []; - const seen = new Set(); - const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); - if (!body) return props; +function 
extractPythonClassProperties(classNode) { + const props = []; + const seen = new Set(); + const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); + if (!body) return props; - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (!child) continue; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; - // Direct class attribute assignments: x = 5 - if (child.type === 'expression_statement') { - const assignment = findChild(child, 'assignment'); - if (assignment) { - const left = assignment.childForFieldName('left'); - if (left && left.type === 'identifier' && !seen.has(left.text)) { - seen.add(left.text); - props.push({ - name: left.text, - kind: 'property', - line: child.startPosition.row + 1, - visibility: pythonVisibility(left.text), - }); - } + if (child.type === 'expression_statement') { + const assignment = findChild(child, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && !seen.has(left.text)) { + seen.add(left.text); + props.push({ + name: left.text, + kind: 'property', + line: child.startPosition.row + 1, + visibility: pythonVisibility(left.text), + }); } } + } - // __init__ method: self.x = ... 
assignments - if (child.type === 'function_definition') { - const fnName = child.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = child.childForFieldName('body') || findChild(child, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = child.childForFieldName('body') || findChild(child, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); } } + } - // decorated __init__ - if (child.type === 'decorated_definition') { - for (let j = 0; j < child.childCount; j++) { - const inner = child.child(j); - if (inner && inner.type === 'function_definition') { - const fnName = inner.childForFieldName('name'); - if (fnName && fnName.text === '__init__') { - const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); - if (initBody) { - walkInitBody(initBody, seen, props); - } + if (child.type === 'decorated_definition') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'function_definition') { + const fnName = inner.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); } } } } } - return props; } + return props; +} - function walkInitBody(bodyNode, seen, props) { - for (let i = 0; i < bodyNode.childCount; i++) { - const stmt = bodyNode.child(i); - if (!stmt || stmt.type !== 'expression_statement') continue; - const assignment = findChild(stmt, 'assignment'); - if (!assignment) continue; - const left = assignment.childForFieldName('left'); - if (!left || left.type !== 'attribute') continue; - const obj = left.childForFieldName('object'); - const attr = left.childForFieldName('attribute'); - if ( - obj && - obj.text 
=== 'self' && - attr && - attr.type === 'identifier' && - !seen.has(attr.text) - ) { - seen.add(attr.text); - props.push({ - name: attr.text, - kind: 'property', - line: stmt.startPosition.row + 1, - visibility: pythonVisibility(attr.text), - }); - } +function walkInitBody(bodyNode, seen, props) { + for (let i = 0; i < bodyNode.childCount; i++) { + const stmt = bodyNode.child(i); + if (!stmt || stmt.type !== 'expression_statement') continue; + const assignment = findChild(stmt, 'assignment'); + if (!assignment) continue; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'attribute') continue; + const obj = left.childForFieldName('object'); + const attr = left.childForFieldName('attribute'); + if (obj && obj.text === 'self' && attr && attr.type === 'identifier' && !seen.has(attr.text)) { + seen.add(attr.text); + props.push({ + name: attr.text, + kind: 'property', + line: stmt.startPosition.row + 1, + visibility: pythonVisibility(attr.text), + }); } } +} - function findPythonParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class_definition') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; +function findPythonParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class_definition') { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; } - return null; + current = current.parent; } - - walkPythonNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; + return null; } From 3a656bb089cca1e595d3a28331fb5303d5c11a16 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:42:33 -0600 Subject: [PATCH 09/21] refactor: decompose extractJavaSymbols into per-category handlers Split walkJavaNode switch into 8 focused handlers plus an extractJavaInterfaces helper. Moved findJavaParentClass to module scope. The class_declaration case (deepest nesting in the file) is now split between handleJavaClassDecl and extractJavaInterfaces. Impact: 12 functions changed, 5 affected --- src/extractors/java.js | 418 +++++++++++++++++++++-------------------- 1 file changed, 211 insertions(+), 207 deletions(-) diff --git a/src/extractors/java.js b/src/extractors/java.js index 2bf0bb28..9da313c1 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -4,239 +4,243 @@ import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js' * Extract symbols from Java files. */ export function extractJavaSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findJavaParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'enum_declaration' || - current.type === 'interface_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + walkJavaNode(tree.rootNode, ctx); + return ctx; +} + +function walkJavaNode(node, ctx) { + switch (node.type) { + case 'class_declaration': + handleJavaClassDecl(node, ctx); + break; + case 'interface_declaration': + handleJavaInterfaceDecl(node, ctx); + break; + case 'enum_declaration': + handleJavaEnumDecl(node, ctx); + break; + case 'method_declaration': + handleJavaMethodDecl(node, ctx); + break; + case 'constructor_declaration': + handleJavaConstructorDecl(node, ctx); + break; + case 'import_declaration': + handleJavaImportDecl(node, ctx); + break; + case 'method_invocation': + handleJavaMethodInvocation(node, ctx); + break; + case 'object_creation_expression': + handleJavaObjectCreation(node, ctx); + break; } - function walkJavaNode(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? classChildren : undefined, - }); + for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i), ctx); +} - const superclass = node.childForFieldName('superclass'); - if (superclass) { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'generic_type') - ) { - const superName = child.type === 'generic_type' ? 
child.child(0)?.text : child.text; - if (superName) - classes.push({ - name: nameNode.text, - extends: superName, - line: node.startPosition.row + 1, - }); - break; - } - } - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - const interfaces = node.childForFieldName('interfaces'); - if (interfaces) { - for (let i = 0; i < interfaces.childCount; i++) { - const child = interfaces.child(i); - if ( - child && - (child.type === 'type_identifier' || - child.type === 'identifier' || - child.type === 'type_list' || - child.type === 'generic_type') - ) { - if (child.type === 'type_list') { - for (let j = 0; j < child.childCount; j++) { - const t = child.child(j); - if ( - t && - (t.type === 'type_identifier' || - t.type === 'identifier' || - t.type === 'generic_type') - ) { - const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } else { - const ifaceName = - child.type === 'generic_type' ? child.child(0)?.text : child.text; - if (ifaceName) - classes.push({ - name: nameNode.text, - implements: ifaceName, - line: node.startPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleJavaClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ + const superclass = node.childForFieldName('superclass'); + if (superclass) { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'generic_type') + ) { + const superName = child.type === 'generic_type' ? child.child(0)?.text : child.text; + if (superName) + ctx.classes.push({ name: nameNode.text, - kind: 'interface', + extends: superName, line: node.startPosition.row + 1, - endLine: nodeEndLine(node), }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } break; } + } + } - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractEnumConstants(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } + const interfaces = node.childForFieldName('interfaces'); + if (interfaces) { + extractJavaInterfaces(interfaces, nameNode.text, node.startPosition.row + 1, ctx); + } +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); +function extractJavaInterfaces(interfaces, className, line, ctx) { + for (let i = 0; i < interfaces.childCount; i++) { + const child = interfaces.child(i); + if ( + child && + (child.type === 'type_identifier' || + child.type === 'identifier' || + child.type === 'type_list' || + child.type === 'generic_type') + ) { + if (child.type === 'type_list') { + for (let j = 0; j < child.childCount; j++) { + const t = child.child(j); + if ( + t && + (t.type === 'type_identifier' || t.type === 'identifier' || t.type === 'generic_type') + ) { + const ifaceName = t.type === 'generic_type' ? t.child(0)?.text : t.text; + if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); + } } - break; + } else { + const ifaceName = child.type === 'generic_type' ? child.child(0)?.text : child.text; + if (ifaceName) ctx.classes.push({ name: className, implements: ifaceName, line }); } + } + } +} - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findJavaParentClass(node); - const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractJavaParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, +function handleJavaInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { - const fullPath = child.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - javaImport: true, - }); - } - if (child && child.type === 'asterisk') { - const lastImport = imports[imports.length - 1]; - if (lastImport) lastImport.names = ['*']; - } - } - break; - } +function handleJavaEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractEnumConstants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'method_invocation': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const obj = node.childForFieldName('object'); - const call = { name: nameNode.text, line: node.startPosition.row + 1 }; - if (obj) call.receiver = obj.text; - calls.push(call); - } - break; - } +function handleJavaMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } +function handleJavaConstructorDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findJavaParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - for (let i = 0; i < node.childCount; i++) walkJavaNode(node.child(i)); +function handleJavaImportDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { + const fullPath = child.text; + const lastName = fullPath.split('.').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + javaImport: true, + }); + } + if (child && child.type === 'asterisk') { + const lastImport = ctx.imports[ctx.imports.length - 1]; + if (lastImport) lastImport.names = ['*']; + } } +} + +function handleJavaMethodInvocation(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const obj = node.childForFieldName('object'); + const call = { name: nameNode.text, line: node.startPosition.row + 1 }; + if (obj) call.receiver = obj.text; + ctx.calls.push(call); +} - walkJavaNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleJavaObjectCreation(node, ctx) { + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text; + if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); +} + +function findJavaParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'enum_declaration' || + current.type === 'interface_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── From bf5b986f3407dcbf9c6734e47e28aec39229e406 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:47:27 -0600 Subject: [PATCH 10/21] refactor: decompose remaining language extractors Apply the same per-category handler decomposition to all remaining language extractors: Go (6 handlers), Ruby (8 handlers), PHP (11 handlers), C# (11 handlers), Rust (9 handlers), HCL (4 handlers). Each extractor now follows the template established by the JS extractor: - Thin entry function creates ctx, delegates to walkXNode - walkXNode is a thin dispatcher switch - Each case is a named handler function at module scope - Helper functions (findParentClass, etc.) moved to module scope Impact: 66 functions changed, 23 affected --- src/extractors/csharp.js | 429 ++++++++++++++++++------------------ src/extractors/go.js | 349 +++++++++++++++--------------- src/extractors/hcl.js | 172 ++++++++------- src/extractors/php.js | 453 ++++++++++++++++++++------------------- src/extractors/ruby.js | 377 ++++++++++++++++---------------- src/extractors/rust.js | 347 +++++++++++++++--------------- 6 files changed, 1097 insertions(+), 1030 deletions(-) diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index 9dafa451..d52aa893 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -4,233 +4,248 @@ import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js' * Extract symbols from C# files. 
*/ export function extractCSharpSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findCSharpParentType(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'struct_declaration' || - current.type === 'interface_declaration' || - current.type === 'enum_declaration' || - current.type === 'record_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + walkCSharpNode(tree.rootNode, ctx); + return ctx; +} + +function walkCSharpNode(node, ctx) { + switch (node.type) { + case 'class_declaration': + handleCsClassDecl(node, ctx); + break; + case 'struct_declaration': + handleCsStructDecl(node, ctx); + break; + case 'record_declaration': + handleCsRecordDecl(node, ctx); + break; + case 'interface_declaration': + handleCsInterfaceDecl(node, ctx); + break; + case 'enum_declaration': + handleCsEnumDecl(node, ctx); + break; + case 'method_declaration': + handleCsMethodDecl(node, ctx); + break; + case 'constructor_declaration': + handleCsConstructorDecl(node, ctx); + break; + case 'property_declaration': + handleCsPropertyDecl(node, ctx); + break; + case 'using_directive': + handleCsUsingDirective(node, ctx); + break; + case 'invocation_expression': + handleCsInvocationExpr(node, ctx); + break; + case 'object_creation_expression': + handleCsObjectCreation(node, ctx); + break; } - function walkCSharpNode(node) { - switch (node.type) { - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractCSharpClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: 
nodeEndLine(node), - children: classChildren.length > 0 ? classChildren : undefined, - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkCSharpNode(node.child(i), ctx); +} - case 'struct_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const structChildren = extractCSharpClassFields(node); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: structChildren.length > 0 ? structChildren : undefined, - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'record_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'record', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - extractCSharpBaseTypes(node, nameNode.text, classes); - } - break; - } +function handleCsClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractCSharpClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleCsStructDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const structChildren = extractCSharpClassFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: structChildren.length > 0 ? structChildren : undefined, + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractCSharpEnumMembers(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? 
enumChildren : undefined, - }); - } - break; - } +function handleCsRecordDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'record', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + extractCSharpBaseTypes(node, nameNode.text, ctx.classes); +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - const params = extractCSharpParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, +function handleCsInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'constructor_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? 
`${parentType}.${nameNode.text}` : nameNode.text; - const params = extractCSharpParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handleCsEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractCSharpEnumMembers(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, + }); +} - case 'property_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentType = findCSharpParentType(node); - const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; - definitions.push({ - name: fullName, - kind: 'property', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handleCsMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'using_directive': { - // using System.Collections.Generic; - const nameNode = - node.childForFieldName('name') || - findChild(node, 'qualified_name') || - findChild(node, 'identifier'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('.').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - csharpUsing: true, - }); - } - break; - } +function handleCsConstructorDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'invocation_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'member_access_expression') { - const name = fn.childForFieldName('name'); - if (name) { - const expr = fn.childForFieldName('expression'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (expr) call.receiver = expr.text; - calls.push(call); - } - } else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') { - const name = fn.childForFieldName('name') || fn.child(0); - if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 }); - } - } - break; - } +function handleCsPropertyDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentType = 
findCSharpParentType(node); + const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + ctx.definitions.push({ + name: fullName, + kind: 'property', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: extractModifierVisibility(node), + }); +} - case 'object_creation_expression': { - const typeNode = node.childForFieldName('type'); - if (typeNode) { - const typeName = - typeNode.type === 'generic_name' - ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text - : typeNode.text; - if (typeName) calls.push({ name: typeName, line: node.startPosition.row + 1 }); - } - break; - } - } +function handleCsUsingDirective(node, ctx) { + const nameNode = + node.childForFieldName('name') || + findChild(node, 'qualified_name') || + findChild(node, 'identifier'); + if (!nameNode) return; + const fullPath = nameNode.text; + const lastName = fullPath.split('.').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + csharpUsing: true, + }); +} - for (let i = 0; i < node.childCount; i++) walkCSharpNode(node.child(i)); +function handleCsInvocationExpr(node, ctx) { + const fn = node.childForFieldName('function') || node.child(0); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'member_access_expression') { + const name = fn.childForFieldName('name'); + if (name) { + const expr = fn.childForFieldName('expression'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (expr) call.receiver = expr.text; + ctx.calls.push(call); + } + } else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') { + const name = fn.childForFieldName('name') || fn.child(0); + if (name) ctx.calls.push({ name: name.text, line: node.startPosition.row + 1 }); } +} + +function handleCsObjectCreation(node, ctx) { + const typeNode = 
node.childForFieldName('type'); + if (!typeNode) return; + const typeName = + typeNode.type === 'generic_name' + ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text + : typeNode.text; + if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); +} - walkCSharpNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findCSharpParentType(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'struct_declaration' || + current.type === 'interface_declaration' || + current.type === 'enum_declaration' || + current.type === 'record_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/go.js b/src/extractors/go.js index 50460c8d..57d3b2a8 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -4,196 +4,201 @@ import { findChild, goVisibility, nodeEndLine } from './helpers.js'; * Extract symbols from Go files. */ export function extractGoSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function walkGoNode(node) { - switch (node.type) { - case 'function_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const params = extractGoParameters(node.childForFieldName('parameters')); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? 
params : undefined, - visibility: goVisibility(nameNode.text), - }); - } - break; - } + walkGoNode(tree.rootNode, ctx); + return ctx; +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - const receiver = node.childForFieldName('receiver'); - if (nameNode) { - let receiverType = null; - if (receiver) { - // receiver is a parameter_list like (r *Foo) or (r Foo) - for (let i = 0; i < receiver.childCount; i++) { - const param = receiver.child(i); - if (!param) continue; - const typeNode = param.childForFieldName('type'); - if (typeNode) { - receiverType = - typeNode.type === 'pointer_type' - ? typeNode.text.replace(/^\*/, '') - : typeNode.text; - break; - } - } - } - const fullName = receiverType ? `${receiverType}.${nameNode.text}` : nameNode.text; - const params = extractGoParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? 
params : undefined, - visibility: goVisibility(nameNode.text), - }); - } - break; - } +function walkGoNode(node, ctx) { + switch (node.type) { + case 'function_declaration': + handleGoFuncDecl(node, ctx); + break; + case 'method_declaration': + handleGoMethodDecl(node, ctx); + break; + case 'type_declaration': + handleGoTypeDecl(node, ctx); + break; + case 'import_declaration': + handleGoImportDecl(node, ctx); + break; + case 'const_declaration': + handleGoConstDecl(node, ctx); + break; + case 'call_expression': + handleGoCallExpr(node, ctx); + break; + } - case 'type_declaration': { - for (let i = 0; i < node.childCount; i++) { - const spec = node.child(i); - if (!spec || spec.type !== 'type_spec') continue; - const nameNode = spec.childForFieldName('name'); - const typeNode = spec.childForFieldName('type'); - if (nameNode && typeNode) { - if (typeNode.type === 'struct_type') { - const fields = extractStructFields(typeNode); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fields.length > 0 ? 
fields : undefined, - }); - } else if (typeNode.type === 'interface_type') { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - for (let j = 0; j < typeNode.childCount; j++) { - const member = typeNode.child(j); - if (member && member.type === 'method_elem') { - const methName = member.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - }); - } - } - } - } else { - definitions.push({ - name: nameNode.text, - kind: 'type', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } - } + for (let i = 0; i < node.childCount; i++) walkGoNode(node.child(i), ctx); +} + +// ── Walk-path per-node-type handlers ──────────────────────────────────────── + +function handleGoFuncDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const params = extractGoParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: goVisibility(nameNode.text), + }); + } +} + +function handleGoMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + const receiver = node.childForFieldName('receiver'); + if (!nameNode) return; + let receiverType = null; + if (receiver) { + for (let i = 0; i < receiver.childCount; i++) { + const param = receiver.child(i); + if (!param) continue; + const typeNode = param.childForFieldName('type'); + if (typeNode) { + receiverType = + typeNode.type === 'pointer_type' ? typeNode.text.replace(/^\*/, '') : typeNode.text; break; } + } + } + const fullName = receiverType ? 
`${receiverType}.${nameNode.text}` : nameNode.text; + const params = extractGoParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: goVisibility(nameNode.text), + }); +} - case 'import_declaration': { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'import_spec') { - const pathNode = child.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = child.childForFieldName('name'); - const alias = nameNode ? nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: child.startPosition.row + 1, - goImport: true, +function handleGoTypeDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'type_spec') continue; + const nameNode = spec.childForFieldName('name'); + const typeNode = spec.childForFieldName('type'); + if (nameNode && typeNode) { + if (typeNode.type === 'struct_type') { + const fields = extractStructFields(typeNode); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, + }); + } else if (typeNode.type === 'interface_type') { + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + for (let j = 0; j < typeNode.childCount; j++) { + const member = typeNode.child(j); + if (member && member.type === 'method_elem') { + const methName = member.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, }); } } - if (child.type === 'import_spec_list') { - for (let j = 0; j < child.childCount; j++) { - const spec = child.child(j); - if (spec && spec.type === 'import_spec') { - const pathNode = spec.childForFieldName('path'); - if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); - const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? nameNode.text : importPath.split('/').pop(); - imports.push({ - source: importPath, - names: [alias], - line: spec.startPosition.row + 1, - goImport: true, - }); - } - } - } - } - } - break; - } - - case 'const_declaration': { - for (let i = 0; i < node.childCount; i++) { - const spec = node.child(i); - if (!spec || spec.type !== 'const_spec') continue; - const constName = spec.childForFieldName('name'); - if (constName) { - definitions.push({ - name: constName.text, - kind: 'constant', - line: spec.startPosition.row + 1, - endLine: spec.endPosition.row + 1, - }); - } } - break; + } else { + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); } + } + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if (fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'selector_expression') { - const field = fn.childForFieldName('field'); - 
if (field) { - const operand = fn.childForFieldName('operand'); - const call = { name: field.text, line: node.startPosition.row + 1 }; - if (operand) call.receiver = operand.text; - calls.push(call); - } - } +function handleGoImportDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'import_spec') { + extractGoImportSpec(child, ctx); + } + if (child.type === 'import_spec_list') { + for (let j = 0; j < child.childCount; j++) { + const spec = child.child(j); + if (spec && spec.type === 'import_spec') { + extractGoImportSpec(spec, ctx); } - break; } } + } +} + +function extractGoImportSpec(spec, ctx) { + const pathNode = spec.childForFieldName('path'); + if (pathNode) { + const importPath = pathNode.text.replace(/"/g, ''); + const nameNode = spec.childForFieldName('name'); + const alias = nameNode ? nameNode.text : importPath.split('/').pop(); + ctx.imports.push({ + source: importPath, + names: [alias], + line: spec.startPosition.row + 1, + goImport: true, + }); + } +} - for (let i = 0; i < node.childCount; i++) walkGoNode(node.child(i)); +function handleGoConstDecl(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'const_spec') continue; + const constName = spec.childForFieldName('name'); + if (constName) { + ctx.definitions.push({ + name: constName.text, + kind: 'constant', + line: spec.startPosition.row + 1, + endLine: spec.endPosition.row + 1, + }); + } } +} - walkGoNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function handleGoCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'selector_expression') { + const field = fn.childForFieldName('field'); + if (field) { + const operand = 
fn.childForFieldName('operand'); + const call = { name: field.text, line: node.startPosition.row + 1 }; + if (operand) call.receiver = operand.text; + ctx.calls.push(call); + } + } } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js index aba022a5..8b13651f 100644 --- a/src/extractors/hcl.js +++ b/src/extractors/hcl.js @@ -4,92 +4,108 @@ import { nodeEndLine } from './helpers.js'; * Extract symbols from HCL (Terraform) files. */ export function extractHCLSymbols(tree, _filePath) { - const definitions = []; - const imports = []; + const ctx = { definitions: [], imports: [] }; - function walkHclNode(node) { - if (node.type === 'block') { - const children = []; - for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); + walkHclNode(tree.rootNode, ctx); + return { + definitions: ctx.definitions, + calls: [], + imports: ctx.imports, + classes: [], + exports: [], + }; +} - const identifiers = children.filter((c) => c.type === 'identifier'); - const strings = children.filter((c) => c.type === 'string_lit'); +function walkHclNode(node, ctx) { + if (node.type === 'block') { + handleHclBlock(node, ctx); + } - if (identifiers.length > 0) { - const blockType = identifiers[0].text; - let name = ''; + for (let i = 0; i < node.childCount; i++) walkHclNode(node.child(i), ctx); +} - if (blockType === 'resource' && strings.length >= 2) { - name = `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if (blockType === 'data' && strings.length >= 2) { - name = `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; - } else if ( - (blockType === 'variable' || blockType === 'output' || blockType === 'module') && - strings.length >= 1 - ) { - name = `${blockType}.${strings[0].text.replace(/"/g, '')}`; - } else if (blockType === 'locals') { - name = 'locals'; - } else if (blockType === 'terraform' || blockType === 'provider') { 
- name = blockType; - if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; - } +function handleHclBlock(node, ctx) { + const children = []; + for (let i = 0; i < node.childCount; i++) children.push(node.child(i)); - if (name) { - // Extract attributes as property children for variable/output blocks - let blockChildren; - if (blockType === 'variable' || blockType === 'output') { - blockChildren = []; - const body = children.find((c) => c.type === 'body'); - if (body) { - for (let j = 0; j < body.childCount; j++) { - const attr = body.child(j); - if (attr && attr.type === 'attribute') { - const key = attr.childForFieldName('key') || attr.child(0); - if (key) { - blockChildren.push({ - name: key.text, - kind: 'property', - line: attr.startPosition.row + 1, - }); - } - } - } - } - } - definitions.push({ - name, - kind: blockType, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: blockChildren?.length > 0 ? blockChildren : undefined, - }); - } + const identifiers = children.filter((c) => c.type === 'identifier'); + const strings = children.filter((c) => c.type === 'string_lit'); - if (blockType === 'module') { - const body = children.find((c) => c.type === 'body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const attr = body.child(i); - if (attr && attr.type === 'attribute') { - const key = attr.childForFieldName('key') || attr.child(0); - const val = attr.childForFieldName('val') || attr.child(2); - if (key && key.text === 'source' && val) { - const src = val.text.replace(/"/g, ''); - if (src.startsWith('./') || src.startsWith('../')) { - imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); - } - } - } - } - } - } - } + if (identifiers.length === 0) return; + const blockType = identifiers[0].text; + const name = resolveHclBlockName(blockType, strings); + + if (name) { + let blockChildren; + if (blockType === 'variable' || blockType === 'output') { + blockChildren = 
extractHclAttributes(children); } + ctx.definitions.push({ + name, + kind: blockType, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: blockChildren?.length > 0 ? blockChildren : undefined, + }); + } - for (let i = 0; i < node.childCount; i++) walkHclNode(node.child(i)); + if (blockType === 'module') { + extractHclModuleSource(children, node, ctx); } +} - walkHclNode(tree.rootNode); - return { definitions, calls: [], imports, classes: [], exports: [] }; +function resolveHclBlockName(blockType, strings) { + if (blockType === 'resource' && strings.length >= 2) { + return `${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } + if (blockType === 'data' && strings.length >= 2) { + return `data.${strings[0].text.replace(/"/g, '')}.${strings[1].text.replace(/"/g, '')}`; + } + if ( + (blockType === 'variable' || blockType === 'output' || blockType === 'module') && + strings.length >= 1 + ) { + return `${blockType}.${strings[0].text.replace(/"/g, '')}`; + } + if (blockType === 'locals') return 'locals'; + if (blockType === 'terraform' || blockType === 'provider') { + let name = blockType; + if (strings.length >= 1) name += `.${strings[0].text.replace(/"/g, '')}`; + return name; + } + return ''; +} + +function extractHclAttributes(children) { + const attrs = []; + const body = children.find((c) => c.type === 'body'); + if (!body) return attrs; + for (let j = 0; j < body.childCount; j++) { + const attr = body.child(j); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + if (key) { + attrs.push({ name: key.text, kind: 'property', line: attr.startPosition.row + 1 }); + } + } + } + return attrs; +} + +function extractHclModuleSource(children, _node, ctx) { + const body = children.find((c) => c.type === 'body'); + if (!body) return; + for (let i = 0; i < body.childCount; i++) { + const attr = body.child(i); + if (attr && attr.type === 'attribute') { + const key = 
attr.childForFieldName('key') || attr.child(0); + const val = attr.childForFieldName('val') || attr.child(2); + if (key && key.text === 'source' && val) { + const src = val.text.replace(/"/g, ''); + if (src.startsWith('./') || src.startsWith('../')) { + ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); + } + } + } + } } diff --git a/src/extractors/php.js b/src/extractors/php.js index fd008168..03f9c6d7 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -76,249 +76,260 @@ function extractPhpEnumCases(enumNode) { * Extract symbols from PHP files. */ export function extractPHPSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findPHPParentClass(node) { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'trait_declaration' || - current.type === 'enum_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + walkPhpNode(tree.rootNode, ctx); + return ctx; +} + +function walkPhpNode(node, ctx) { + switch (node.type) { + case 'function_definition': + handlePhpFuncDef(node, ctx); + break; + case 'class_declaration': + handlePhpClassDecl(node, ctx); + break; + case 'interface_declaration': + handlePhpInterfaceDecl(node, ctx); + break; + case 'trait_declaration': + handlePhpTraitDecl(node, ctx); + break; + case 'enum_declaration': + handlePhpEnumDecl(node, ctx); + break; + case 'method_declaration': + handlePhpMethodDecl(node, ctx); + break; + case 'namespace_use_declaration': + handlePhpNamespaceUse(node, ctx); + break; + case 'function_call_expression': + handlePhpFuncCall(node, ctx); + break; + case 'member_call_expression': + handlePhpMemberCall(node, ctx); + break; + case 'scoped_call_expression': + handlePhpScopedCall(node, ctx); + break; + case 'object_creation_expression': + handlePhpObjectCreation(node, ctx); + break; } - function walkPhpNode(node) { - switch (node.type) { - case 'function_definition': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const params = extractPhpParameters(node); - definitions.push({ - name: nameNode.text, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkPhpNode(node.child(i), ctx); +} - case 'class_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractPhpClassChildren(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? 
classChildren : undefined, - }); +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - // Check base clause (extends) - const baseClause = - node.childForFieldName('base_clause') || findChild(node, 'base_clause'); - if (baseClause) { - for (let i = 0; i < baseClause.childCount; i++) { - const child = baseClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } +function handlePhpFuncDef(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const params = extractPhpParameters(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); +} - // Check class interface clause (implements) - const interfaceClause = findChild(node, 'class_interface_clause'); - if (interfaceClause) { - for (let i = 0; i < interfaceClause.childCount; i++) { - const child = interfaceClause.child(i); - if (child && (child.type === 'name' || child.type === 'qualified_name')) { - classes.push({ - name: nameNode.text, - implements: child.text, - line: node.startPosition.row + 1, - }); - } - } - } - } +function handlePhpClassDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractPhpClassChildren(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + const baseClause = node.childForFieldName('base_clause') || findChild(node, 'base_clause'); + if (baseClause) { + for (let i = 0; i < baseClause.childCount; i++) { + const child = baseClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); break; } - - case 'interface_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'interface', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if (child && child.type === 'method_declaration') { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; + } + } + const interfaceClause = findChild(node, 'class_interface_clause'); + if (interfaceClause) { + for (let i = 0; i < interfaceClause.childCount; i++) { + const child = interfaceClause.child(i); + if (child && (child.type === 'name' || child.type === 'qualified_name')) { + ctx.classes.push({ + name: nameNode.text, + implements: child.text, + line: node.startPosition.row + 1, + }); } + } + } +} - case 'trait_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), +function handlePhpInterfaceDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: 
nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && child.type === 'method_declaration') { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'enum_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const enumChildren = extractPhpEnumCases(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: enumChildren.length > 0 ? enumChildren : undefined, - }); - } - break; - } +function handlePhpTraitDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} - case 'method_declaration': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findPHPParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractPhpParameters(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: extractModifierVisibility(node), - }); - } - break; - } +function handlePhpEnumDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const enumChildren = extractPhpEnumCases(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); +} - case 'namespace_use_declaration': { - // use App\Models\User; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && child.type === 'namespace_use_clause') { - const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); - if (nameNode) { - const fullPath = nameNode.text; - const lastName = fullPath.split('\\').pop(); - const alias = child.childForFieldName('alias'); - imports.push({ - source: fullPath, - names: [alias ? alias.text : lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - // Single use clause without wrapper - if (child && (child.type === 'qualified_name' || child.type === 'name')) { - const fullPath = child.text; - const lastName = fullPath.split('\\').pop(); - imports.push({ - source: fullPath, - names: [lastName], - line: node.startPosition.row + 1, - phpUse: true, - }); - } - } - break; - } +function handlePhpMethodDecl(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findPHPParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractPhpParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + visibility: extractModifierVisibility(node), + }); +} - case 'function_call_expression': { - const fn = node.childForFieldName('function') || node.child(0); - if (fn) { - if (fn.type === 'name' || fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'qualified_name') { - const parts = fn.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - } - break; +function handlePhpNamespaceUse(node, ctx) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'namespace_use_clause') { + const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); + if (nameNode) { + const fullPath = nameNode.text; + const lastName = fullPath.split('\\').pop(); + const alias = child.childForFieldName('alias'); + ctx.imports.push({ + source: fullPath, + names: [alias ? alias.text : lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); } + } + if (child && (child.type === 'qualified_name' || child.type === 'name')) { + const fullPath = child.text; + const lastName = fullPath.split('\\').pop(); + ctx.imports.push({ + source: fullPath, + names: [lastName], + line: node.startPosition.row + 1, + phpUse: true, + }); + } + } +} - case 'member_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - const obj = node.childForFieldName('object'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (obj) call.receiver = obj.text; - calls.push(call); - } - break; - } +function handlePhpFuncCall(node, ctx) { + const fn = node.childForFieldName('function') || node.child(0); + if (!fn) return; + if (fn.type === 'name' || fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'qualified_name') { + const parts = fn.text.split('\\'); + ctx.calls.push({ name: 
parts[parts.length - 1], line: node.startPosition.row + 1 }); + } +} - case 'scoped_call_expression': { - const name = node.childForFieldName('name'); - if (name) { - const scope = node.childForFieldName('scope'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (scope) call.receiver = scope.text; - calls.push(call); - } - break; - } +function handlePhpMemberCall(node, ctx) { + const name = node.childForFieldName('name'); + if (!name) return; + const obj = node.childForFieldName('object'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (obj) call.receiver = obj.text; + ctx.calls.push(call); +} - case 'object_creation_expression': { - const classNode = node.child(1); // skip 'new' keyword - if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { - const parts = classNode.text.split('\\'); - calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); - } - break; - } - } +function handlePhpScopedCall(node, ctx) { + const name = node.childForFieldName('name'); + if (!name) return; + const scope = node.childForFieldName('scope'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (scope) call.receiver = scope.text; + ctx.calls.push(call); +} - for (let i = 0; i < node.childCount; i++) walkPhpNode(node.child(i)); +function handlePhpObjectCreation(node, ctx) { + const classNode = node.child(1); + if (classNode && (classNode.type === 'name' || classNode.type === 'qualified_name')) { + const parts = classNode.text.split('\\'); + ctx.calls.push({ name: parts[parts.length - 1], line: node.startPosition.row + 1 }); } +} - walkPhpNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findPHPParentClass(node) { + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'trait_declaration' || + current.type === 'enum_declaration' + ) { + const nameNode = 
current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js index 400d410d..cc0da5fd 100644 --- a/src/extractors/ruby.js +++ b/src/extractors/ruby.js @@ -4,211 +4,218 @@ import { findChild, nodeEndLine } from './helpers.js'; * Extract symbols from Ruby files. */ export function extractRubySymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findRubyParentClass(node) { - let current = node.parent; - while (current) { - if (current.type === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - if (current.type === 'module') { - const nameNode = current.childForFieldName('name'); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; + walkRubyNode(tree.rootNode, ctx); + return ctx; +} + +function walkRubyNode(node, ctx) { + switch (node.type) { + case 'class': + handleRubyClass(node, ctx); + break; + case 'module': + handleRubyModule(node, ctx); + break; + case 'method': + handleRubyMethod(node, ctx); + break; + case 'singleton_method': + handleRubySingletonMethod(node, ctx); + break; + case 'assignment': + handleRubyAssignment(node, ctx); + break; + case 'call': + handleRubyCall(node, ctx); + break; } - function walkRubyNode(node) { - switch (node.type) { - case 'class': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const classChildren = extractRubyClassChildren(node); - definitions.push({ - name: nameNode.text, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: classChildren.length > 0 ? 
classChildren : undefined, - }); - const superclass = node.childForFieldName('superclass'); - if (superclass) { - // superclass wraps the < token and class name - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - // Direct superclass node may be a constant - if (superclass.type === 'superclass') { - for (let i = 0; i < superclass.childCount; i++) { - const child = superclass.child(i); - if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { - classes.push({ - name: nameNode.text, - extends: child.text, - line: node.startPosition.row + 1, - }); - break; - } - } - } - } - } + for (let i = 0; i < node.childCount; i++) walkRubyNode(node.child(i), ctx); +} + +// ── Walk-path per-node-type handlers ──────────────────────────────────────── + +function handleRubyClass(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const classChildren = extractRubyClassChildren(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, + }); + const superclass = node.childForFieldName('superclass'); + if (superclass) { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); break; } - - case 'module': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const moduleChildren = extractRubyBodyConstants(node); - definitions.push({ + } + if (superclass.type === 'superclass') { + for (let i = 0; i < superclass.childCount; i++) { + const child = superclass.child(i); + if (child && (child.type === 'constant' || child.type === 'scope_resolution')) { + ctx.classes.push({ name: nameNode.text, - kind: 'module', + extends: child.text, line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: moduleChildren.length > 0 ? moduleChildren : undefined, }); + break; } - break; } + } + } +} - case 'method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractRubyParameters(node); - definitions.push({ - name: fullName, - kind: 'method', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } +function handleRubyModule(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const moduleChildren = extractRubyBodyConstants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: moduleChildren.length > 0 ? 
moduleChildren : undefined, + }); +} - case 'singleton_method': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const parentClass = findRubyParentClass(node); - const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; - const params = extractRubyParameters(node); - definitions.push({ - name: fullName, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - }); - } - break; - } +function handleRubyMethod(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'method', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); +} - case 'assignment': { - // Top-level constant assignments (parent is program) - if (node.parent && node.parent.type === 'program') { - const left = node.childForFieldName('left'); - if (left && left.type === 'constant') { - definitions.push({ - name: left.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - } - break; - } +function handleRubySingletonMethod(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const parentClass = findRubyParentClass(node); + const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + }); +} - case 'call': { - const methodNode = node.childForFieldName('method'); - if (methodNode) { - // Check for require/require_relative - if (methodNode.text === 'require' || methodNode.text === 'require_relative') { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = arg.text.replace(/^['"]|['"]$/g, ''); - imports.push({ - source: strContent, - names: [strContent.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - // Look inside string for string_content - if (arg && arg.type === 'string') { - const content = findChild(arg, 'string_content'); - if (content) { - imports.push({ - source: content.text, - names: [content.text.split('/').pop()], - line: node.startPosition.row + 1, - rubyRequire: true, - }); - break; - } - } - } - } - } else if ( - methodNode.text === 'include' || - methodNode.text === 'extend' || - methodNode.text === 'prepend' - ) { - // Module inclusion — treated like implements - const parentClass = findRubyParentClass(node); - if (parentClass) { - const args = node.childForFieldName('arguments'); - if (args) { - for (let i = 0; i < args.childCount; i++) { - const arg = args.child(i); - if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { - classes.push({ - name: parentClass, - implements: arg.text, - line: node.startPosition.row + 1, - }); - } - } - } - } - } else { - const recv = node.childForFieldName('receiver'); - const call = { name: methodNode.text, line: node.startPosition.row + 1 }; - if (recv) call.receiver = recv.text; - calls.push(call); - } - } +function handleRubyAssignment(node, ctx) { + if (node.parent && node.parent.type === 'program') { + const left = node.childForFieldName('left'); + if (left && left.type === 'constant') { + ctx.definitions.push({ + name: 
left.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } +} + +function handleRubyCall(node, ctx) { + const methodNode = node.childForFieldName('method'); + if (!methodNode) return; + if (methodNode.text === 'require' || methodNode.text === 'require_relative') { + handleRubyRequire(node, ctx); + } else if ( + methodNode.text === 'include' || + methodNode.text === 'extend' || + methodNode.text === 'prepend' + ) { + handleRubyModuleInclusion(node, methodNode, ctx); + } else { + const recv = node.childForFieldName('receiver'); + const call = { name: methodNode.text, line: node.startPosition.row + 1 }; + if (recv) call.receiver = recv.text; + ctx.calls.push(call); + } +} + +function handleRubyRequire(node, ctx) { + const args = node.childForFieldName('arguments'); + if (!args) return; + for (let i = 0; i < args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'string' || arg.type === 'string_content')) { + const strContent = arg.text.replace(/^['"]|['"]$/g, ''); + ctx.imports.push({ + source: strContent, + names: [strContent.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); + break; + } + if (arg && arg.type === 'string') { + const content = findChild(arg, 'string_content'); + if (content) { + ctx.imports.push({ + source: content.text, + names: [content.text.split('/').pop()], + line: node.startPosition.row + 1, + rubyRequire: true, + }); break; } } + } +} - for (let i = 0; i < node.childCount; i++) walkRubyNode(node.child(i)); +function handleRubyModuleInclusion(node, _methodNode, ctx) { + const parentClass = findRubyParentClass(node); + if (!parentClass) return; + const args = node.childForFieldName('arguments'); + if (!args) return; + for (let i = 0; i < args.childCount; i++) { + const arg = args.child(i); + if (arg && (arg.type === 'constant' || arg.type === 'scope_resolution')) { + ctx.classes.push({ + name: parentClass, + implements: arg.text, + line: 
node.startPosition.row + 1, + }); + } } +} - walkRubyNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findRubyParentClass(node) { + let current = node.parent; + while (current) { + if (current.type === 'class' || current.type === 'module') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 705f9bd0..389bec00 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -4,191 +4,204 @@ import { findChild, nodeEndLine, rustVisibility } from './helpers.js'; * Extract symbols from Rust files. */ export function extractRustSymbols(tree, _filePath) { - const definitions = []; - const calls = []; - const imports = []; - const classes = []; - const exports = []; + const ctx = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + }; - function findCurrentImpl(node) { - let current = node.parent; - while (current) { - if (current.type === 'impl_item') { - const typeNode = current.childForFieldName('type'); - return typeNode ? 
typeNode.text : null; - } - current = current.parent; - } - return null; + walkRustNode(tree.rootNode, ctx); + return ctx; +} + +function walkRustNode(node, ctx) { + switch (node.type) { + case 'function_item': + handleRustFuncItem(node, ctx); + break; + case 'struct_item': + handleRustStructItem(node, ctx); + break; + case 'enum_item': + handleRustEnumItem(node, ctx); + break; + case 'const_item': + handleRustConstItem(node, ctx); + break; + case 'trait_item': + handleRustTraitItem(node, ctx); + break; + case 'impl_item': + handleRustImplItem(node, ctx); + break; + case 'use_declaration': + handleRustUseDecl(node, ctx); + break; + case 'call_expression': + handleRustCallExpr(node, ctx); + break; + case 'macro_invocation': + handleRustMacroInvocation(node, ctx); + break; } - function walkRustNode(node) { - switch (node.type) { - case 'function_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const implType = findCurrentImpl(node); - const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; - const kind = implType ? 'method' : 'function'; - const params = extractRustParameters(node.childForFieldName('parameters')); - definitions.push({ - name: fullName, - kind, - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: params.length > 0 ? params : undefined, - visibility: rustVisibility(node), - }); - } - break; - } + for (let i = 0; i < node.childCount; i++) walkRustNode(node.child(i), ctx); +} - case 'struct_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const fields = extractStructFields(node); - definitions.push({ - name: nameNode.text, - kind: 'struct', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: fields.length > 0 ? 
fields : undefined, - visibility: rustVisibility(node), - }); - } - break; - } +// ── Walk-path per-node-type handlers ──────────────────────────────────────── - case 'enum_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - const variants = extractEnumVariants(node); - definitions.push({ - name: nameNode.text, - kind: 'enum', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - children: variants.length > 0 ? variants : undefined, - }); - } - break; - } +function handleRustFuncItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const implType = findCurrentImpl(node); + const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; + const kind = implType ? 'method' : 'function'; + const params = extractRustParameters(node.childForFieldName('parameters')); + ctx.definitions.push({ + name: fullName, + kind, + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: rustVisibility(node), + }); +} - case 'const_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'constant', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - } - break; - } +function handleRustStructItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const fields = extractStructFields(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, + visibility: rustVisibility(node), + }); +} - case 'trait_item': { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - definitions.push({ - name: nameNode.text, - kind: 'trait', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - const body = node.childForFieldName('body'); - if (body) { - for (let i = 0; i < body.childCount; i++) { - const child = body.child(i); - if ( - child && - (child.type === 'function_signature_item' || child.type === 'function_item') - ) { - const methName = child.childForFieldName('name'); - if (methName) { - definitions.push({ - name: `${nameNode.text}.${methName.text}`, - kind: 'method', - line: child.startPosition.row + 1, - endLine: child.endPosition.row + 1, - }); - } - } - } - } - } - break; - } +function handleRustEnumItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const variants = extractEnumVariants(node); + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: variants.length > 0 ? 
variants : undefined, + }); +} + +function handleRustConstItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} - case 'impl_item': { - const typeNode = node.childForFieldName('type'); - const traitNode = node.childForFieldName('trait'); - if (typeNode && traitNode) { - classes.push({ - name: typeNode.text, - implements: traitNode.text, - line: node.startPosition.row + 1, +function handleRustTraitItem(node, ctx) { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + ctx.definitions.push({ + name: nameNode.text, + kind: 'trait', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + const body = node.childForFieldName('body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (child && (child.type === 'function_signature_item' || child.type === 'function_item')) { + const methName = child.childForFieldName('name'); + if (methName) { + ctx.definitions.push({ + name: `${nameNode.text}.${methName.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: child.endPosition.row + 1, }); } - break; } + } + } +} - case 'use_declaration': { - const argNode = node.child(1); - if (argNode) { - const usePaths = extractRustUsePath(argNode); - for (const imp of usePaths) { - imports.push({ - source: imp.source, - names: imp.names, - line: node.startPosition.row + 1, - rustUse: true, - }); - } - } - break; - } +function handleRustImplItem(node, ctx) { + const typeNode = node.childForFieldName('type'); + const traitNode = node.childForFieldName('trait'); + if (typeNode && traitNode) { + ctx.classes.push({ + name: typeNode.text, + implements: traitNode.text, + line: node.startPosition.row + 1, + }); + } +} - case 'call_expression': { - const fn = node.childForFieldName('function'); - if (fn) { - if 
(fn.type === 'identifier') { - calls.push({ name: fn.text, line: node.startPosition.row + 1 }); - } else if (fn.type === 'field_expression') { - const field = fn.childForFieldName('field'); - if (field) { - const value = fn.childForFieldName('value'); - const call = { name: field.text, line: node.startPosition.row + 1 }; - if (value) call.receiver = value.text; - calls.push(call); - } - } else if (fn.type === 'scoped_identifier') { - const name = fn.childForFieldName('name'); - if (name) { - const path = fn.childForFieldName('path'); - const call = { name: name.text, line: node.startPosition.row + 1 }; - if (path) call.receiver = path.text; - calls.push(call); - } - } - } - break; - } +function handleRustUseDecl(node, ctx) { + const argNode = node.child(1); + if (!argNode) return; + const usePaths = extractRustUsePath(argNode); + for (const imp of usePaths) { + ctx.imports.push({ + source: imp.source, + names: imp.names, + line: node.startPosition.row + 1, + rustUse: true, + }); + } +} - case 'macro_invocation': { - const macroNode = node.child(0); - if (macroNode) { - calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); - } - break; - } +function handleRustCallExpr(node, ctx) { + const fn = node.childForFieldName('function'); + if (!fn) return; + if (fn.type === 'identifier') { + ctx.calls.push({ name: fn.text, line: node.startPosition.row + 1 }); + } else if (fn.type === 'field_expression') { + const field = fn.childForFieldName('field'); + if (field) { + const value = fn.childForFieldName('value'); + const call = { name: field.text, line: node.startPosition.row + 1 }; + if (value) call.receiver = value.text; + ctx.calls.push(call); + } + } else if (fn.type === 'scoped_identifier') { + const name = fn.childForFieldName('name'); + if (name) { + const path = fn.childForFieldName('path'); + const call = { name: name.text, line: node.startPosition.row + 1 }; + if (path) call.receiver = path.text; + ctx.calls.push(call); } + } +} - for (let i 
= 0; i < node.childCount; i++) walkRustNode(node.child(i)); +function handleRustMacroInvocation(node, ctx) { + const macroNode = node.child(0); + if (macroNode) { + ctx.calls.push({ name: `${macroNode.text}!`, line: node.startPosition.row + 1 }); } +} - walkRustNode(tree.rootNode); - return { definitions, calls, imports, classes, exports }; +function findCurrentImpl(node) { + let current = node.parent; + while (current) { + if (current.type === 'impl_item') { + const typeNode = current.childForFieldName('type'); + return typeNode ? typeNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── From eafdf193777b9723e0c0ded0834b0c5bd9688684 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:19:21 -0600 Subject: [PATCH 11/21] refactor: decompose AST analysis visitors and engine into focused helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move nested handler functions to module level in cfg-visitor.js, dataflow-visitor.js, and complexity-visitor.js — reducing cognitive complexity of each factory function from 100-337 down to thin coordinators. Extract WASM pre-parse, visitor setup, result storage, and build delegation from runAnalyses into focused helper functions. 
Impact: 66 functions changed, 43 affected --- src/ast-analysis/engine.js | 510 ++++--- src/ast-analysis/visitors/cfg-visitor.js | 1284 ++++++++--------- .../visitors/complexity-visitor.js | 274 ++-- src/ast-analysis/visitors/dataflow-visitor.js | 454 +++--- 4 files changed, 1252 insertions(+), 1270 deletions(-) diff --git a/src/ast-analysis/engine.js b/src/ast-analysis/engine.js index 981ec514..76ba8cd2 100644 --- a/src/ast-analysis/engine.js +++ b/src/ast-analysis/engine.js @@ -50,294 +50,227 @@ async function getParserModule() { return _parserModule; } -// ─── Public API ────────────────────────────────────────────────────────── +// ─── WASM pre-parse ───────────────────────────────────────────────────── -/** - * Run all enabled AST analyses in a coordinated pass. - * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) - * @param {object} [engineOpts] - engine options - * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} - */ -export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { - const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; - - const doAst = opts.ast !== false; +async function ensureWasmTreesIfNeeded(fileSymbols, opts) { const doComplexity = opts.complexity !== false; const doCfg = opts.cfg !== false; const doDataflow = opts.dataflow !== false; - if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; - - const extToLang = buildExtToLangMap(); - - // ── WASM pre-parse for files that need it ─────────────────────────── - // The native engine only handles parsing (symbols, calls, imports). - // Complexity, CFG, and dataflow all require a WASM tree-sitter tree - // for their visitor walks. 
Without this, incremental rebuilds on the - // native engine silently lose these analyses for changed files (#468). - if (doComplexity || doCfg || doDataflow) { - let needsWasmTrees = false; - for (const [relPath, symbols] of fileSymbols) { - if (symbols._tree) continue; - const ext = path.extname(relPath).toLowerCase(); - const defs = symbols.definitions || []; - - const needsComplexity = - doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); - const needsCfg = - doCfg && - CFG_EXTENSIONS.has(ext) && - defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !Array.isArray(d.cfg?.blocks), - ); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); - - if (needsComplexity || needsCfg || needsDataflow) { - needsWasmTrees = true; - break; - } - } - - if (needsWasmTrees) { - try { - const { ensureWasmTrees } = await getParserModule(); - await ensureWasmTrees(fileSymbols, rootDir); - } catch (err) { - debug(`ensureWasmTrees failed: ${err.message}`); - } - } - } - - // ── Phase 7 Optimization: Unified pre-walk ───────────────────────── - // For files with WASM trees, run all applicable visitors in a SINGLE - // walkWithVisitors call. Store results in the format that buildXxx - // functions already expect as pre-computed data (same fields as native - // engine output). This eliminates ~3 redundant tree traversals per file. 
- const t0walk = performance.now(); + if (!doComplexity && !doCfg && !doDataflow) return; + let needsWasmTrees = false; for (const [relPath, symbols] of fileSymbols) { - if (!symbols._tree) continue; // No WASM tree — native path handles it - + if (symbols._tree) continue; const ext = path.extname(relPath).toLowerCase(); - const langId = symbols._langId || extToLang.get(ext); - if (!langId) continue; - const defs = symbols.definitions || []; - const visitors = []; - const walkerOpts = { - functionNodeTypes: new Set(), - nestingNodeTypes: new Set(), - getFunctionName: (_node) => null, - }; - - // ─ AST-store visitor ─ - const astTypeMap = AST_TYPE_MAPS.get(langId); - let astVisitor = null; - if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { - const nodeIdMap = new Map(); - for (const row of bulkNodeIdsByFile(db, relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); - visitors.push(astVisitor); - } - // ─ Complexity visitor (file-level mode) ─ - const cRules = COMPLEXITY_RULES.get(langId); - const hRules = HALSTEAD_RULES.get(langId); - let complexityVisitor = null; - if (doComplexity && cRules) { - // Only use visitor if some functions lack pre-computed complexity - const needsWasmComplexity = defs.some( - (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, - ); - if (needsWasmComplexity) { - complexityVisitor = createComplexityVisitor(cRules, hRules, { - fileLevelWalk: true, - langId, - }); - visitors.push(complexityVisitor); - - // Merge nesting nodes for complexity tracking - // NOTE: do NOT add functionNodes here — funcDepth in the complexity - // visitor already tracks function-level nesting. Adding them to - // nestingNodeTypes would inflate context.nestingLevel by +1 inside - // every function body, double-counting in cognitive += 1 + nestingLevel. 
- for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); - - // Provide getFunctionName for complexity visitor - const dfRules = DATAFLOW_RULES.get(langId); - walkerOpts.getFunctionName = (node) => { - // Try complexity rules' function name field first - const nameNode = node.childForFieldName('name'); - if (nameNode) return nameNode.text; - // Fall back to dataflow rules' richer name extraction - if (dfRules) return getFuncName(node, dfRules); - return null; - }; - } - } - - // ─ CFG visitor ─ - const cfgRulesForLang = CFG_RULES.get(langId); - let cfgVisitor = null; - if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { - // Only use visitor if some functions lack pre-computed CFG - const needsWasmCfg = defs.some( + const needsComplexity = + doComplexity && + COMPLEXITY_EXTENSIONS.has(ext) && + defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); + const needsCfg = + doCfg && + CFG_EXTENSIONS.has(ext) && + defs.some( (d) => (d.kind === 'function' || d.kind === 'method') && d.line && d.cfg !== null && !Array.isArray(d.cfg?.blocks), ); - if (needsWasmCfg) { - cfgVisitor = createCfgVisitor(cfgRulesForLang); - visitors.push(cfgVisitor); - } + const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + + if (needsComplexity || needsCfg || needsDataflow) { + needsWasmTrees = true; + break; } + } - // ─ Dataflow visitor ─ - const dfRules = DATAFLOW_RULES.get(langId); - let dataflowVisitor = null; - if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { - dataflowVisitor = createDataflowVisitor(dfRules); - visitors.push(dataflowVisitor); + if (needsWasmTrees) { + try { + const { ensureWasmTrees } = await getParserModule(); + await ensureWasmTrees(fileSymbols); + } catch (err) { + debug(`ensureWasmTrees failed: ${err.message}`); } + } +} - // ─ Run unified walk if we have visitors ─ - if (visitors.length === 0) continue; +// ─── Per-file visitor setup 
───────────────────────────────────────────── - const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); +function setupVisitors(db, relPath, symbols, langId, opts) { + const ext = path.extname(relPath).toLowerCase(); + const defs = symbols.definitions || []; + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; - // ─ Store AST results (buildAstNodes will find symbols.astNodes and skip its walk) ─ - if (astVisitor) { - const astRows = results['ast-store'] || []; - if (astRows.length > 0) { - // Store in the format buildAstNodes expects for the native path - symbols.astNodes = astRows; - } + const visitors = []; + const walkerOpts = { + functionNodeTypes: new Set(), + nestingNodeTypes: new Set(), + getFunctionName: (_node) => null, + }; + + // AST-store visitor + let astVisitor = null; + const astTypeMap = AST_TYPE_MAPS.get(langId); + if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } + astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); + visitors.push(astVisitor); + } - // ─ Store complexity results on definitions (buildComplexityMetrics will find def.complexity) ─ - if (complexityVisitor) { - const complexityResults = results.complexity || []; - // Match results back to definitions by function start line - // Store the full result (metrics + funcNode) for O(1) lookup - const resultByLine = new Map(); - for (const r of complexityResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!resultByLine.has(line)) resultByLine.set(line, []); - resultByLine.get(line).push(r); - } - } - for (const def of defs) { - if ((def.kind === 'function' || def.kind === 'method') && def.line && 
!def.complexity) { - const candidates = resultByLine.get(def.line); - const funcResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (funcResult) { - const { metrics } = funcResult; - const loc = computeLOCMetrics(funcResult.funcNode, langId); - const volume = metrics.halstead ? metrics.halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex( - volume, - metrics.cyclomatic, - loc.sloc, - commentRatio, - ); - - def.complexity = { - cognitive: metrics.cognitive, - cyclomatic: metrics.cyclomatic, - maxNesting: metrics.maxNesting, - halstead: metrics.halstead, - loc, - maintainabilityIndex: mi, - }; - } - } - } + // Complexity visitor (file-level mode) + let complexityVisitor = null; + const cRules = COMPLEXITY_RULES.get(langId); + const hRules = HALSTEAD_RULES.get(langId); + if (doComplexity && cRules) { + const needsWasmComplexity = defs.some( + (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, + ); + if (needsWasmComplexity) { + complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); + visitors.push(complexityVisitor); + + for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); + + const dfRules = DATAFLOW_RULES.get(langId); + walkerOpts.getFunctionName = (node) => { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode.text; + if (dfRules) return getFuncName(node, dfRules); + return null; + }; } + } - // ─ Store CFG results on definitions (buildCFGData will find def.cfg and skip its walk) ─ - if (cfgVisitor) { - const cfgResults = results.cfg || []; - const cfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!cfgByLine.has(line)) 
cfgByLine.set(line, []); - cfgByLine.get(line).push(r); - } - } - for (const def of defs) { - if ( - (def.kind === 'function' || def.kind === 'method') && - def.line && - !def.cfg?.blocks?.length - ) { - const candidates = cfgByLine.get(def.line); - const cfgResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (cfgResult) { - def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; - - // Override complexity's cyclomatic with CFG-derived value (single source of truth) - // and recompute maintainability index to stay consistent - if (def.complexity && cfgResult.cyclomatic != null) { - def.complexity.cyclomatic = cfgResult.cyclomatic; - const { loc, halstead } = def.complexity; - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; - def.complexity.maintainabilityIndex = computeMaintainabilityIndex( - volume, - cfgResult.cyclomatic, - loc?.sloc ?? 
0, - commentRatio, - ); - } - } - } - } + // CFG visitor + let cfgVisitor = null; + const cfgRulesForLang = CFG_RULES.get(langId); + if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { + const needsWasmCfg = defs.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !Array.isArray(d.cfg?.blocks), + ); + if (needsWasmCfg) { + cfgVisitor = createCfgVisitor(cfgRulesForLang); + visitors.push(cfgVisitor); } + } + + // Dataflow visitor + let dataflowVisitor = null; + const dfRules = DATAFLOW_RULES.get(langId); + if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + dataflowVisitor = createDataflowVisitor(dfRules); + visitors.push(dataflowVisitor); + } + + return { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor }; +} - // ─ Store dataflow results (buildDataflowEdges will find symbols.dataflow and skip its walk) ─ - if (dataflowVisitor) { - symbols.dataflow = results.dataflow; +// ─── Result storage helpers ───────────────────────────────────────────── + +function storeComplexityResults(results, defs, langId) { + const complexityResults = results.complexity || []; + const resultByLine = new Map(); + for (const r of complexityResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!resultByLine.has(line)) resultByLine.set(line, []); + resultByLine.get(line).push(r); } } + for (const def of defs) { + if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) { + const candidates = resultByLine.get(def.line); + const funcResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (funcResult) { + const { metrics } = funcResult; + const loc = computeLOCMetrics(funcResult.funcNode, langId); + const volume = metrics.halstead ? 
metrics.halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, metrics.cyclomatic, loc.sloc, commentRatio); + + def.complexity = { + cognitive: metrics.cognitive, + cyclomatic: metrics.cyclomatic, + maxNesting: metrics.maxNesting, + halstead: metrics.halstead, + loc, + maintainabilityIndex: mi, + }; + } + } + } +} - timing._unifiedWalkMs = performance.now() - t0walk; +function storeCfgResults(results, defs) { + const cfgResults = results.cfg || []; + const cfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!cfgByLine.has(line)) cfgByLine.set(line, []); + cfgByLine.get(line).push(r); + } + } + for (const def of defs) { + if ( + (def.kind === 'function' || def.kind === 'method') && + def.line && + !def.cfg?.blocks?.length + ) { + const candidates = cfgByLine.get(def.line); + const cfgResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (cfgResult) { + def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; + + // Override complexity's cyclomatic with CFG-derived value (single source of truth) + if (def.complexity && cfgResult.cyclomatic != null) { + def.complexity.cyclomatic = cfgResult.cyclomatic; + const { loc, halstead } = def.complexity; + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; + def.complexity.maintainabilityIndex = computeMaintainabilityIndex( + volume, + cfgResult.cyclomatic, + loc?.sloc ?? 
0, + commentRatio, + ); + } + } + } + } +} - // ── Delegate to buildXxx functions ───────────────────────────────── - // Each function finds pre-computed data from the unified walk above - // (or from the native engine) and only does DB writes + native fallback. +// ─── Build delegation ─────────────────────────────────────────────────── - if (doAst) { +async function delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing) { + if (opts.ast !== false) { const t0 = performance.now(); try { const { buildAstNodes } = await import('../features/ast.js'); @@ -348,7 +281,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.astMs = performance.now() - t0; } - if (doComplexity) { + if (opts.complexity !== false) { const t0 = performance.now(); try { const { buildComplexityMetrics } = await import('../features/complexity.js'); @@ -359,7 +292,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.complexityMs = performance.now() - t0; } - if (doCfg) { + if (opts.cfg !== false) { const t0 = performance.now(); try { const { buildCFGData } = await import('../features/cfg.js'); @@ -370,7 +303,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.cfgMs = performance.now() - t0; } - if (doDataflow) { + if (opts.dataflow !== false) { const t0 = performance.now(); try { const { buildDataflowEdges } = await import('../features/dataflow.js'); @@ -380,6 +313,67 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { } timing.dataflowMs = performance.now() - t0; } +} + +// ─── Public API ────────────────────────────────────────────────────────── + +/** + * Run all enabled AST analyses in a coordinated pass. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) + * @param {object} [engineOpts] - engine options + * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} + */ +export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { + const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; + + if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; + + const extToLang = buildExtToLangMap(); + + // WASM pre-parse for files that need it + await ensureWasmTreesIfNeeded(fileSymbols, opts); + + // Unified pre-walk: run all applicable visitors in a single DFS per file + const t0walk = performance.now(); + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) continue; + + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || extToLang.get(ext); + if (!langId) continue; + + const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } = + setupVisitors(db, relPath, symbols, langId, opts); + + if (visitors.length === 0) continue; + + const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); + const defs = symbols.definitions || []; + + if (astVisitor) { + const astRows = results['ast-store'] || []; + if (astRows.length > 0) symbols.astNodes = astRows; + } + + if (complexityVisitor) storeComplexityResults(results, defs, langId); + if (cfgVisitor) storeCfgResults(results, defs); + if (dataflowVisitor) symbols.dataflow = results.dataflow; + } + + timing._unifiedWalkMs = performance.now() - t0walk; + + // Delegate to buildXxx 
functions for DB writes + native fallback + await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing); return timing; } diff --git a/src/ast-analysis/visitors/cfg-visitor.js b/src/ast-analysis/visitors/cfg-visitor.js index 1fb1de50..97bb344f 100644 --- a/src/ast-analysis/visitors/cfg-visitor.js +++ b/src/ast-analysis/visitors/cfg-visitor.js @@ -10,756 +10,746 @@ * hooks, using a control-flow frame stack to track branch/loop/switch context. */ -/** - * Create a CFG visitor for use with walkWithVisitors. - * - * @param {object} cfgRules - CFG_RULES for the language - * @returns {Visitor} - */ -export function createCfgVisitor(cfgRules) { - // ── Per-function state ────────────────────────────────────────────── - // Pushed/popped on enterFunction/exitFunction for nested function support. - - /** @type {Array} Stack of per-function CFG state */ - const funcStateStack = []; - - /** @type {object|null} Active per-function state */ - let S = null; - - // Collected results (one per top-level function) - const results = []; - - function makeFuncState() { - const blocks = []; - const edges = []; - let nextIndex = 0; - - function makeBlock(type, startLine = null, endLine = null, label = null) { - const block = { index: nextIndex++, type, startLine, endLine, label }; - blocks.push(block); - return block; - } - - function addEdge(source, target, kind) { - edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); - } +// ── Node-type predicates ──────────────────────────────────────────────── - const entry = makeBlock('entry'); - const exit = makeBlock('exit'); - const firstBody = makeBlock('body'); - addEdge(entry, firstBody, 'fallthrough'); - - return { - blocks, - edges, - makeBlock, - addEdge, - entryBlock: entry, - exitBlock: exit, - currentBlock: firstBody, - loopStack: [], - labelMap: new Map(), - /** Control-flow frame stack for nested if/switch/try/loop/labeled */ - cfgStack: [], - funcNode: null, - }; - } - - // ── Helpers 
───────────────────────────────────────────────────────── +function isIfNode(type, cfgRules) { + return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); +} - function isIfNode(type) { - return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); - } +function isForNode(type, cfgRules) { + return cfgRules.forNodes.has(type); +} - function isForNode(type) { - return cfgRules.forNodes.has(type); - } +function isWhileNode(type, cfgRules) { + return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); +} - function isWhileNode(type) { - return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); - } +function isSwitchNode(type, cfgRules) { + return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); +} - function isSwitchNode(type) { - return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); - } +function isCaseNode(type, cfgRules) { + return ( + type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) + ); +} - function isCaseNode(type) { - return ( - type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) - ); - } +function isBlockNode(type, cfgRules) { + return type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type); +} - function isBlockNode(type) { - return ( - type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type) - ); - } +/** Check if a node is a control-flow statement that we handle specially */ +function isControlFlow(type, cfgRules) { + return ( + isIfNode(type, cfgRules) || + (cfgRules.unlessNode && type === cfgRules.unlessNode) || + isForNode(type, cfgRules) || + isWhileNode(type, cfgRules) || + (cfgRules.untilNode && type === cfgRules.untilNode) || + (cfgRules.doNode && type === cfgRules.doNode) || + (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || + isSwitchNode(type, cfgRules) || + (cfgRules.tryNode && type === cfgRules.tryNode) || + 
type === cfgRules.returnNode || + type === cfgRules.throwNode || + type === cfgRules.breakNode || + type === cfgRules.continueNode || + type === cfgRules.labeledNode + ); +} - /** Check if a node is a control-flow statement that we handle specially */ - function isControlFlow(type) { - return ( - isIfNode(type) || - (cfgRules.unlessNode && type === cfgRules.unlessNode) || - isForNode(type) || - isWhileNode(type) || - (cfgRules.untilNode && type === cfgRules.untilNode) || - (cfgRules.doNode && type === cfgRules.doNode) || - (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || - isSwitchNode(type) || - (cfgRules.tryNode && type === cfgRules.tryNode) || - type === cfgRules.returnNode || - type === cfgRules.throwNode || - type === cfgRules.breakNode || - type === cfgRules.continueNode || - type === cfgRules.labeledNode - ); - } +// ── Utility functions ─────────────────────────────────────────────────── - /** - * Get the actual control-flow node (unwrapping expression_statement if needed). - */ - function effectiveNode(node) { - if (node.type === 'expression_statement' && node.namedChildCount === 1) { - const inner = node.namedChild(0); - if (isControlFlow(inner.type)) return inner; - } - return node; +/** + * Get the actual control-flow node (unwrapping expression_statement if needed). + */ +function effectiveNode(node, cfgRules) { + if (node.type === 'expression_statement' && node.namedChildCount === 1) { + const inner = node.namedChild(0); + if (isControlFlow(inner.type, cfgRules)) return inner; } + return node; +} - /** - * Register a loop/switch in label map for labeled break/continue. - */ - function registerLabelCtx(headerBlock, exitBlock) { - for (const [, ctx] of S.labelMap) { - if (!ctx.headerBlock) { - ctx.headerBlock = headerBlock; - ctx.exitBlock = exitBlock; - } +/** + * Register a loop/switch in label map for labeled break/continue. 
+ */ +function registerLabelCtx(S, headerBlock, exitBlock) { + for (const [, ctx] of S.labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = exitBlock; } } +} - /** - * Get statements from a body node (block or single statement). - * Returns effective (unwrapped) nodes. - */ - function getBodyStatements(bodyNode) { - if (!bodyNode) return []; - if (isBlockNode(bodyNode.type)) { - const stmts = []; - for (let i = 0; i < bodyNode.namedChildCount; i++) { - const child = bodyNode.namedChild(i); - if (child.type === 'statement_list') { - for (let j = 0; j < child.namedChildCount; j++) { - stmts.push(child.namedChild(j)); - } - } else { - stmts.push(child); +/** + * Get statements from a body node (block or single statement). + * Returns effective (unwrapped) nodes. + */ +function getBodyStatements(bodyNode, cfgRules) { + if (!bodyNode) return []; + if (isBlockNode(bodyNode.type, cfgRules)) { + const stmts = []; + for (let i = 0; i < bodyNode.namedChildCount; i++) { + const child = bodyNode.namedChild(i); + if (child.type === 'statement_list') { + for (let j = 0; j < child.namedChildCount; j++) { + stmts.push(child.namedChild(j)); } + } else { + stmts.push(child); } - return stmts; } - return [bodyNode]; + return stmts; } + return [bodyNode]; +} - // ── Statement-level processing (replicates buildFunctionCFG logic) ── - // The visitor delegates to these for each control-flow construct, - // processing the body statements sequentially just like the original. 
+function makeFuncState() { + const blocks = []; + const edges = []; + let nextIndex = 0; - function processStatements(stmts, currentBlock) { - let cur = currentBlock; - for (const stmt of stmts) { - if (!cur) break; - cur = processStatement(stmt, cur); - } - return cur; + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { index: nextIndex++, type, startLine, endLine, label }; + blocks.push(block); + return block; } - function processStatement(stmt, currentBlock) { - if (!stmt || !currentBlock) return currentBlock; + function addEdge(source, target, kind) { + edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); + } - // Unwrap expression_statement for Rust-style control flow expressions - const effNode = effectiveNode(stmt); - const type = effNode.type; + const entry = makeBlock('entry'); + const exit = makeBlock('exit'); + const firstBody = makeBlock('body'); + addEdge(entry, firstBody, 'fallthrough'); - // Labeled statement - if (type === cfgRules.labeledNode) { - return processLabeled(effNode, currentBlock); - } + return { + blocks, + edges, + makeBlock, + addEdge, + entryBlock: entry, + exitBlock: exit, + currentBlock: firstBody, + loopStack: [], + labelMap: new Map(), + cfgStack: [], + funcNode: null, + }; +} - // If / unless - if (isIfNode(type) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { - return processIf(effNode, currentBlock); - } +// ── Statement processors ──────────────────────────────────────────────── - // For loops - if (isForNode(type)) { - return processForLoop(effNode, currentBlock); - } +function processStatements(stmts, currentBlock, S, cfgRules) { + let cur = currentBlock; + for (const stmt of stmts) { + if (!cur) break; + cur = processStatement(stmt, cur, S, cfgRules); + } + return cur; +} - // While / until - if (isWhileNode(type) || (cfgRules.untilNode && type === cfgRules.untilNode)) { - return processWhileLoop(effNode, currentBlock); - } +function 
processStatement(stmt, currentBlock, S, cfgRules) { + if (!stmt || !currentBlock) return currentBlock; - // Do-while - if (cfgRules.doNode && type === cfgRules.doNode) { - return processDoWhileLoop(effNode, currentBlock); - } + const effNode = effectiveNode(stmt, cfgRules); + const type = effNode.type; - // Infinite loop (Rust) - if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { - return processInfiniteLoop(effNode, currentBlock); - } + if (type === cfgRules.labeledNode) { + return processLabeled(effNode, currentBlock, S, cfgRules); + } + if (isIfNode(type, cfgRules) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { + return processIf(effNode, currentBlock, S, cfgRules); + } + if (isForNode(type, cfgRules)) { + return processForLoop(effNode, currentBlock, S, cfgRules); + } + if (isWhileNode(type, cfgRules) || (cfgRules.untilNode && type === cfgRules.untilNode)) { + return processWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.doNode && type === cfgRules.doNode) { + return processDoWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { + return processInfiniteLoop(effNode, currentBlock, S, cfgRules); + } + if (isSwitchNode(type, cfgRules)) { + return processSwitch(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.tryNode && type === cfgRules.tryNode) { + return processTryCatch(effNode, currentBlock, S, cfgRules); + } + if (type === cfgRules.returnNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'return'); + return null; + } + if (type === cfgRules.throwNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'exception'); + return null; + } + if (type === cfgRules.breakNode) { + return processBreak(effNode, currentBlock, S); + } + if (type === cfgRules.continueNode) { + return processContinue(effNode, currentBlock, S); + } - // Switch / match - 
if (isSwitchNode(type)) { - return processSwitch(effNode, currentBlock); - } + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; +} - // Try/catch/finally - if (cfgRules.tryNode && type === cfgRules.tryNode) { - return processTryCatch(effNode, currentBlock); - } +// ── Labeled / break / continue ────────────────────────────────────────── + +function processLabeled(node, currentBlock, S, cfgRules) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = node.childForFieldName('body'); + if (body && labelName) { + const labelCtx = { headerBlock: null, exitBlock: null }; + S.labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock, S, cfgRules); + S.labelMap.delete(labelName); + return result; + } + return currentBlock; +} - // Return - if (type === cfgRules.returnNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'return'); - return null; - } +function processBreak(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? 
labelNode.text : null; - // Throw - if (type === cfgRules.throwNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'exception'); - return null; - } + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).exitBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].exitBlock; + } - // Break - if (type === cfgRules.breakNode) { - return processBreak(effNode, currentBlock); - } + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'break'); + return null; + } + return currentBlock; +} - // Continue - if (type === cfgRules.continueNode) { - return processContinue(effNode, currentBlock); - } +function processContinue(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; - // Regular statement — extend current block - if (!currentBlock.startLine) { - currentBlock.startLine = stmt.startPosition.row + 1; - } - currentBlock.endLine = stmt.endPosition.row + 1; - return currentBlock; - } - - function processLabeled(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? labelNode.text : null; - const body = node.childForFieldName('body'); - if (body && labelName) { - const labelCtx = { headerBlock: null, exitBlock: null }; - S.labelMap.set(labelName, labelCtx); - const result = processStatement(body, currentBlock); - S.labelMap.delete(labelName); - return result; - } - return currentBlock; + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).headerBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].headerBlock; } - function processBreak(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'continue'); + return null; + } + return currentBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).exitBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].exitBlock; - } +// ── If / else-if / else ───────────────────────────────────────────────── + +function processIf(ifStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = S.makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + S.addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = S.makeBlock('body'); + + // True branch + const consequentField = cfgRules.ifConsequentField || 'consequence'; + const consequent = ifStmt.childForFieldName(consequentField); + const trueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getBodyStatements(consequent, cfgRules); + const trueEnd = processStatements(trueStmts, trueBlock, S, cfgRules); + if (trueEnd) { + S.addEdge(trueEnd, joinBlock, 'fallthrough'); + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'break'); - return null; - } - return currentBlock; + // False branch + if (cfgRules.elifNode) { + processElifSiblings(ifStmt, condBlock, joinBlock, S, cfgRules); + } else { + processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules); } - function processContinue(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + return joinBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).headerBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].headerBlock; - } +function processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules) { + const alternative = ifStmt.childForFieldName('alternative'); + if (!alternative) { + S.addEdge(condBlock, joinBlock, 'branch_false'); + return; + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'continue'); - return null; + if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { + // Pattern C: direct alternative (Go, Java, C#) + if (isIfNode(alternative.type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(alternative, falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseStmts = getBodyStatements(alternative, cfgRules); + const falseEnd = processStatements(falseStmts, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } else if (alternative.type === cfgRules.elseClause) { + // Pattern A: else_clause wrapper (JS/TS, Rust) + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && isIfNode(elseChildren[0].type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + 
const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); } - return currentBlock; } +} - // ── If/else-if/else ───────────────────────────────────────────────── - - function processIf(ifStmt, currentBlock) { - currentBlock.endLine = ifStmt.startPosition.row + 1; +function processElifSiblings(ifStmt, firstCondBlock, joinBlock, S, cfgRules) { + let lastCondBlock = firstCondBlock; + let foundElse = false; - const condBlock = S.makeBlock( - 'condition', - ifStmt.startPosition.row + 1, - ifStmt.startPosition.row + 1, - 'if', - ); - S.addEdge(currentBlock, condBlock, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - - // True branch - const consequentField = cfgRules.ifConsequentField || 'consequence'; - const consequent = ifStmt.childForFieldName(consequentField); - const trueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(condBlock, trueBlock, 'branch_true'); - const trueStmts = getBodyStatements(consequent); - const trueEnd = processStatements(trueStmts, trueBlock); - if (trueEnd) { - S.addEdge(trueEnd, joinBlock, 'fallthrough'); - } + for (let i = 0; i < ifStmt.namedChildCount; i++) { + const child = ifStmt.namedChild(i); - // False branch - if (cfgRules.elifNode) { - processElifSiblings(ifStmt, condBlock, joinBlock); - } else { - const alternative = ifStmt.childForFieldName('alternative'); - if (alternative) { - if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { - // Pattern C: direct alternative (Go, Java, C#) - if (isIfNode(alternative.type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(alternative, falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const 
falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseStmts = getBodyStatements(alternative); - const falseEnd = processStatements(falseStmts, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } else if (alternative.type === cfgRules.elseClause) { - // Pattern A: else_clause wrapper (JS/TS, Rust) - const elseChildren = []; - for (let i = 0; i < alternative.namedChildCount; i++) { - elseChildren.push(alternative.namedChild(i)); - } - if (elseChildren.length === 1 && isIfNode(elseChildren[0].type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(elseChildren[0], falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseEnd = processStatements(elseChildren, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } + if (child.type === cfgRules.elifNode) { + const elifCondBlock = S.makeBlock( + 'condition', + child.startPosition.row + 1, + child.startPosition.row + 1, + 'else-if', + ); + S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); + + const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; + const elifConsequent = child.childForFieldName(elifConsequentField); + const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); + const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); + const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); + if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); + + lastCondBlock = elifCondBlock; + } else if (child.type === cfgRules.elseClause) { + const elseBlock = S.makeBlock('branch_false', null, 
null, 'else'); + S.addEdge(lastCondBlock, elseBlock, 'branch_false'); + + const elseBody = child.childForFieldName('body'); + let elseStmts; + if (elseBody) { + elseStmts = getBodyStatements(elseBody, cfgRules); } else { - // No else - S.addEdge(condBlock, joinBlock, 'branch_false'); - } - } - - return joinBlock; - } - - function processElifSiblings(ifStmt, firstCondBlock, joinBlock) { - let lastCondBlock = firstCondBlock; - let foundElse = false; - - for (let i = 0; i < ifStmt.namedChildCount; i++) { - const child = ifStmt.namedChild(i); - - if (child.type === cfgRules.elifNode) { - const elifCondBlock = S.makeBlock( - 'condition', - child.startPosition.row + 1, - child.startPosition.row + 1, - 'else-if', - ); - S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); - - const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; - const elifConsequent = child.childForFieldName(elifConsequentField); - const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); - const elifTrueStmts = getBodyStatements(elifConsequent); - const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock); - if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); - - lastCondBlock = elifCondBlock; - } else if (child.type === cfgRules.elseClause) { - const elseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(lastCondBlock, elseBlock, 'branch_false'); - - const elseBody = child.childForFieldName('body'); - let elseStmts; - if (elseBody) { - elseStmts = getBodyStatements(elseBody); - } else { - elseStmts = []; - for (let j = 0; j < child.namedChildCount; j++) { - elseStmts.push(child.namedChild(j)); - } + elseStmts = []; + for (let j = 0; j < child.namedChildCount; j++) { + elseStmts.push(child.namedChild(j)); } - const elseEnd = processStatements(elseStmts, elseBlock); - if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - - foundElse = true; } - } + const elseEnd = 
processStatements(elseStmts, elseBlock, S, cfgRules); + if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - if (!foundElse) { - S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + foundElse = true; } } - // ── Loops ─────────────────────────────────────────────────────────── - - function processForLoop(forStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - forStmt.startPosition.row + 1, - forStmt.startPosition.row + 1, - 'for', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); - - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + if (!foundElse) { + S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + } +} - const body = forStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); +// ── Loops ─────────────────────────────────────────────────────────────── + +function processForLoop(forStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = forStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, 
bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); +function processWhileLoop(whileStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } +function processDoWhileLoop(doStmt, currentBlock, S, cfgRules) { + const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + S.addEdge(currentBlock, bodyBlock, 'fallthrough'); - function processWhileLoop(whileStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - whileStmt.startPosition.row + 1, - whileStmt.startPosition.row + 1, - 'while', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = S.makeBlock('body'); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, condBlock, loopExitBlock); - const body = 
whileStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); + const body = doStmt.childForFieldName('body'); + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + S.addEdge(condBlock, bodyBlock, 'loop_back'); + S.addEdge(condBlock, loopExitBlock, 'loop_exit'); - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } + S.loopStack.pop(); + return loopExitBlock; +} - function processDoWhileLoop(doStmt, currentBlock) { - const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); - S.addEdge(currentBlock, bodyBlock, 'fallthrough'); +function processInfiniteLoop(loopStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + loopStmt.startPosition.row + 1, + loopStmt.startPosition.row + 1, + 'loop', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = loopStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + // No loop_exit from header — only via break + S.loopStack.pop(); + return loopExitBlock; +} - const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); - const loopExitBlock = S.makeBlock('body'); +// ── Switch 
/ match ────────────────────────────────────────────────────── - const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(condBlock, loopExitBlock); +function processSwitch(switchStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = switchStmt.startPosition.row + 1; - const body = doStmt.childForFieldName('body'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); + const switchHeader = S.makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + S.addEdge(currentBlock, switchHeader, 'fallthrough'); - S.addEdge(condBlock, bodyBlock, 'loop_back'); - S.addEdge(condBlock, loopExitBlock, 'loop_exit'); + const joinBlock = S.makeBlock('body'); + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + S.loopStack.push(switchCtx); - S.loopStack.pop(); - return loopExitBlock; - } + const switchBody = switchStmt.childForFieldName('body'); + const container = switchBody || switchStmt; - function processInfiniteLoop(loopStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - loopStmt.startPosition.row + 1, - loopStmt.startPosition.row + 1, - 'loop', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + let hasDefault = false; + for (let i = 0; i < container.namedChildCount; i++) { + const caseClause = container.namedChild(i); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const isDefault = caseClause.type === cfgRules.defaultNode; + const isCase = isDefault || isCaseNode(caseClause.type, cfgRules); + if (!isCase) continue; - const body = loopStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 
'branch_true'); + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); + S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + const caseStmts = extractCaseBody(caseClause, cfgRules); + const caseEnd = processStatements(caseStmts, caseBlock, S, cfgRules); + if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); + } - // No loop_exit from header — only via break - S.loopStack.pop(); - return loopExitBlock; + if (!hasDefault) { + S.addEdge(switchHeader, joinBlock, 'branch_false'); } - // ── Switch / match ────────────────────────────────────────────────── + S.loopStack.pop(); + return joinBlock; +} - function processSwitch(switchStmt, currentBlock) { - currentBlock.endLine = switchStmt.startPosition.row + 1; +function extractCaseBody(caseClause, cfgRules) { + const caseBodyNode = + caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); + if (caseBodyNode) { + return getBodyStatements(caseBodyNode, cfgRules); + } - const switchHeader = S.makeBlock( - 'condition', - switchStmt.startPosition.row + 1, - switchStmt.startPosition.row + 1, - 'switch', - ); - S.addEdge(currentBlock, switchHeader, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; - S.loopStack.push(switchCtx); - - const switchBody = switchStmt.childForFieldName('body'); - const container = switchBody || switchStmt; - - let hasDefault = false; - for (let i = 0; i < container.namedChildCount; i++) { - const caseClause = container.namedChild(i); - - const isDefault = caseClause.type === cfgRules.defaultNode; - const isCase = isDefault || isCaseNode(caseClause.type); - if (!isCase) 
continue; - - const caseLabel = isDefault ? 'default' : 'case'; - const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); - S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); - if (isDefault) hasDefault = true; - - // Extract case body - const caseBodyNode = - caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); - let caseStmts; - if (caseBodyNode) { - caseStmts = getBodyStatements(caseBodyNode); - } else { - caseStmts = []; - const valueNode = caseClause.childForFieldName('value'); - const patternNode = caseClause.childForFieldName('pattern'); - for (let j = 0; j < caseClause.namedChildCount; j++) { - const child = caseClause.namedChild(j); - if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { - if (child.type === 'statement_list') { - for (let k = 0; k < child.namedChildCount; k++) { - caseStmts.push(child.namedChild(k)); - } - } else { - caseStmts.push(child); - } - } + const stmts = []; + const valueNode = caseClause.childForFieldName('value'); + const patternNode = caseClause.childForFieldName('pattern'); + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { + if (child.type === 'statement_list') { + for (let k = 0; k < child.namedChildCount; k++) { + stmts.push(child.namedChild(k)); } + } else { + stmts.push(child); } - - const caseEnd = processStatements(caseStmts, caseBlock); - if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); } + } + return stmts; +} - if (!hasDefault) { - S.addEdge(switchHeader, joinBlock, 'branch_false'); - } +// ── Try / catch / finally ─────────────────────────────────────────────── + +function processTryCatch(tryStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = tryStmt.startPosition.row + 1; - S.loopStack.pop(); - return joinBlock; + const joinBlock = 
S.makeBlock('body'); + + // Try body + const tryBody = tryStmt.childForFieldName('body'); + let tryBodyStart; + let tryStmts; + if (tryBody) { + tryBodyStart = tryBody.startPosition.row + 1; + tryStmts = getBodyStatements(tryBody, cfgRules); + } else { + tryBodyStart = tryStmt.startPosition.row + 1; + tryStmts = []; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; + tryStmts.push(child); + } } - // ── Try/catch/finally ─────────────────────────────────────────────── + const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); + S.addEdge(currentBlock, tryBlock, 'fallthrough'); + const tryEnd = processStatements(tryStmts, tryBlock, S, cfgRules); - function processTryCatch(tryStmt, currentBlock) { - currentBlock.endLine = tryStmt.startPosition.row + 1; + // Find catch and finally handlers + const { catchHandler, finallyHandler } = findTryHandlers(tryStmt, cfgRules); - const joinBlock = S.makeBlock('body'); + if (catchHandler) { + processCatchHandler(catchHandler, tryBlock, tryEnd, finallyHandler, joinBlock, S, cfgRules); + } else if (finallyHandler) { + processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + } - // Try body - const tryBody = tryStmt.childForFieldName('body'); - let tryBodyStart; - let tryStmts; - if (tryBody) { - tryBodyStart = tryBody.startPosition.row + 1; - tryStmts = getBodyStatements(tryBody); - } else { - tryBodyStart = tryStmt.startPosition.row + 1; - tryStmts = []; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; - tryStmts.push(child); - } - } + return joinBlock; +} - 
const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); - S.addEdge(currentBlock, tryBlock, 'fallthrough'); - const tryEnd = processStatements(tryStmts, tryBlock); +function findTryHandlers(tryStmt, cfgRules) { + let catchHandler = null; + let finallyHandler = null; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; + } + return { catchHandler, finallyHandler }; +} - // Find catch and finally handlers - let catchHandler = null; - let finallyHandler = null; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; +function processCatchHandler( + catchHandler, + tryBlock, + tryEnd, + finallyHandler, + joinBlock, + S, + cfgRules, +) { + const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); + S.addEdge(tryBlock, catchBlock, 'exception'); + + const catchBodyNode = catchHandler.childForFieldName('body'); + let catchStmts; + if (catchBodyNode) { + catchStmts = getBodyStatements(catchBodyNode, cfgRules); + } else { + catchStmts = []; + for (let i = 0; i < catchHandler.namedChildCount; i++) { + catchStmts.push(catchHandler.namedChild(i)); } + } + const catchEnd = processStatements(catchStmts, catchBlock, S, cfgRules); + + if (finallyHandler) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? 
getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); + } +} - if (catchHandler) { - const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); - S.addEdge(tryBlock, catchBlock, 'exception'); +function processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); +} - const catchBodyNode = catchHandler.childForFieldName('body'); - let catchStmts; - if (catchBodyNode) { - catchStmts = getBodyStatements(catchBodyNode); - } else { - catchStmts = []; - for (let i = 0; i < catchHandler.namedChildCount; i++) { - catchStmts.push(catchHandler.namedChild(i)); - } - } - const catchEnd = processStatements(catchStmts, catchBlock); - - if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? 
getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); - } - } else if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - } +// ── Enter-function body processing ────────────────────────────────────── + +function processFunctionBody(funcNode, S, cfgRules) { + const body = funcNode.childForFieldName('body'); + if (!body) { + // No body — entry → exit + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } - return joinBlock; + if (!isBlockNode(body.type, cfgRules)) { + // Expression body (e.g., arrow function `(x) => x + 1`) + const bodyBlock = S.blocks[2]; + bodyBlock.startLine = body.startPosition.row + 1; + bodyBlock.endLine = body.endPosition.row + 1; + S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; } - // ── Visitor interface ─────────────────────────────────────────────── + // Block body — process statements + const stmts = getBodyStatements(body, cfgRules); + if (stmts.length === 0) { + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } + + const 
firstBody = S.blocks[2]; + const lastBlock = processStatements(stmts, firstBody, S, cfgRules); + if (lastBlock) { + S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); + } + S.currentBlock = null; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a CFG visitor for use with walkWithVisitors. + * + * @param {object} cfgRules - CFG_RULES for the language + * @returns {Visitor} + */ +export function createCfgVisitor(cfgRules) { + const funcStateStack = []; + let S = null; + const results = []; return { name: 'cfg', functionNodeTypes: cfgRules.functionNodes, enterFunction(funcNode, _funcName, _context) { - if (S) { - // Nested function — push current state - funcStateStack.push(S); - } + if (S) funcStateStack.push(S); S = makeFuncState(); S.funcNode = funcNode; - - // Check for expression body (arrow functions): no block body - const body = funcNode.childForFieldName('body'); - if (!body) { - // No body at all — entry → exit - // Remove the firstBody block and its edge - S.blocks.length = 2; // keep entry + exit - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - if (!isBlockNode(body.type)) { - // Expression body (e.g., arrow function `(x) => x + 1`) - // entry → body → exit (body is the expression) - const bodyBlock = S.blocks[2]; // the firstBody we already created - bodyBlock.startLine = body.startPosition.row + 1; - bodyBlock.endLine = body.endPosition.row + 1; - S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; // no further processing needed - return; - } - - // Block body — process statements - const stmts = getBodyStatements(body); - if (stmts.length === 0) { - // Empty function - S.blocks.length = 2; - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - // Process all body statements using the statement-level processor - const firstBody = S.blocks[2]; // 
the firstBody block - const lastBlock = processStatements(stmts, firstBody); - if (lastBlock) { - S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); - } - S.currentBlock = null; // done processing + processFunctionBody(funcNode, S, cfgRules); }, exitFunction(funcNode, _funcName, _context) { if (S && S.funcNode === funcNode) { - // Derive cyclomatic complexity from CFG: E - N + 2 const cyclomatic = S.edges.length - S.blocks.length + 2; results.push({ funcNode: S.funcNode, @@ -768,21 +758,17 @@ export function createCfgVisitor(cfgRules) { cyclomatic: Math.max(cyclomatic, 1), }); } - - // Pop to parent function state (if nested) S = funcStateStack.length > 0 ? funcStateStack.pop() : null; }, enterNode(_node, _context) { - // No-op — all CFG construction is done in enterFunction via - // processStatements. We intentionally do NOT return skipChildren here - // so that the walker still recurses into children, allowing nested - // function definitions to trigger enterFunction/exitFunction and get - // their own CFG computed via the funcStateStack. + // No-op — all CFG construction is done in enterFunction via processStatements. + // We intentionally do NOT return skipChildren so the walker recurses into + // children, allowing nested functions to trigger enterFunction/exitFunction. 
}, exitNode(_node, _context) { - // No-op — all work done in enterFunction/exitFunction + // No-op }, finish() { diff --git a/src/ast-analysis/visitors/complexity-visitor.js b/src/ast-analysis/visitors/complexity-visitor.js index df386afc..ca19c0c5 100644 --- a/src/ast-analysis/visitors/complexity-visitor.js +++ b/src/ast-analysis/visitors/complexity-visitor.js @@ -12,6 +12,122 @@ import { computeMaintainabilityIndex, } from '../metrics.js'; +// ── Halstead classification ───────────────────────────────────────────── + +function classifyHalstead(node, hRules, acc) { + const type = node.type; + if (hRules.skipTypes.has(type)) acc.halsteadSkipDepth++; + if (acc.halsteadSkipDepth > 0) return; + + if (hRules.compoundOperators.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } + if (node.childCount === 0) { + if (hRules.operatorLeafTypes.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } else if (hRules.operandLeafTypes.has(type)) { + const text = node.text; + acc.operands.set(text, (acc.operands.get(text) || 0) + 1); + } + } +} + +// ── Branch complexity classification ──────────────────────────────────── + +function classifyBranchNode(node, type, nestingLevel, cRules, acc) { + // Pattern A: else clause wraps if (JS/C#/Rust) + if (cRules.elseNodeType && type === cRules.elseNodeType) { + const firstChild = node.namedChild(0); + if (firstChild && firstChild.type === cRules.ifNodeType) { + // else-if: the if_statement child handles its own increment + return; + } + acc.cognitive++; + return; + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if (cRules.elifNodeType && type === cRules.elifNodeType) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Detect else-if via Pattern A or C + let isElseIf = false; + if (type === cRules.ifNodeType) { + if (cRules.elseViaAlternative) { + isElseIf = + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id; 
+ } else if (cRules.elseNodeType) { + isElseIf = node.parent?.type === cRules.elseNodeType; + } + } + + if (isElseIf) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Regular branch node + acc.cognitive += 1 + nestingLevel; + acc.cyclomatic++; + + if (cRules.switchLikeNodes?.has(type)) { + acc.cyclomatic--; + } +} + +// ── Plain-else detection (Pattern C: Go/Java) ────────────────────────── + +function classifyPlainElse(node, type, cRules, acc) { + if ( + cRules.elseViaAlternative && + type !== cRules.ifNodeType && + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id + ) { + acc.cognitive++; + } +} + +// ── Result collection ─────────────────────────────────────────────────── + +function collectResult(funcNode, acc, hRules, langId) { + const halstead = + hRules && acc.operators && acc.operands + ? computeHalsteadDerived(acc.operators, acc.operands) + : null; + const loc = computeLOCMetrics(funcNode, langId); + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, acc.cyclomatic, loc.sloc, commentRatio); + + return { + cognitive: acc.cognitive, + cyclomatic: acc.cyclomatic, + maxNesting: acc.maxNesting, + halstead, + loc, + mi, + }; +} + +function resetAccumulators(hRules) { + return { + cognitive: 0, + cyclomatic: 1, + maxNesting: 0, + operators: hRules ? new Map() : null, + operands: hRules ? new Map() : null, + halsteadSkipDepth: 0, + }; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + /** * Create a complexity visitor for use with walkWithVisitors. * @@ -28,43 +144,12 @@ import { export function createComplexityVisitor(cRules, hRules, options = {}) { const { fileLevelWalk = false, langId = null } = options; - // Per-function accumulators - let cognitive = 0; - let cyclomatic = 1; - let maxNesting = 0; - let operators = hRules ? 
new Map() : null; - let operands = hRules ? new Map() : null; - let halsteadSkipDepth = 0; - - // In file-level mode, we only count when inside a function + let acc = resetAccumulators(hRules); let activeFuncNode = null; let activeFuncName = null; - // Nesting depth relative to the active function (for nested functions) let funcDepth = 0; - - // Collected results (one per function) const results = []; - function reset() { - cognitive = 0; - cyclomatic = 1; - maxNesting = 0; - operators = hRules ? new Map() : null; - operands = hRules ? new Map() : null; - halsteadSkipDepth = 0; - } - - function collectResult(funcNode) { - const halstead = - hRules && operators && operands ? computeHalsteadDerived(operators, operands) : null; - const loc = computeLOCMetrics(funcNode, langId); - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, cyclomatic, loc.sloc, commentRatio); - - return { cognitive, cyclomatic, maxNesting, halstead, loc, mi }; - } - return { name: 'complexity', functionNodeTypes: cRules.functionNodes, @@ -72,17 +157,14 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { enterFunction(funcNode, funcName, _context) { if (fileLevelWalk) { if (!activeFuncNode) { - // Top-level function: start fresh - reset(); + acc = resetAccumulators(hRules); activeFuncNode = funcNode; activeFuncName = funcName; funcDepth = 0; } else { - // Nested function: increase nesting for complexity funcDepth++; } } else { - // Function-level mode: track nested functions for correct nesting depth funcDepth++; } }, @@ -90,11 +172,10 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { exitFunction(funcNode, _funcName, _context) { if (fileLevelWalk) { if (funcNode === activeFuncNode) { - // Leaving the top-level function: emit result results.push({ funcNode, funcName: activeFuncName, - metrics: collectResult(funcNode), + metrics: 
collectResult(funcNode, acc, hRules, langId), }); activeFuncNode = null; activeFuncName = null; @@ -107,137 +188,52 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { }, enterNode(node, context) { - // In file-level mode, skip nodes outside any function if (fileLevelWalk && !activeFuncNode) return; const type = node.type; const nestingLevel = fileLevelWalk ? context.nestingLevel + funcDepth : context.nestingLevel; - // ── Halstead classification ── - if (hRules) { - if (hRules.skipTypes.has(type)) halsteadSkipDepth++; - if (halsteadSkipDepth === 0) { - if (hRules.compoundOperators.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } - if (node.childCount === 0) { - if (hRules.operatorLeafTypes.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } else if (hRules.operandLeafTypes.has(type)) { - const text = node.text; - operands.set(text, (operands.get(text) || 0) + 1); - } - } - } - } + if (hRules) classifyHalstead(node, hRules, acc); - // ── Complexity: track nesting depth ── - if (nestingLevel > maxNesting) maxNesting = nestingLevel; + if (nestingLevel > acc.maxNesting) acc.maxNesting = nestingLevel; - // Handle logical operators in binary expressions + // Logical operators in binary expressions if (type === cRules.logicalNodeType) { const op = node.child(1)?.type; if (op && cRules.logicalOperators.has(op)) { - cyclomatic++; + acc.cyclomatic++; const parent = node.parent; let sameSequence = false; if (parent && parent.type === cRules.logicalNodeType) { const parentOp = parent.child(1)?.type; if (parentOp === op) sameSequence = true; } - if (!sameSequence) cognitive++; - // Don't skip children — walker handles recursion + if (!sameSequence) acc.cognitive++; } } - // Handle optional chaining (cyclomatic only) - if (type === cRules.optionalChainType) { - cyclomatic++; - } + // Optional chaining (cyclomatic only) + if (type === cRules.optionalChainType) acc.cyclomatic++; - // Handle branch/control flow 
nodes (skip keyword leaf tokens) + // Branch/control flow nodes (skip keyword leaf tokens) if (cRules.branchNodes.has(type) && node.childCount > 0) { - // Pattern A: else clause wraps if (JS/C#/Rust) - if (cRules.elseNodeType && type === cRules.elseNodeType) { - const firstChild = node.namedChild(0); - if (firstChild && firstChild.type === cRules.ifNodeType) { - // else-if: the if_statement child handles its own increment - return; - } - cognitive++; - return; - } - - // Pattern B: explicit elif node (Python/Ruby/PHP) - if (cRules.elifNodeType && type === cRules.elifNodeType) { - cognitive++; - cyclomatic++; - return; - } - - // Detect else-if via Pattern A or C - let isElseIf = false; - if (type === cRules.ifNodeType) { - if (cRules.elseViaAlternative) { - isElseIf = - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id; - } else if (cRules.elseNodeType) { - isElseIf = node.parent?.type === cRules.elseNodeType; - } - } - - if (isElseIf) { - cognitive++; - cyclomatic++; - return; - } - - // Regular branch node - cognitive += 1 + nestingLevel; - cyclomatic++; - - if (cRules.switchLikeNodes?.has(type)) { - cyclomatic--; - } - - // Nesting nodes are handled by the walker's nestingNodeTypes option - // But we still need them to count in complexity — they already do above - } - - // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) - if ( - cRules.elseViaAlternative && - type !== cRules.ifNodeType && - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id - ) { - cognitive++; + classifyBranchNode(node, type, nestingLevel, cRules, acc); } - // Handle case nodes (cyclomatic only, skip keyword leaves) - if (cRules.caseNodes.has(type) && node.childCount > 0) { - cyclomatic++; - } + // Pattern C plain else (Go/Java) + classifyPlainElse(node, type, cRules, acc); - // Handle nested function definitions (increase nesting) - // In file-level 
mode funcDepth handles this; in function-level mode the - // nestingNodeTypes option should include function nodes + // Case nodes (cyclomatic only, skip keyword leaves) + if (cRules.caseNodes.has(type) && node.childCount > 0) acc.cyclomatic++; }, exitNode(node) { - // Decrement skip depth when leaving a skip-type subtree - if (hRules?.skipTypes.has(node.type)) { - halsteadSkipDepth--; - } + if (hRules?.skipTypes.has(node.type)) acc.halsteadSkipDepth--; }, finish() { - if (fileLevelWalk) { - return results; - } - // Function-level mode: return single result (no funcNode reference needed) - return collectResult({ text: '' }); + if (fileLevelWalk) return results; + return collectResult({ text: '' }, acc, hRules, langId); }, }; } diff --git a/src/ast-analysis/visitors/dataflow-visitor.js b/src/ast-analysis/visitors/dataflow-visitor.js index c6fe9fa9..644490be 100644 --- a/src/ast-analysis/visitors/dataflow-visitor.js +++ b/src/ast-analysis/visitors/dataflow-visitor.js @@ -21,254 +21,280 @@ import { truncate, } from '../visitor-utils.js'; -/** - * Create a dataflow visitor for use with walkWithVisitors. - * - * @param {object} rules - DATAFLOW_RULES for the language - * @returns {Visitor} - */ -export function createDataflowVisitor(rules) { - const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; - - const parameters = []; - const returns = []; - const assignments = []; - const argFlows = []; - const mutations = []; +// ── Scope helpers ─────────────────────────────────────────────────────── - const scopeStack = []; +function currentScope(scopeStack) { + return scopeStack.length > 0 ? scopeStack[scopeStack.length - 1] : null; +} - function currentScope() { - return scopeStack.length > 0 ? 
scopeStack[scopeStack.length - 1] : null; +function findBinding(name, scopeStack) { + for (let i = scopeStack.length - 1; i >= 0; i--) { + const scope = scopeStack[i]; + if (scope.params.has(name)) + return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; + if (scope.locals.has(name)) + return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; } + return null; +} - function findBinding(name) { - for (let i = scopeStack.length - 1; i >= 0; i--) { - const scope = scopeStack[i]; - if (scope.params.has(name)) - return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; - if (scope.locals.has(name)) - return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; - } - return null; +function bindingConfidence(binding) { + if (!binding) return 0.5; + if (binding.type === 'param') return 1.0; + if (binding.type === 'local') { + if (binding.source?.type === 'call_return') return 0.9; + if (binding.source?.type === 'destructured') return 0.8; + return 0.9; } + return 0.5; +} - function bindingConfidence(binding) { - if (!binding) return 0.5; - if (binding.type === 'param') return 1.0; - if (binding.type === 'local') { - if (binding.source?.type === 'call_return') return 0.9; - if (binding.source?.type === 'destructured') return 0.8; - return 0.9; - } - return 0.5; - } +// ── Node helpers ──────────────────────────────────────────────────────── - function unwrapAwait(node) { - if (rules.awaitNode && node.type === rules.awaitNode) { - return node.namedChildren[0] || node; - } - return node; +function unwrapAwait(node, rules) { + if (rules.awaitNode && node.type === rules.awaitNode) { + return node.namedChildren[0] || node; } + return node; +} - function isCall(node) { - return node && isCallNode(node.type); - } +function isCall(node, isCallNode) { + return node && isCallNode(node.type); +} - function handleVarDeclarator(node) { - let nameNode = 
node.childForFieldName(rules.varNameField); - let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; +// ── Node handlers ─────────────────────────────────────────────────────── - if (!valueNode && rules.equalsClauseType) { - for (const child of node.namedChildren) { - if (child.type === rules.equalsClauseType) { - valueNode = child.childForFieldName('value') || child.namedChildren[0]; - break; - } - } - } +function handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode) { + let nameNode = node.childForFieldName(rules.varNameField); + let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; - if (!valueNode) { - for (const child of node.namedChildren) { - if (child !== nameNode && isCall(unwrapAwait(child))) { - valueNode = child; - break; - } + if (!valueNode && rules.equalsClauseType) { + for (const child of node.namedChildren) { + if (child.type === rules.equalsClauseType) { + valueNode = child.childForFieldName('value') || child.namedChildren[0]; + break; } } + } - if (rules.expressionListType) { - if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; - if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + if (!valueNode) { + for (const child of node.namedChildren) { + if (child !== nameNode && isCall(unwrapAwait(child, rules), isCallNode)) { + valueNode = child; + break; + } } + } - const scope = currentScope(); - if (!nameNode || !valueNode || !scope) return; - - const unwrapped = unwrapAwait(valueNode); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; + if (rules.expressionListType) { + if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; + if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + } - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee && scope.funcName) { - if ( - (rules.objectDestructType && nameNode.type === rules.objectDestructType) || - (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) - ) { - const names = extractParamNames(nameNode, rules); - for (const n of names) { - assignments.push({ - varName: n, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(n, { type: 'destructured', callee }); - } - } else { - const varName = - nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier - ? nameNode.text - : nameNode.text; + const scope = currentScope(scopeStack); + if (!nameNode || !valueNode || !scope) return; + + const unwrapped = unwrapAwait(valueNode, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee && scope.funcName) { + if ( + (rules.objectDestructType && nameNode.type === rules.objectDestructType) || + (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) + ) { + const names = extractParamNames(nameNode, rules); + for (const n of names) { assignments.push({ - varName, + varName: n, callerFunc: scope.funcName, sourceCallName: callee, expression: truncate(node.text), line: node.startPosition.row + 1, }); - scope.locals.set(varName, { type: 'call_return', callee }); + scope.locals.set(n, { type: 'destructured', callee }); } + } else { + const varName = + nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier + ? 
nameNode.text + : nameNode.text; + assignments.push({ + varName, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(varName, { type: 'call_return', callee }); } } } +} - function handleAssignment(node) { - const left = node.childForFieldName(rules.assignLeftField); - const right = node.childForFieldName(rules.assignRightField); - const scope = currentScope(); - if (!scope?.funcName) return; - - if (left && rules.memberNode && left.type === rules.memberNode) { - const receiver = memberReceiver(left, rules); - if (receiver) { - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(node.text), - line: node.startPosition.row + 1, - }); - } +function handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode) { + const left = node.childForFieldName(rules.assignLeftField); + const right = node.childForFieldName(rules.assignRightField); + const scope = currentScope(scopeStack); + if (!scope?.funcName) return; + + if (left && rules.memberNode && left.type === rules.memberNode) { + const receiver = memberReceiver(left, rules); + if (receiver) { + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(node.text), + line: node.startPosition.row + 1, + }); } } + } - if (left && isIdent(left.type, rules) && right) { - const unwrapped = unwrapAwait(right); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee) { - assignments.push({ - varName: left.text, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(left.text, { type: 'call_return', callee }); - } + if (left && isIdent(left.type, rules) && right) { + const unwrapped = unwrapAwait(right, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee) { + assignments.push({ + varName: left.text, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(left.text, { type: 'call_return', callee }); } } } +} - function handleCallExpr(node) { - const callee = resolveCalleeName(node, rules); - const argsNode = node.childForFieldName(rules.callArgsField); - const scope = currentScope(); - if (!callee || !argsNode || !scope?.funcName) return; +function handleCallExpr(node, rules, scopeStack, argFlows) { + const callee = resolveCalleeName(node, rules); + const argsNode = node.childForFieldName(rules.callArgsField); + const scope = currentScope(scopeStack); + if (!callee || !argsNode || !scope?.funcName) return; - let argIndex = 0; - for (let arg of argsNode.namedChildren) { - if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { - arg = arg.namedChildren[0] || arg; - } - const unwrapped = - rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg; - if (!unwrapped) { - argIndex++; - continue; - } + let argIndex = 0; + for (let arg of argsNode.namedChildren) { + if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { + arg = arg.namedChildren[0] || arg; + } + const unwrapped = + rules.spreadType && arg.type === rules.spreadType ? 
arg.namedChildren[0] || arg : arg; + if (!unwrapped) { + argIndex++; + continue; + } - const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; - const argMember = - rules.memberNode && unwrapped.type === rules.memberNode - ? memberReceiver(unwrapped, rules) - : null; - const trackedName = argName || argMember; - - if (trackedName) { - const binding = findBinding(trackedName); - if (binding) { - argFlows.push({ - callerFunc: scope.funcName, - calleeName: callee, - argIndex, - argName: trackedName, - binding, - confidence: bindingConfidence(binding), - expression: truncate(arg.text), - line: node.startPosition.row + 1, - }); - } + const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; + const argMember = + rules.memberNode && unwrapped.type === rules.memberNode + ? memberReceiver(unwrapped, rules) + : null; + const trackedName = argName || argMember; + + if (trackedName) { + const binding = findBinding(trackedName, scopeStack); + if (binding) { + argFlows.push({ + callerFunc: scope.funcName, + calleeName: callee, + argIndex, + argName: trackedName, + binding, + confidence: bindingConfidence(binding), + expression: truncate(arg.text), + line: node.startPosition.row + 1, + }); } - argIndex++; } + argIndex++; } +} - function handleExprStmtMutation(node) { - if (rules.mutatingMethods.size === 0) return; - const expr = node.namedChildren[0]; - if (!expr || !isCall(expr)) return; +function handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode) { + if (rules.mutatingMethods.size === 0) return; + const expr = node.namedChildren[0]; + if (!expr || !isCall(expr, isCallNode)) return; - let methodName = null; - let receiver = null; + let methodName = null; + let receiver = null; - const fn = expr.childForFieldName(rules.callFunctionField); - if (fn && fn.type === rules.memberNode) { - const prop = fn.childForFieldName(rules.memberPropertyField); - methodName = prop ? 
prop.text : null; - receiver = memberReceiver(fn, rules); - } + const fn = expr.childForFieldName(rules.callFunctionField); + if (fn && fn.type === rules.memberNode) { + const prop = fn.childForFieldName(rules.memberPropertyField); + methodName = prop ? prop.text : null; + receiver = memberReceiver(fn, rules); + } - if (!receiver && rules.callObjectField) { - const obj = expr.childForFieldName(rules.callObjectField); - const name = expr.childForFieldName(rules.callFunctionField); - if (obj && name) { - methodName = name.text; - receiver = isIdent(obj.type, rules) ? obj.text : null; - } + if (!receiver && rules.callObjectField) { + const obj = expr.childForFieldName(rules.callObjectField); + const name = expr.childForFieldName(rules.callFunctionField); + if (obj && name) { + methodName = name.text; + receiver = isIdent(obj.type, rules) ? obj.text : null; } + } - if (!methodName || !rules.mutatingMethods.has(methodName)) return; + if (!methodName || !rules.mutatingMethods.has(methodName)) return; - const scope = currentScope(); - if (!receiver || !scope?.funcName) return; + const scope = currentScope(scopeStack); + if (!receiver || !scope?.funcName) return; - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(expr.text), - line: node.startPosition.row + 1, - }); - } + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(expr.text), + line: node.startPosition.row + 1, + }); } +} + +// ── Return statement handler ──────────────────────────────────────────── + +function handleReturn(node, rules, scopeStack, returns) { + if (node.parent?.type === rules.returnNode) return; // keyword token, not statement + + const scope = currentScope(scopeStack); + if (scope?.funcName) { + const expr = node.namedChildren[0]; + const referencedNames = []; 
+ if (expr) collectIdentifiers(expr, referencedNames, rules); + returns.push({ + funcName: scope.funcName, + expression: truncate(expr ? expr.text : ''), + referencedNames, + line: node.startPosition.row + 1, + }); + } +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a dataflow visitor for use with walkWithVisitors. + * + * @param {object} rules - DATAFLOW_RULES for the language + * @returns {Visitor} + */ +export function createDataflowVisitor(rules) { + const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; + + const parameters = []; + const returns = []; + const assignments = []; + const argFlows = []; + const mutations = []; + const scopeStack = []; return { name: 'dataflow', @@ -300,54 +326,34 @@ export function createDataflowVisitor(rules) { enterNode(node, _context) { const t = node.type; - // Skip function nodes — handled by enterFunction/exitFunction if (rules.functionNodes.has(t)) return; - // Return statements (skip keyword tokens inside return statements, e.g. Ruby's - // `return` node nests a `return` keyword child with the same type string) if (rules.returnNode && t === rules.returnNode) { - if (node.parent?.type === rules.returnNode) return; // keyword token, not statement - - const scope = currentScope(); - if (scope?.funcName) { - const expr = node.namedChildren[0]; - const referencedNames = []; - if (expr) collectIdentifiers(expr, referencedNames, rules); - returns.push({ - funcName: scope.funcName, - expression: truncate(expr ? 
expr.text : ''), - referencedNames, - line: node.startPosition.row + 1, - }); - } + handleReturn(node, rules, scopeStack, returns); return; } - // Variable declarations if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } if (rules.varDeclaratorNodes?.has(t)) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } - // Call expressions if (isCallNode(t)) { - handleCallExpr(node); + handleCallExpr(node, rules, scopeStack, argFlows); return; } - // Assignment expressions if (rules.assignmentNode && t === rules.assignmentNode) { - handleAssignment(node); + handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode); return; } - // Mutation detection via expression_statement if (rules.expressionStmtNode && t === rules.expressionStmtNode) { - handleExprStmtMutation(node); + handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode); } }, From 46a95aeaa4b5d8c1a62a9c8dbdc465c8890b6dad Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:24:42 -0600 Subject: [PATCH 12/21] refactor: decompose domain builder stages into focused helpers Extract edge-building by type (import, call-native, call-JS, class hierarchy) from buildEdges. Extract per-phase insertion logic from insertNodes. Extract scoped/incremental/full-build paths and reverse-dep cascade from detectChanges. Extract setup, engine init, alias loading from pipeline.js. Extract node/edge-building helpers from incremental.js rebuildFile. 
Impact: 44 functions changed, 19 affected --- src/domain/graph/builder/incremental.js | 206 ++++--- src/domain/graph/builder/pipeline.js | 186 +++--- .../graph/builder/stages/build-edges.js | 557 ++++++++++-------- .../graph/builder/stages/detect-changes.js | 372 ++++++------ .../graph/builder/stages/insert-nodes.js | 286 ++++----- 5 files changed, 889 insertions(+), 718 deletions(-) diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.js index f04a136e..63694385 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.js @@ -12,10 +12,121 @@ import { parseFileIncremental } from '../../parser.js'; import { computeConfidence, resolveImportPath } from '../resolve.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; +// ── Node insertion ────────────────────────────────────────────────────── + +function insertFileNodes(stmts, relPath, symbols) { + stmts.insertNode.run(relPath, 'file', relPath, 0, null); + for (const def of symbols.definitions) { + stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + } + for (const exp of symbols.exports) { + stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + } +} + +// ── Import edge building ──────────────────────────────────────────────── + +function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases) { + let edgesAdded = 0; + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); + if (targetRow) { + const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 
'imports-type' : 'imports'; + stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); + edgesAdded++; + } + } + return edgesAdded; +} + +function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +// ── Call edge building ────────────────────────────────────────────────── + +function findCaller(call, definitions, relPath, stmts) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; + } + } + } else if (!caller) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller; +} + +function resolveCallTargets(stmts, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + if (importedFrom) { + targets = stmts.findNodeInFile.all(call.name, importedFrom); + } + if (!targets || targets.length === 0) { + targets = stmts.findNodeInFile.all(call.name, relPath); + if (targets.length === 0) { + targets = stmts.findNodeByName.all(call.name); + } + } + return { targets, importedFrom }; +} + +function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { + let edgesAdded = 0; + for (const call of symbols.calls) { + if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, stmts) || fileNodeRow; + const { targets, importedFrom } = 
resolveCallTargets(stmts, call, relPath, importedNames); + + for (const t of targets) { + if (t.id !== caller.id) { + const confidence = computeConfidence(relPath, t.file, importedFrom ?? null); + stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); + edgesAdded++; + } + } + } + return edgesAdded; +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * Parse a single file and update the database incrementally. * - * @param {import('better-sqlite3').Database} db + * @param {import('better-sqlite3').Database} _db * @param {string} rootDir - Absolute root directory * @param {string} filePath - Absolute file path * @param {object} stmts - Prepared DB statements @@ -61,105 +172,20 @@ export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cac const symbols = await parseFileIncremental(cache, filePath, code, engineOpts); if (!symbols) return null; - // Insert nodes - stmts.insertNode.run(relPath, 'file', relPath, 0, null); - for (const def of symbols.definitions) { - stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); - } - for (const exp of symbols.exports) { - stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); - } + insertFileNodes(stmts, relPath, symbols); const newNodes = stmts.countNodes.get(relPath)?.c || 0; const newSymbols = diffSymbols ? 
stmts.listSymbols.all(relPath) : []; - let edgesAdded = 0; const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0); if (!fileNodeRow) return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 }; - const fileNodeId = fileNodeRow.id; - // Load aliases for import resolution const aliases = { baseUrl: null, paths: {} }; - // Import edges - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports'; - stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); - edgesAdded++; - } - } - - // Build import name → resolved file mapping - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - for (const name of imp.names) { - importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); - } - } - - // Call edges - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const importedFrom = importedNames.get(call.name); - let targets; - if (importedFrom) { - targets = stmts.findNodeInFile.all(call.name, importedFrom); - } - 
if (!targets || targets.length === 0) { - targets = stmts.findNodeInFile.all(call.name, relPath); - if (targets.length === 0) { - targets = stmts.findNodeByName.all(call.name); - } - } - - for (const t of targets) { - if (t.id !== caller.id) { - const confidence = importedFrom - ? computeConfidence(relPath, t.file, importedFrom) - : computeConfidence(relPath, t.file, null); - stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); - edgesAdded++; - } - } - } + let edgesAdded = buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases); + const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases); + edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames); const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null; const event = oldNodes === 0 ? 'added' : 'modified'; diff --git a/src/domain/graph/builder/pipeline.js b/src/domain/graph/builder/pipeline.js index ea9848c5..963a0086 100644 --- a/src/domain/graph/builder/pipeline.js +++ b/src/domain/graph/builder/pipeline.js @@ -23,94 +23,73 @@ import { parseFiles } from './stages/parse-files.js'; import { resolveImports } from './stages/resolve-imports.js'; import { runAnalyses } from './stages/run-analyses.js'; -/** - * Build the dependency graph for a codebase. - * - * Signature and return value are identical to the original monolithic buildGraph(). 
- * - * @param {string} rootDir - Root directory to scan - * @param {object} [opts] - Build options - * @returns {Promise<{ phases: object } | undefined>} - */ -export async function buildGraph(rootDir, opts = {}) { - const ctx = new PipelineContext(); - ctx.buildStart = performance.now(); - ctx.opts = opts; +// ── Setup helpers ─────────────────────────────────────────────────────── - // ── Setup (creates DB, loads config, selects engine) ────────────── - ctx.rootDir = path.resolve(rootDir); - ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); - ctx.db = openDb(ctx.dbPath); - try { - initSchema(ctx.db); - - ctx.config = loadConfig(ctx.rootDir); - ctx.incremental = - opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - - ctx.engineOpts = { - engine: opts.engine || 'auto', - dataflow: opts.dataflow !== false, - ast: opts.ast !== false, - }; - const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); - ctx.engineName = engineName; - ctx.engineVersion = engineVersion; - info(`Using ${engineName} engine${engineVersion ? 
` (v${engineVersion})` : ''}`); - - // Engine/schema mismatch detection - ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; - ctx.forceFullRebuild = false; - if (ctx.incremental) { - const prevEngine = getBuildMeta(ctx.db, 'engine'); - if (prevEngine && prevEngine !== engineName) { - info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`); - ctx.forceFullRebuild = true; - } - const prevSchema = getBuildMeta(ctx.db, 'schema_version'); - if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { - info( - `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, - ); - ctx.forceFullRebuild = true; - } - } +function initializeEngine(ctx) { + ctx.engineOpts = { + engine: ctx.opts.engine || 'auto', + dataflow: ctx.opts.dataflow !== false, + ast: ctx.opts.ast !== false, + }; + const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); + ctx.engineName = engineName; + ctx.engineVersion = engineVersion; + info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); +} - // Path aliases - ctx.aliases = loadPathAliases(ctx.rootDir); - if (ctx.config.aliases) { - for (const [key, value] of Object.entries(ctx.config.aliases)) { - const pattern = key.endsWith('/') ? `${key}*` : key; - const target = path.resolve(ctx.rootDir, value); - ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; - } - } - if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { - info( - `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, - ); +function checkEngineSchemaMismatch(ctx) { + ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; + ctx.forceFullRebuild = false; + if (!ctx.incremental) return; + + const prevEngine = getBuildMeta(ctx.db, 'engine'); + if (prevEngine && prevEngine !== ctx.engineName) { + info(`Engine changed (${prevEngine} → ${ctx.engineName}), promoting to full rebuild.`); + ctx.forceFullRebuild = true; + } + const prevSchema = getBuildMeta(ctx.db, 'schema_version'); + if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { + info( + `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, + ); + ctx.forceFullRebuild = true; + } +} + +function loadAliases(ctx) { + ctx.aliases = loadPathAliases(ctx.rootDir); + if (ctx.config.aliases) { + for (const [key, value] of Object.entries(ctx.config.aliases)) { + const pattern = key.endsWith('/') ? `${key}*` : key; + const target = path.resolve(ctx.rootDir, value); + ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; } + } + if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { + info( + `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, + ); + } +} - ctx.timing.setupMs = performance.now() - ctx.buildStart; +function setupPipeline(ctx) { + ctx.rootDir = path.resolve(ctx.rootDir); + ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); + ctx.db = openDb(ctx.dbPath); + initSchema(ctx.db); - // ── Pipeline stages ───────────────────────────────────────────── - await collectFiles(ctx); - await detectChanges(ctx); + ctx.config = loadConfig(ctx.rootDir); + ctx.incremental = + ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - if (ctx.earlyExit) return; + initializeEngine(ctx); + checkEngineSchemaMismatch(ctx); + loadAliases(ctx); - await parseFiles(ctx); - await insertNodes(ctx); - await resolveImports(ctx); - await buildEdges(ctx); - await buildStructure(ctx); - await runAnalyses(ctx); - await finalize(ctx); - } catch (err) { - if (!ctx.earlyExit) closeDb(ctx.db); - throw err; - } + ctx.timing.setupMs = performance.now() - ctx.buildStart; +} +function formatTimingResult(ctx) { return { phases: { setupMs: +ctx.timing.setupMs.toFixed(1), @@ -128,3 +107,50 @@ export async function buildGraph(rootDir, opts = {}) { }, }; } + +// ── Pipeline stages execution ─────────────────────────────────────────── + +async function runPipelineStages(ctx) { + await collectFiles(ctx); + await detectChanges(ctx); + + if (ctx.earlyExit) return; + + await parseFiles(ctx); + await insertNodes(ctx); + await resolveImports(ctx); + await buildEdges(ctx); + await buildStructure(ctx); + await runAnalyses(ctx); + await finalize(ctx); +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * Build the dependency graph for a codebase. 
+ * + * Signature and return value are identical to the original monolithic buildGraph(). + * + * @param {string} rootDir - Root directory to scan + * @param {object} [opts] - Build options + * @returns {Promise<{ phases: object } | undefined>} + */ +export async function buildGraph(rootDir, opts = {}) { + const ctx = new PipelineContext(); + ctx.buildStart = performance.now(); + ctx.opts = opts; + ctx.rootDir = rootDir; + + try { + setupPipeline(ctx); + await runPipelineStages(ctx); + } catch (err) { + if (!ctx.earlyExit) closeDb(ctx.db); + throw err; + } + + if (ctx.earlyExit) return; + + return formatTimingResult(ctx); +} diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index a8879b62..f830ed1c 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -12,25 +12,18 @@ import { computeConfidence } from '../../resolve.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExport } from './resolve-imports.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildEdges(ctx) { - const { db, fileSymbols, barrelOnlyFiles, rootDir, engineName } = ctx; +// ── Node lookup setup ─────────────────────────────────────────────────── - const getNodeIdStmt = { +function makeGetNodeIdStmt(db) { + return { get: (name, kind, file, line) => { const id = getNodeId(db, name, kind, file, line); return id != null ? 
{ id } : undefined; }, }; +} - // Pre-load all nodes into lookup maps - const allNodes = db - .prepare( - `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, - ) - .all(); +function setupNodeLookups(ctx, allNodes) { ctx.nodesByName = new Map(); for (const node of allNodes) { if (!ctx.nodesByName.has(node.name)) ctx.nodesByName.set(node.name, []); @@ -42,253 +35,339 @@ export async function buildEdges(ctx) { if (!ctx.nodesByNameAndFile.has(key)) ctx.nodesByNameAndFile.set(key, []); ctx.nodesByNameAndFile.get(key).push(node); } +} - const t0 = performance.now(); - const buildEdgesTx = db.transaction(() => { - const allEdgeRows = []; +// ── Import edges ──────────────────────────────────────────────────────── - // ── Import edges ──────────────────────────────────────────────── - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - const fileNodeId = fileNodeRow.id; - - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport - ? 'reexports' - : imp.typeOnly - ? 'imports-type' - : imp.dynamicImport - ? 
'dynamic-imports' - : 'imports'; - allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); - - if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { - const resolvedSources = new Set(); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); - if ( - actualSource && - actualSource !== resolvedPath && - !resolvedSources.has(actualSource) - ) { - resolvedSources.add(actualSource); - const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); - if (actualRow) { - allEdgeRows.push([ - fileNodeId, - actualRow.id, - edgeKind === 'imports-type' - ? 'imports-type' - : edgeKind === 'dynamic-imports' - ? 'dynamic-imports' - : 'imports', - 0.9, - 0, - ]); - } - } - } - } - } +function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + const fileNodeId = fileNodeRow.id; + + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); + if (!targetRow) continue; + + const edgeKind = imp.reexport + ? 'reexports' + : imp.typeOnly + ? 'imports-type' + : imp.dynamicImport + ? 'dynamic-imports' + : 'imports'; + allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); + + if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { + buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows); } } + } +} - // ── Call/receiver/extends/implements edges ─────────────────────── - const native = engineName === 'native' ? 
loadNative() : null; - if (native?.buildCallEdges) { - const nativeFiles = []; - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = []; - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - let targetFile = resolvedPath; - if (isBarrelFile(ctx, resolvedPath)) { - const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); - if (actual) targetFile = actual; - } - importedNames.push({ name: cleanName, file: targetFile }); +function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, edgeRows) { + const resolvedSources = new Set(); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) { + resolvedSources.add(actualSource); + const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); + if (actualRow) { + const kind = + edgeKind === 'imports-type' + ? 'imports-type' + : edgeKind === 'dynamic-imports' + ? 
'dynamic-imports' + : 'imports'; + edgeRows.push([fileNodeId, actualRow.id, kind, 0.9, 0]); + } + } + } +} + +// ── Call edges (native engine) ────────────────────────────────────────── + +function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + const nativeFiles = []; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + nativeFiles.push({ + file: relPath, + fileNodeId: fileNodeRow.id, + definitions: symbols.definitions.map((d) => ({ + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + })), + calls: symbols.calls, + importedNames, + classes: symbols.classes, + }); + } + + const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); + for (const e of nativeEdges) { + allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + } +} + +function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { + const importedNames = []; + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + let targetFile = resolvedPath; + if (isBarrelFile(ctx, resolvedPath)) { + const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actual) targetFile = actual; + } + importedNames.push({ name: cleanName, file: targetFile }); + } + } + return importedNames; +} + +// ── Call edges (JS fallback) ──────────────────────────────────────────── + +function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if 
(barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); + const seenCallEdges = new Set(); + + buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, + ); + buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows); + } +} + +function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; } } + } else if (!caller) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller || fileNodeRow; +} - nativeFiles.push({ - file: relPath, - fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => ({ - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? 
null, - })), - calls: symbols.calls, - importedNames, - classes: symbols.classes, - }); +function resolveCallTargets(ctx, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + + if (importedFrom) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; + if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { + const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); + if (actualSource) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; } + } + } - const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); - for (const e of nativeEdges) { - allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + if (!targets || targets.length === 0) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; + if (targets.length === 0) { + targets = resolveByMethodOrGlobal(ctx, call, relPath); + } + } + + if (targets.length > 1) { + targets.sort((a, b) => { + const confA = computeConfidence(relPath, a.file, importedFrom); + const confB = computeConfidence(relPath, b.file, importedFrom); + return confB - confA; + }); + } + + return { targets, importedFrom }; +} + +function resolveByMethodOrGlobal(ctx, call, relPath) { + const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( + (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', + ); + if (methodCandidates.length > 0) return methodCandidates; + + if ( + !call.receiver || + call.receiver === 'this' || + call.receiver === 'self' || + call.receiver === 'super' + ) { + return (ctx.nodesByName.get(call.name) || []).filter( + (n) => computeConfidence(relPath, n.file, null) >= 0.5, + ); + } + return []; +} + +function buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, +) { + for (const call of symbols.calls) { + if (call.receiver && 
BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); + const isDynamic = call.dynamic ? 1 : 0; + const { targets, importedFrom } = resolveCallTargets(ctx, call, relPath, importedNames); + + for (const t of targets) { + const edgeKey = `${caller.id}|${t.id}`; + if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { + seenCallEdges.add(edgeKey); + const confidence = computeConfidence(relPath, t.file, importedFrom); + allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); } - } else { - // JS fallback - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - importedNames.set(cleanName, resolvedPath); - } - } + } - const seenCallEdges = new Set(); - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const isDynamic = call.dynamic ? 
1 : 0; - let targets; - const importedFrom = importedNames.get(call.name); - - if (importedFrom) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; - if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { - const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); - if (actualSource) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; - } - } - } - if (!targets || targets.length === 0) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; - if (targets.length === 0) { - const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( - (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', - ); - if (methodCandidates.length > 0) { - targets = methodCandidates; - } else if ( - !call.receiver || - call.receiver === 'this' || - call.receiver === 'self' || - call.receiver === 'super' - ) { - targets = (ctx.nodesByName.get(call.name) || []).filter( - (n) => computeConfidence(relPath, n.file, null) >= 0.5, - ); - } - } - } + // Receiver edge + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows); + } + } +} - if (targets.length > 1) { - targets.sort((a, b) => { - const confA = computeConfidence(relPath, a.file, importedFrom); - const confB = computeConfidence(relPath, b.file, importedFrom); - return confB - confA; - }); - } +function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); + } + } +} - for (const t of targets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { - seenCallEdges.add(edgeKey); - const confidence = computeConfidence(relPath, t.file, importedFrom); - allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); - } - } +// ── Class hierarchy edges ─────────────────────────────────────────────── - // Receiver edge - if ( - call.receiver && - !BUILTIN_RECEIVERS.has(call.receiver) && - call.receiver !== 'this' && - call.receiver !== 'self' && - call.receiver !== 'super' - ) { - const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; - const candidates = - samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; - const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); - if (receiverNodes.length > 0 && caller) { - const recvTarget = receiverNodes[0]; - const recvKey = `recv|${caller.id}|${recvTarget.id}`; - if (!seenCallEdges.has(recvKey)) { - seenCallEdges.add(recvKey); - allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); - } - } - } +function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { + for (const cls of symbols.classes) { + if (cls.extends) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = (ctx.nodesByName.get(cls.extends) || []).filter((n) => n.kind === 'class'); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); } + } + } - // Class extends edges - for (const cls of symbols.classes) { - if (cls.extends) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.extends) || []; - const targetRows = targetCandidates.filter((n) => n.kind === 'class'); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); - } - } - } - - if (cls.implements) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.implements) || []; - const targetRows = targetCandidates.filter( - (n) => n.kind === 'interface' || n.kind === 'class', - ); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); - } - } - } + if (cls.implements) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = 
(ctx.nodesByName.get(cls.implements) || []).filter( + (n) => n.kind === 'interface' || n.kind === 'class', + ); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); } } } + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function buildEdges(ctx) { + const { db, engineName } = ctx; + + const getNodeIdStmt = makeGetNodeIdStmt(db); + + const allNodes = db + .prepare( + `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, + ) + .all(); + setupNodeLookups(ctx, allNodes); + + const t0 = performance.now(); + const buildEdgesTx = db.transaction(() => { + const allEdgeRows = []; + + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + + const native = engineName === 'native' ? loadNative() : null; + if (native?.buildCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native); + } else { + buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows); + } batchInsertEdges(db, allEdgeRows); }); diff --git a/src/domain/graph/builder/stages/detect-changes.js b/src/domain/graph/builder/stages/detect-changes.js index 50ffbd1d..23d15245 100644 --- a/src/domain/graph/builder/stages/detect-changes.js +++ b/src/domain/graph/builder/stages/detect-changes.js @@ -13,12 +13,13 @@ import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; import { fileHash, fileStat, purgeFilesFromGraph, readFileSafe } from '../helpers.js'; +// ── Three-tier change detection ───────────────────────────────────────── + /** * Determine which files have changed since last build. 
- * Three-tier cascade: - * Tier 0 — Journal: O(changed) when watcher was running - * Tier 1 — mtime+size: O(n) stats, O(changed) reads - * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) + * Tier 0 — Journal: O(changed) when watcher was running + * Tier 1 — mtime+size: O(n) stats, O(changed) reads + * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) */ function getChangedFiles(db, allFiles, rootDir) { let hasTable = false; @@ -44,6 +45,17 @@ function getChangedFiles(db, allFiles, rootDir) { .map((r) => [r.file, r]), ); + const removed = detectRemovedFiles(existing, allFiles, rootDir); + + // Tier 0: Journal + const journalResult = tryJournalTier(db, existing, rootDir, removed); + if (journalResult) return journalResult; + + // Tier 1 + 2: mtime/size fast-path → hash comparison + return mtimeAndHashTiers(existing, allFiles, rootDir, removed); +} + +function detectRemovedFiles(existing, allFiles, rootDir) { const currentFiles = new Set(); for (const file of allFiles) { currentFiles.add(normalizePath(path.relative(rootDir, file))); @@ -55,51 +67,57 @@ function getChangedFiles(db, allFiles, rootDir) { removed.push(existingFile); } } + return removed; +} - // ── Tier 0: Journal ────────────────────────────────────────────── +function tryJournalTier(db, existing, rootDir, removed) { const journal = readJournal(rootDir); - if (journal.valid) { - const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); - const latestDbMtime = dbMtimes?.latest || 0; - const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - - if (hasJournalEntries && journal.timestamp >= latestDbMtime) { - debug( - `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, - ); - const changed = []; - - for (const relPath of journal.changed) { - const absPath = path.join(rootDir, relPath); - const stat = fileStat(absPath); - if (!stat) continue; - - let content; - try { - content = 
readFileSafe(absPath); - } catch { - continue; - } - const hash = fileHash(content); - const record = existing.get(relPath); - if (!record || record.hash !== hash) { - changed.push({ file: absPath, content, hash, relPath, stat }); - } - } + if (!journal.valid) return null; - const removedSet = new Set(removed); - for (const relPath of journal.removed) { - if (existing.has(relPath)) removedSet.add(relPath); - } + const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); + const latestDbMtime = dbMtimes?.latest || 0; + const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - return { changed, removed: [...removedSet], isFullBuild: false }; - } + if (!hasJournalEntries || journal.timestamp < latestDbMtime) { debug( `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`, ); + return null; } - // ── Tier 1: mtime+size fast-path ───────────────────────────────── + debug( + `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, + ); + const changed = []; + + for (const relPath of journal.changed) { + const absPath = path.join(rootDir, relPath); + const stat = fileStat(absPath); + if (!stat) continue; + + let content; + try { + content = readFileSafe(absPath); + } catch { + continue; + } + const hash = fileHash(content); + const record = existing.get(relPath); + if (!record || record.hash !== hash) { + changed.push({ file: absPath, content, hash, relPath, stat }); + } + } + + const removedSet = new Set(removed); + for (const relPath of journal.removed) { + if (existing.has(relPath)) removedSet.add(relPath); + } + + return { changed, removed: [...removedSet], isFullBuild: false }; +} + +function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { + // Tier 1: mtime+size fast-path const needsHash = []; const skipped = []; @@ -130,7 +148,7 @@ function getChangedFiles(db, allFiles, rootDir) { debug(`Tier 1: ${skipped.length} skipped by 
mtime+size, ${needsHash.length} need hash check`); } - // ── Tier 2: Hash comparison ────────────────────────────────────── + // Tier 2: Hash comparison const changed = []; for (const item of needsHash) { @@ -168,9 +186,10 @@ function getChangedFiles(db, allFiles, rootDir) { return { changed, removed, isFullBuild: false }; } +// ── Pending analysis ──────────────────────────────────────────────────── + /** * Run pending analysis pass when no file changes but analysis tables are empty. - * @returns {boolean} true if analysis was run and we should early-exit */ async function runPendingAnalysis(ctx) { const { db, opts, engineOpts, allFiles, rootDir } = ctx; @@ -213,9 +232,8 @@ async function runPendingAnalysis(ctx) { return true; } -/** - * Self-heal metadata-only updates (mtime/size) without re-parsing. - */ +// ── Metadata self-heal ────────────────────────────────────────────────── + function healMetadata(ctx) { const { db, metadataUpdates } = ctx; if (!metadataUpdates || metadataUpdates.length === 0) return; @@ -237,104 +255,91 @@ function healMetadata(ctx) { } } -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function detectChanges(ctx) { - const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; - - // Scoped builds already set parseChanges in collectFiles. - // Still need to purge removed files and set hasEmbeddings. 
- if (opts.scope) { - let hasEmbeddings = false; - try { - db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); - hasEmbeddings = true; - } catch { - /* table doesn't exist */ - } - ctx.hasEmbeddings = hasEmbeddings; +// ── Reverse-dependency cascade ────────────────────────────────────────── - // Reverse-dependency cascade BEFORE purging (needs existing edges to find importers) - const changePaths = ctx.parseChanges.map( - (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), - ); - const reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set([...changePaths, ...ctx.removed]); - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } +function findReverseDependencies(db, changedRelPaths, rootDir) { + const reverseDeps = new Set(); + if (changedRelPaths.size === 0) return reverseDeps; + + const findReverseDepsStmt = db.prepare(` + SELECT DISTINCT n_src.file FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' + `); + for (const relPath of changedRelPaths) { + for (const row of findReverseDepsStmt.all(relPath)) { + if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { + const absPath = path.join(rootDir, row.file); + if (fs.existsSync(absPath)) { + reverseDeps.add(row.file); } } } + } + return reverseDeps; +} - // Now purge changed + removed files - if (changePaths.length > 0 || ctx.removed.length > 0) { - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); - } +function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { + const { db, rootDir } = ctx; - // Delete outgoing edges for reverse-dep files and add to parse list - if (reverseDeps.size > 0) { - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); - } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); - } - info( - `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, - ); + if (changePaths.length > 0 || ctx.removed.length > 0) { + purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + } + + if (reverseDeps.size > 0) { + const deleteOutgoingEdgesForFile = db.prepare( + 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + for (const relPath of reverseDeps) { + deleteOutgoingEdgesForFile.run(relPath); + } + for (const relPath of reverseDeps) { + const absPath = path.join(rootDir, relPath); + ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); } - return; } +} - const increResult = - incremental && !forceFullRebuild - ? 
getChangedFiles(db, allFiles, rootDir) - : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; +// ── Scoped build path ─────────────────────────────────────────────────── - ctx.removed = increResult.removed; - ctx.isFullBuild = increResult.isFullBuild; - ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); - ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); +function handleScopedBuild(ctx) { + const { db, rootDir, opts } = ctx; - // Early exit: no changes detected - if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { - const ranAnalysis = await runPendingAnalysis(ctx); - if (ranAnalysis) { - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; - } + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ + } + ctx.hasEmbeddings = hasEmbeddings; - healMetadata(ctx); - info('No changes detected. 
Graph is up to date.'); - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; + const changePaths = ctx.parseChanges.map( + (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), + ); + + let reverseDeps = new Set(); + if (!opts.noReverseDeps) { + const changedRelPaths = new Set([...changePaths, ...ctx.removed]); + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); + } + + // Purge changed + removed files, then add reverse-deps + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); + + if (reverseDeps.size > 0) { + info( + `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, + ); } +} + +// ── Full/incremental build path ───────────────────────────────────────── + +function handleFullBuild(ctx) { + const { db } = ctx; - // ── Full build: truncate all tables ────────────────────────────── let hasEmbeddings = false; try { db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); @@ -344,19 +349,28 @@ export async function detectChanges(ctx) { } ctx.hasEmbeddings = hasEmbeddings; - if (ctx.isFullBuild) { - const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; - db.exec( - hasEmbeddings - ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` - : deletions, - ); - return; + const deletions = + 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + db.exec( + hasEmbeddings + ? 
`${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` + : deletions, + ); +} + +function handleIncrementalBuild(ctx) { + const { db, rootDir, opts } = ctx; + + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ } + ctx.hasEmbeddings = hasEmbeddings; - // ── Reverse-dependency cascade (incremental) ───────────────────── - const reverseDeps = new Set(); + let reverseDeps = new Set(); if (!opts.noReverseDeps) { const changedRelPaths = new Set(); for (const item of ctx.parseChanges) { @@ -365,25 +379,7 @@ export async function detectChanges(ctx) { for (const relPath of ctx.removed) { changedRelPaths.add(relPath); } - - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } - } - } + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } info( @@ -393,21 +389,57 @@ export async function detectChanges(ctx) { debug(`Changed files: ${ctx.parseChanges.map((c) => c.relPath).join(', ')}`); if (ctx.removed.length > 0) debug(`Removed files: ${ctx.removed.join(', ')}`); - // Purge changed and removed files const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); +} - // Delete outgoing edges for reverse-dep files, then add them to parse list - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function detectChanges(ctx) { + const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; + + // Scoped builds already set parseChanges in collectFiles + if (opts.scope) { + handleScopedBuild(ctx); + return; } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); + + const increResult = + incremental && !forceFullRebuild + ? 
getChangedFiles(db, allFiles, rootDir) + : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; + + ctx.removed = increResult.removed; + ctx.isFullBuild = increResult.isFullBuild; + ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); + ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); + + // Early exit: no changes detected + if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { + const ranAnalysis = await runPendingAnalysis(ctx); + if (ranAnalysis) { + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + healMetadata(ctx); + info('No changes detected. Graph is up to date.'); + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + if (ctx.isFullBuild) { + handleFullBuild(ctx); + } else { + handleIncrementalBuild(ctx); } } diff --git a/src/domain/graph/builder/stages/insert-nodes.js b/src/domain/graph/builder/stages/insert-nodes.js index 2eaf6a73..6e22c966 100644 --- a/src/domain/graph/builder/stages/insert-nodes.js +++ b/src/domain/graph/builder/stages/insert-nodes.js @@ -15,23 +15,159 @@ import { readFileSafe, } from '../helpers.js'; +// ── Phase 1: Insert file nodes, definitions, exports ──────────────────── + +function insertDefinitionsAndExports(db, allSymbols) { + const phase1Rows = []; + for (const [relPath, symbols] of allSymbols) { + phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); + for (const def of symbols.definitions) { + const dotIdx = def.name.lastIndexOf('.'); + const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; + phase1Rows.push([ + def.name, + def.kind, + relPath, + def.line, + def.endLine || null, + null, + def.name, + scope, + def.visibility || null, + ]); + } + for (const exp of symbols.exports) { + phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); + } + } + batchInsertNodes(db, phase1Rows); + + // Mark exported symbols + const markExported = db.prepare( + 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', + ); + for (const [relPath, symbols] of allSymbols) { + for (const exp of symbols.exports) { + markExported.run(exp.name, exp.kind, relPath, exp.line); + } + } +} + +// ── Phase 2: Insert children (needs parent IDs) ──────────────────────── + +function insertChildren(db, allSymbols) { + const childRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + for (const def of symbols.definitions) { + if (!def.children?.length) continue; + const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (!defId) continue; + for (const child of def.children) { + const qualifiedName = `${def.name}.${child.name}`; + childRows.push([ + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defId, + qualifiedName, + def.name, + child.visibility || null, + ]); + } + } + } + batchInsertNodes(db, childRows); +} + +// ── Phase 3: Insert containment + parameter_of edges ──────────────────── + +function insertContainmentEdges(db, allSymbols) { + const edgeRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + const fileId = nodeIdMap.get(`${relPath}|file|0`); + for (const def of symbols.definitions) { + const defId = 
nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (fileId && defId) { + edgeRows.push([fileId, defId, 'contains', 1.0, 0]); + } + if (def.children?.length && defId) { + for (const child of def.children) { + const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); + if (childId) { + edgeRows.push([defId, childId, 'contains', 1.0, 0]); + if (child.kind === 'parameter') { + edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); + } + } + } + } + } + } + batchInsertEdges(db, edgeRows); +} + +// ── Phase 4: Update file hashes ───────────────────────────────────────── + +function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash) { + if (!upsertHash) return; + + for (const [relPath] of allSymbols) { + const precomputed = precomputedData.get(relPath); + if (precomputed?._reverseDepOnly) { + // no-op: file unchanged, hash already correct + } else if (precomputed?.hash) { + const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, precomputed.hash, mtime, size); + } else { + const absPath = path.join(rootDir, relPath); + let code; + try { + code = readFileSafe(absPath); + } catch { + code = null; + } + if (code !== null) { + const stat = fileStat(absPath); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, fileHash(code), mtime, size); + } + } + } + + // Also update metadata-only entries (self-heal mtime/size without re-parse) + for (const item of metadataUpdates) { + const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const size = item.stat ? 
item.stat.size : 0; + upsertHash.run(item.relPath, item.hash, mtime, size); + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * @param {import('../context.js').PipelineContext} ctx */ export async function insertNodes(ctx) { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - // Build lookup from incremental data (pre-computed hashes + stats) const precomputedData = new Map(); for (const item of filesToParse) { - if (item.relPath) { - precomputedData.set(item.relPath, item); - } + if (item.relPath) precomputedData.set(item.relPath, item); } - const bulkGetNodeIds = { all: (file) => bulkNodeIdsByFile(db, file) }; - - // Prepare hash upsert let upsertHash; try { upsertHash = db.prepare( @@ -42,143 +178,15 @@ export async function insertNodes(ctx) { } // Populate fileSymbols before the transaction so it is a pure input - // to (rather than a side-effect of) the DB write — avoids partial - // population if the transaction rolls back. for (const [relPath, symbols] of allSymbols) { ctx.fileSymbols.set(relPath, symbols); } const insertAll = db.transaction(() => { - // Phase 1: Batch insert all file nodes + definitions + exports - // Row format: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] - const phase1Rows = []; - for (const [relPath, symbols] of allSymbols) { - phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); - for (const def of symbols.definitions) { - // Methods already have 'Class.method' as name — use as qualified_name. - // For methods, scope is the class portion; for top-level defs, scope is null. - const dotIdx = def.name.lastIndexOf('.'); - const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; - phase1Rows.push([ - def.name, - def.kind, - relPath, - def.line, - def.endLine || null, - null, - def.name, - scope, - def.visibility || null, - ]); - } - for (const exp of symbols.exports) { - phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); - } - } - batchInsertNodes(db, phase1Rows); - - // Phase 1b: Mark exported symbols - const markExported = db.prepare( - 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ); - for (const [relPath, symbols] of allSymbols) { - for (const exp of symbols.exports) { - markExported.run(exp.name, exp.kind, relPath, exp.line); - } - } - - // Phase 3: Batch insert children (needs parent IDs from Phase 2) - const childRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - for (const def of symbols.definitions) { - if (!def.children?.length) continue; - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (!defId) continue; - for (const child of def.children) { - const qualifiedName = `${def.name}.${child.name}`; - childRows.push([ - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - defId, - qualifiedName, - def.name, - child.visibility || null, - ]); - } - } - } - batchInsertNodes(db, childRows); - - // Phase 5: Batch insert contains/parameter_of edges - const edgeRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - const fileId = nodeIdMap.get(`${relPath}|file|0`); - for (const def of symbols.definitions) { - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (fileId && defId) { - edgeRows.push([fileId, defId, 'contains', 1.0, 0]); - } - if 
(def.children?.length && defId) { - for (const child of def.children) { - const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); - if (childId) { - edgeRows.push([defId, childId, 'contains', 1.0, 0]); - if (child.kind === 'parameter') { - edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); - } - } - } - } - } - - // Update file hash — skip reverse-dep files (unchanged) - if (upsertHash) { - const precomputed = precomputedData.get(relPath); - if (precomputed?._reverseDepOnly) { - // no-op: file unchanged, hash already correct - } else if (precomputed?.hash) { - const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, precomputed.hash, mtime, size); - } else { - const absPath = path.join(rootDir, relPath); - let code; - try { - code = readFileSafe(absPath); - } catch { - code = null; - } - if (code !== null) { - const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, fileHash(code), mtime, size); - } - } - } - } - batchInsertEdges(db, edgeRows); - - // Also update metadata-only entries (self-heal mtime/size without re-parse) - if (upsertHash) { - for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; - const size = item.stat ? 
item.stat.size : 0; - upsertHash.run(item.relPath, item.hash, mtime, size); - } - } + insertDefinitionsAndExports(db, allSymbols); + insertChildren(db, allSymbols); + insertContainmentEdges(db, allSymbols); + updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); const t0 = performance.now(); From 0a3fbc7d6aee22111cdd3408df68bb42871c955a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:30:02 -0600 Subject: [PATCH 13/21] refactor: decompose domain analysis functions into focused helpers Impact: 37 functions changed, 29 affected --- src/domain/analysis/context.js | 361 ++++++++++----------- src/domain/analysis/dependencies.js | 346 ++++++++++++--------- src/domain/analysis/impact.js | 418 ++++++++++++++++--------- src/domain/analysis/module-map.js | 467 +++++++++++++++------------- 4 files changed, 884 insertions(+), 708 deletions(-) diff --git a/src/domain/analysis/context.js b/src/domain/analysis/context.js index a97e5419..a8f3261f 100644 --- a/src/domain/analysis/context.js +++ b/src/domain/analysis/context.js @@ -27,6 +27,149 @@ import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; import { findMatchingNodes } from './symbol-lookup.js'; +function buildCallees(db, node, repoRoot, getFileLines, opts) { + const { noTests, depth } = opts; + const calleeRows = findCallees(db, node.id); + const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; + + const callees = filteredCallees.map((c) => { + const cLines = getFileLines(c.file); + const summary = cLines ? 
extractSummary(cLines, c.line) : null; + let calleeSource = null; + if (depth >= 1) { + calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); + } + return { + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary, + source: calleeSource, + }; + }); + + if (depth > 1) { + const visited = new Set(filteredCallees.map((c) => c.id)); + visited.add(node.id); + let frontier = filteredCallees.map((c) => c.id); + const maxDepth = Math.min(depth, 5); + for (let d = 2; d <= maxDepth; d++) { + const nextFrontier = []; + for (const fid of frontier) { + const deeper = findCallees(db, fid); + for (const c of deeper) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + const cLines = getFileLines(c.file); + callees.push({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary: cLines ? extractSummary(cLines, c.line) : null, + source: readSourceRange(repoRoot, c.file, c.line, c.end_line), + }); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + } + + return callees; +} + +function buildCallers(db, node, noTests) { + let callerRows = findCallers(db, node.id); + + if (node.kind === 'method' && node.name.includes('.')) { + const methodName = node.name.split('.').pop(); + const relatedMethods = resolveMethodViaHierarchy(db, methodName); + for (const rm of relatedMethods) { + if (rm.id === node.id) continue; + const extraCallers = findCallers(db, rm.id); + callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); + } + } + if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); + + return callerRows.map((c) => ({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + viaHierarchy: c.viaHierarchy || undefined, + })); +} + +function buildRelatedTests(db, node, getFileLines, includeTests) { + const testCallerRows = findCallers(db, 
node.id); + const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); + + const testsByFile = new Map(); + for (const tc of testCallers) { + if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); + testsByFile.get(tc.file).push(tc); + } + + const relatedTests = []; + for (const [file] of testsByFile) { + const tLines = getFileLines(file); + const testNames = []; + if (tLines) { + for (const tl of tLines) { + const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); + if (tm) testNames.push(tm[1]); + } + } + const testSource = includeTests && tLines ? tLines.join('\n') : undefined; + relatedTests.push({ + file, + testCount: testNames.length, + testNames, + source: testSource, + }); + } + + return relatedTests; +} + +function getComplexityMetrics(db, nodeId) { + try { + const cRow = getComplexityForNode(db, nodeId); + if (!cRow) return null; + return { + cognitive: cRow.cognitive, + cyclomatic: cRow.cyclomatic, + maxNesting: cRow.max_nesting, + maintainabilityIndex: cRow.maintainability_index || 0, + halsteadVolume: cRow.halstead_volume || 0, + }; + } catch (e) { + debug(`complexity lookup failed for node ${nodeId}: ${e.message}`); + return null; + } +} + +function getNodeChildrenSafe(db, nodeId) { + try { + return findNodeChildren(db, nodeId).map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })); + } catch (e) { + debug(`findNodeChildren failed for node ${nodeId}: ${e.message}`); + return []; + } +} + function explainFileImpl(db, target, getFileLines) { const fileNodes = findFileNodes(db, `%${target}%`); if (fileNodes.length === 0) return []; @@ -50,14 +193,10 @@ function explainFileImpl(db, target, getFileLines) { const publicApi = symbols.filter((s) => publicIds.has(s.id)).map(mapSymbol); const internal = symbols.filter((s) => !publicIds.has(s.id)).map(mapSymbol); - // Imports / importedBy const imports = findImportTargets(db, fn.id).map((r) => ({ file: r.file })); - const importedBy = 
findImportSources(db, fn.id).map((r) => ({ file: r.file })); - // Intra-file data flow const intraEdges = findIntraFileCallEdges(db, fn.file); - const dataFlowMap = new Map(); for (const edge of intraEdges) { if (!dataFlowMap.has(edge.caller_name)) dataFlowMap.set(edge.caller_name, []); @@ -68,7 +207,6 @@ function explainFileImpl(db, target, getFileLines) { callees, })); - // Line count: prefer node_metrics (actual), fall back to MAX(end_line) const metric = db .prepare(`SELECT nm.line_count FROM node_metrics nm WHERE nm.node_id = ?`) .get(fn.id); @@ -130,29 +268,12 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { .filter((r) => isTestFile(r.file) && !seenFiles.has(r.file) && seenFiles.add(r.file)) .map((r) => ({ file: r.file })); - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - return { ...normalizeSymbol(node, db, hc), lineCount, summary, signature, - complexity: complexityMetrics, + complexity: getComplexityMetrics(db, node.id), callees, callers, relatedTests, @@ -160,6 +281,28 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { }); } +function explainCallees(parentResults, currentDepth, visited, db, noTests, getFileLines) { + if (currentDepth <= 0) return; + for (const r of parentResults) { + const newCallees = []; + for (const callee of r.callees) { + const key = `${callee.name}:${callee.file}:${callee.line}`; + if (visited.has(key)) continue; + visited.add(key); + const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); + const exact = calleeResults.find((cr) => cr.file === callee.file && cr.line === 
callee.line); + if (exact) { + exact._depth = (r._depth || 0) + 1; + newCallees.push(exact); + } + } + if (newCallees.length > 0) { + r.depDetails = newCallees; + explainCallees(newCallees, currentDepth - 1, visited, db, noTests, getFileLines); + } + } +} + // ─── Exported functions ────────────────────────────────────────────────── export function contextData(name, customDbPath, opts = {}) { @@ -178,156 +321,22 @@ export function contextData(name, customDbPath, opts = {}) { return { name, results: [] }; } - // No hardcoded slice — pagination handles bounding via limit/offset - const getFileLines = createFileLinesReader(repoRoot); const results = nodes.map((node) => { const fileLines = getFileLines(node.file); - // Source const source = noSource ? null : readSourceRange(repoRoot, node.file, node.line, node.end_line); - // Signature const signature = fileLines ? extractSignature(fileLines, node.line) : null; - // Callees - const calleeRows = findCallees(db, node.id); - const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; - - const callees = filteredCallees.map((c) => { - const cLines = getFileLines(c.file); - const summary = cLines ? 
extractSummary(cLines, c.line) : null; - let calleeSource = null; - if (depth >= 1) { - calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); - } - return { - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary, - source: calleeSource, - }; - }); - - // Deep callee expansion via BFS (depth > 1, capped at 5) - if (depth > 1) { - const visited = new Set(filteredCallees.map((c) => c.id)); - visited.add(node.id); - let frontier = filteredCallees.map((c) => c.id); - const maxDepth = Math.min(depth, 5); - for (let d = 2; d <= maxDepth; d++) { - const nextFrontier = []; - for (const fid of frontier) { - const deeper = findCallees(db, fid); - for (const c of deeper) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - const cLines = getFileLines(c.file); - callees.push({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary: cLines ? 
extractSummary(cLines, c.line) : null, - source: readSourceRange(repoRoot, c.file, c.line, c.end_line), - }); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } - - // Callers - let callerRows = findCallers(db, node.id); - - // Method hierarchy resolution - if (node.kind === 'method' && node.name.includes('.')) { - const methodName = node.name.split('.').pop(); - const relatedMethods = resolveMethodViaHierarchy(db, methodName); - for (const rm of relatedMethods) { - if (rm.id === node.id) continue; - const extraCallers = findCallers(db, rm.id); - callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); - } - } - if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); - - const callers = callerRows.map((c) => ({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - viaHierarchy: c.viaHierarchy || undefined, - })); - - // Related tests: callers that live in test files - const testCallerRows = findCallers(db, node.id); - const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); - - const testsByFile = new Map(); - for (const tc of testCallers) { - if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); - testsByFile.get(tc.file).push(tc); - } - - const relatedTests = []; - for (const [file] of testsByFile) { - const tLines = getFileLines(file); - const testNames = []; - if (tLines) { - for (const tl of tLines) { - const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); - if (tm) testNames.push(tm[1]); - } - } - const testSource = includeTests && tLines ? 
tLines.join('\n') : undefined; - relatedTests.push({ - file, - testCount: testNames.length, - testNames, - source: testSource, - }); - } - - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - - // Children (parameters, properties, constants) - let nodeChildren = []; - try { - nodeChildren = findNodeChildren(db, node.id).map((c) => ({ - name: c.name, - kind: c.kind, - line: c.line, - endLine: c.end_line || null, - })); - } catch (e) { - debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); - } + const callees = buildCallees(db, node, repoRoot, getFileLines, { noTests, depth }); + const callers = buildCallers(db, node, noTests); + const relatedTests = buildRelatedTests(db, node, getFileLines, includeTests); + const complexityMetrics = getComplexityMetrics(db, node.id); + const nodeChildren = getNodeChildrenSafe(db, node.id); return { name: node.name, @@ -370,35 +379,9 @@ export function explainData(target, customDbPath, opts = {}) { ? 
explainFileImpl(db, target, getFileLines) : explainFunctionImpl(db, target, noTests, getFileLines); - // Recursive dependency explanation for function targets if (kind === 'function' && depth > 0 && results.length > 0) { const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`)); - - function explainCallees(parentResults, currentDepth) { - if (currentDepth <= 0) return; - for (const r of parentResults) { - const newCallees = []; - for (const callee of r.callees) { - const key = `${callee.name}:${callee.file}:${callee.line}`; - if (visited.has(key)) continue; - visited.add(key); - const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); - const exact = calleeResults.find( - (cr) => cr.file === callee.file && cr.line === callee.line, - ); - if (exact) { - exact._depth = (r._depth || 0) + 1; - newCallees.push(exact); - } - } - if (newCallees.length > 0) { - r.depDetails = newCallees; - explainCallees(newCallees, currentDepth - 1); - } - } - } - - explainCallees(results, depth); + explainCallees(results, depth, visited, db, noTests, getFileLines); } const base = { target, kind, results }; diff --git a/src/domain/analysis/dependencies.js b/src/domain/analysis/dependencies.js index e632470f..867cd5bd 100644 --- a/src/domain/analysis/dependencies.js +++ b/src/domain/analysis/dependencies.js @@ -46,6 +46,61 @@ export function fileDepsData(file, customDbPath, opts = {}) { } } +/** + * BFS transitive caller traversal starting from `callers` of `nodeId`. + * Returns an object keyed by depth (2..depth) → array of caller descriptors. + */ +function buildTransitiveCallers(db, callers, nodeId, depth, noTests) { + const transitiveCallers = {}; + if (depth <= 1) return transitiveCallers; + + const visited = new Set([nodeId]); + let frontier = callers + .map((c) => { + const row = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(c.name, c.kind, c.file, c.line); + return row ? 
{ ...c, id: row.id } : null; + }) + .filter(Boolean); + + for (let d = 2; d <= depth; d++) { + const nextFrontier = []; + for (const f of frontier) { + if (visited.has(f.id)) continue; + visited.add(f.id); + const upstream = db + .prepare(` + SELECT n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls' + `) + .all(f.id); + for (const u of upstream) { + if (noTests && isTestFile(u.file)) continue; + const uid = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(u.name, u.kind, u.file, u.line)?.id; + if (uid && !visited.has(uid)) { + nextFrontier.push({ ...u, id: uid }); + } + } + } + if (nextFrontier.length > 0) { + transitiveCallers[d] = nextFrontier.map((n) => ({ + name: n.name, + kind: n.kind, + file: n.file, + line: n.line, + })); + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + + return transitiveCallers; +} + export function fnDepsData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -75,55 +130,7 @@ export function fnDepsData(name, customDbPath, opts = {}) { } if (noTests) callers = callers.filter((c) => !isTestFile(c.file)); - // Transitive callers - const transitiveCallers = {}; - if (depth > 1) { - const visited = new Set([node.id]); - let frontier = callers - .map((c) => { - const row = db - .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') - .get(c.name, c.kind, c.file, c.line); - return row ? { ...c, id: row.id } : null; - }) - .filter(Boolean); - - for (let d = 2; d <= depth; d++) { - const nextFrontier = []; - for (const f of frontier) { - if (visited.has(f.id)) continue; - visited.add(f.id); - const upstream = db - .prepare(` - SELECT n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? 
AND e.kind = 'calls' - `) - .all(f.id); - for (const u of upstream) { - if (noTests && isTestFile(u.file)) continue; - const uid = db - .prepare( - 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ) - .get(u.name, u.kind, u.file, u.line)?.id; - if (uid && !visited.has(uid)) { - nextFrontier.push({ ...u, id: uid }); - } - } - } - if (nextFrontier.length > 0) { - transitiveCallers[d] = nextFrontier.map((n) => ({ - name: n.name, - kind: n.kind, - file: n.file, - line: n.line, - })); - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } + const transitiveCallers = buildTransitiveCallers(db, callers, node.id, depth, noTests); return { ...normalizeSymbol(node, db, hc), @@ -151,37 +158,40 @@ export function fnDepsData(name, customDbPath, opts = {}) { } } -export function pathData(from, to, customDbPath, opts = {}) { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - const maxDepth = opts.maxDepth || 10; - const edgeKinds = opts.edgeKinds || ['calls']; - const reverse = opts.reverse || false; +/** + * Resolve from/to symbol names to node records. + * Returns { sourceNode, targetNode, fromCandidates, toCandidates } on success, + * or { earlyResult } when a caller-facing error/not-found response should be returned immediately. 
+ */ +function resolveEndpoints(db, from, to, opts) { + const { noTests = false } = opts; - const fromNodes = findMatchingNodes(db, from, { - noTests, - file: opts.fromFile, - kind: opts.kind, - }); - if (fromNodes.length === 0) { - return { + const fromNodes = findMatchingNodes(db, from, { + noTests, + file: opts.fromFile, + kind: opts.kind, + }); + if (fromNodes.length === 0) { + return { + earlyResult: { from, to, found: false, error: `No symbol matching "${from}"`, fromCandidates: [], toCandidates: [], - }; - } + }, + }; + } - const toNodes = findMatchingNodes(db, to, { - noTests, - file: opts.toFile, - kind: opts.kind, - }); - if (toNodes.length === 0) { - return { + const toNodes = findMatchingNodes(db, to, { + noTests, + file: opts.toFile, + kind: opts.kind, + }); + if (toNodes.length === 0) { + return { + earlyResult: { from, to, found: false, @@ -190,18 +200,118 @@ export function pathData(from, to, customDbPath, opts = {}) { .slice(0, 5) .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })), toCandidates: [], - }; + }, + }; + } + + const fromCandidates = fromNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const toCandidates = toNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + + return { + sourceNode: fromNodes[0], + targetNode: toNodes[0], + fromCandidates, + toCandidates, + }; +} + +/** + * BFS from sourceId toward targetId. + * Returns { found, parent, alternateCount, foundDepth }. + * `parent` maps nodeId → { parentId, edgeKind }. + */ +function bfsShortestPath(db, sourceId, targetId, edgeKinds, reverse, maxDepth, noTests) { + const kindPlaceholders = edgeKinds.map(() => '?').join(', '); + + // Forward: source_id → target_id (A calls... calls B) + // Reverse: target_id → source_id (B is called by... called by A) + const neighborQuery = reverse + ? 
`SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` + : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.target_id = n.id + WHERE e.source_id = ? AND e.kind IN (${kindPlaceholders})`; + const neighborStmt = db.prepare(neighborQuery); + + const visited = new Set([sourceId]); + const parent = new Map(); + let queue = [sourceId]; + let found = false; + let alternateCount = 0; + let foundDepth = -1; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue = []; + for (const currentId of queue) { + const neighbors = neighborStmt.all(currentId, ...edgeKinds); + for (const n of neighbors) { + if (noTests && isTestFile(n.file)) continue; + if (n.id === targetId) { + if (!found) { + found = true; + foundDepth = depth; + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + } + alternateCount++; + continue; + } + if (!visited.has(n.id)) { + visited.add(n.id); + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + nextQueue.push(n.id); + } + } } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + return { found, parent, alternateCount, foundDepth }; +} + +/** + * Map an ordered list of node IDs (source → target) to path entries: each entry has the node's + * name, kind, file and line, plus the edge kind recorded in `parent` (null for the first node). + */ +function reconstructPath(db, pathIds, parent) { + const nodeCache = new Map(); + const getNode = (id) => { + if (nodeCache.has(id)) return nodeCache.get(id); + const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); + nodeCache.set(id, row); + return row; + }; + + return pathIds.map((id, idx) => { + const node = getNode(id); + const edgeKind = idx === 0 ? 
null : parent.get(id).edgeKind; + return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; + }); +} + +export function pathData(from, to, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const maxDepth = opts.maxDepth || 10; + const edgeKinds = opts.edgeKinds || ['calls']; + const reverse = opts.reverse || false; - const sourceNode = fromNodes[0]; - const targetNode = toNodes[0]; + const resolved = resolveEndpoints(db, from, to, { + noTests, + fromFile: opts.fromFile, + toFile: opts.toFile, + kind: opts.kind, + }); + if (resolved.earlyResult) return resolved.earlyResult; - const fromCandidates = fromNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); - const toCandidates = toNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const { sourceNode, targetNode, fromCandidates, toCandidates } = resolved; // Self-path if (sourceNode.id === targetNode.id) { @@ -228,55 +338,12 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // Build edge kind filter - const kindPlaceholders = edgeKinds.map(() => '?').join(', '); - - // BFS — direction depends on `reverse` flag - // Forward: source_id → target_id (A calls... calls B) - // Reverse: target_id → source_id (B is called by... called by A) - const neighborQuery = reverse - ? `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` - : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.target_id = n.id - WHERE e.source_id = ? 
AND e.kind IN (${kindPlaceholders})`; - const neighborStmt = db.prepare(neighborQuery); - - const visited = new Set([sourceNode.id]); - // parent map: nodeId → { parentId, edgeKind } - const parent = new Map(); - let queue = [sourceNode.id]; - let found = false; - let alternateCount = 0; - let foundDepth = -1; - - for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue = []; - for (const currentId of queue) { - const neighbors = neighborStmt.all(currentId, ...edgeKinds); - for (const n of neighbors) { - if (noTests && isTestFile(n.file)) continue; - if (n.id === targetNode.id) { - if (!found) { - found = true; - foundDepth = depth; - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - } - alternateCount++; - continue; - } - if (!visited.has(n.id)) { - visited.add(n.id); - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - nextQueue.push(n.id); - } - } - } - if (found) break; - queue = nextQueue; - if (queue.length === 0) break; - } + const { + found, + parent, + alternateCount: rawAlternateCount, + foundDepth, + } = bfsShortestPath(db, sourceNode.id, targetNode.id, edgeKinds, reverse, maxDepth, noTests); if (!found) { return { @@ -294,8 +361,8 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // alternateCount includes the one we kept; subtract 1 for "alternates" - alternateCount = Math.max(0, alternateCount - 1); + // rawAlternateCount includes the one we kept; subtract 1 for "alternates" + const alternateCount = Math.max(0, rawAlternateCount - 1); // Reconstruct path from target back to source const pathIds = [targetNode.id]; @@ -307,20 +374,7 @@ export function pathData(from, to, customDbPath, opts = {}) { } pathIds.reverse(); - // Build path with node info - const nodeCache = new Map(); - const getNode = (id) => { - if (nodeCache.has(id)) return nodeCache.get(id); - const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); - nodeCache.set(id, row); - return row; - 
}; - - const resultPath = pathIds.map((id, idx) => { - const node = getNode(id); - const edgeKind = idx === 0 ? null : parent.get(id).edgeKind; - return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; - }); + const resultPath = reconstructPath(db, pathIds, parent); return { from, diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index bd3bbe1d..6bdd5464 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -134,6 +134,251 @@ export function fnImpactData(name, customDbPath, opts = {}) { } } +// ─── diffImpactData helpers ───────────────────────────────────────────── + +/** + * Walk up from repoRoot until a .git directory is found. + * Returns true if a git root exists, false otherwise. + * + * @param {string} repoRoot + * @returns {boolean} + */ +function findGitRoot(repoRoot) { + let checkDir = repoRoot; + while (checkDir) { + if (fs.existsSync(path.join(checkDir, '.git'))) { + return true; + } + const parent = path.dirname(checkDir); + if (parent === checkDir) break; + checkDir = parent; + } + return false; +} + +/** + * Execute git diff and return the raw output string. + * Returns `{ output: string }` on success or `{ error: string }` on failure. + * + * @param {string} repoRoot + * @param {{ staged?: boolean, ref?: string }} opts + * @returns {{ output: string } | { error: string }} + */ +function runGitDiff(repoRoot, opts) { + try { + const args = opts.staged + ? ['diff', '--cached', '--unified=0', '--no-color'] + : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; + const output = execFileSync('git', args, { + cwd: repoRoot, + encoding: 'utf-8', + maxBuffer: 10 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'], + }); + return { output }; + } catch (e) { + return { error: `Failed to run git diff: ${e.message}` }; + } +} + +/** + * Parse raw git diff output into a changedRanges map and newFiles set. 
+ * + * @param {string} diffOutput + * @returns {{ changedRanges: Map>, newFiles: Set }} + */ +function parseGitDiff(diffOutput) { + const changedRanges = new Map(); + const newFiles = new Set(); + let currentFile = null; + let prevIsDevNull = false; + + for (const line of diffOutput.split('\n')) { + if (line.startsWith('--- /dev/null')) { + prevIsDevNull = true; + continue; + } + if (line.startsWith('--- ')) { + prevIsDevNull = false; + continue; + } + const fileMatch = line.match(/^\+\+\+ b\/(.+)/); + if (fileMatch) { + currentFile = fileMatch[1]; + if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); + if (prevIsDevNull) newFiles.add(currentFile); + prevIsDevNull = false; + continue; + } + const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); + if (hunkMatch && currentFile) { + const start = parseInt(hunkMatch[1], 10); + const count = parseInt(hunkMatch[2] || '1', 10); + changedRanges.get(currentFile).push({ start, end: start + count - 1 }); + } + } + + return { changedRanges, newFiles }; +} + +/** + * Find all function/method/class nodes whose line ranges overlap any changed range. + * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @returns {Array} + */ +function findAffectedFunctions(db, changedRanges, noTests) { + const affectedFunctions = []; + for (const [file, ranges] of changedRanges) { + if (noTests && isTestFile(file)) continue; + const defs = db + .prepare( + `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`, + ) + .all(file); + for (let i = 0; i < defs.length; i++) { + const def = defs[i]; + const endLine = def.end_line || (defs[i + 1] ? 
defs[i + 1].line - 1 : 999999); + for (const range of ranges) { + if (range.start <= endLine && range.end >= def.line) { + affectedFunctions.push(def); + break; + } + } + } + } + return affectedFunctions; +} + +/** + * Run BFS per affected function, collecting per-function results and the full affected set. + * + * @param {import('better-sqlite3').Database} db + * @param {Array} affectedFunctions + * @param {boolean} noTests + * @param {number} maxDepth + * @returns {{ functionResults: Array, allAffected: Set }} + */ +function buildFunctionImpactResults(db, affectedFunctions, noTests, maxDepth) { + const allAffected = new Set(); + const functionResults = affectedFunctions.map((fn) => { + const edges = []; + const idToKey = new Map(); + idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); + + const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { + noTests, + maxDepth, + onVisit(c, parentId) { + allAffected.add(`${c.file}:${c.name}`); + const callerKey = `${c.file}::${c.name}:${c.line}`; + idToKey.set(c.id, callerKey); + edges.push({ from: idToKey.get(parentId), to: callerKey }); + }, + }); + + return { + name: fn.name, + kind: fn.kind, + file: fn.file, + line: fn.line, + transitiveCallers: totalDependents, + levels, + edges, + }; + }); + + return { functionResults, allAffected }; +} + +/** + * Look up historically co-changed files for the set of changed files. + * Returns an empty array if the co_changes table is unavailable. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {boolean} noTests + * @returns {Array} + */ +function lookupCoChanges(db, changedRanges, affectedFiles, noTests) { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + const changedFilesList = [...changedRanges.keys()]; + const coResults = coChangeForFiles(changedFilesList, db, { + minJaccard: 0.3, + limit: 20, + noTests, + }); + return coResults.filter((r) => !affectedFiles.has(r.file)); + } catch (e) { + debug(`co_changes lookup skipped: ${e.message}`); + return []; + } +} + +/** + * Look up CODEOWNERS for changed and affected files. + * Returns null if no owners are found or lookup fails. + * + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {string} repoRoot + * @returns {{ owners: object, affectedOwners: Array, suggestedReviewers: Array } | null} + */ +function lookupOwnership(changedRanges, affectedFiles, repoRoot) { + try { + const allFilePaths = [...new Set([...changedRanges.keys(), ...affectedFiles])]; + const ownerResult = ownersForFiles(allFilePaths, repoRoot); + if (ownerResult.affectedOwners.length > 0) { + return { + owners: Object.fromEntries(ownerResult.owners), + affectedOwners: ownerResult.affectedOwners, + suggestedReviewers: ownerResult.suggestedReviewers, + }; + } + return null; + } catch (e) { + debug(`CODEOWNERS lookup skipped: ${e.message}`); + return null; + } +} + +/** + * Check manifesto boundary violations scoped to the changed files. + * Returns `{ boundaryViolations, boundaryViolationCount }`. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @param {object} opts — full diffImpactData opts (may contain `opts.config`) + * @param {string} repoRoot + * @returns {{ boundaryViolations: Array, boundaryViolationCount: number }} + */ +function checkBoundaryViolations(db, changedRanges, noTests, opts, repoRoot) { + try { + const cfg = opts.config || loadConfig(repoRoot); + const boundaryConfig = cfg.manifesto?.boundaries; + if (boundaryConfig) { + const result = evaluateBoundaries(db, boundaryConfig, { + scopeFiles: [...changedRanges.keys()], + noTests, + }); + return { + boundaryViolations: result.violations, + boundaryViolationCount: result.violationCount, + }; + } + } catch (e) { + debug(`boundary check skipped: ${e.message}`); + } + return { boundaryViolations: [], boundaryViolationCount: 0 }; +} + +// ─── diffImpactData ───────────────────────────────────────────────────── + /** * Fix #2: Shell injection vulnerability. * Uses execFileSync instead of execSync to prevent shell interpretation of user input. @@ -147,38 +392,14 @@ export function diffImpactData(customDbPath, opts = {}) { const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); - // Verify we're in a git repository before running git diff - let checkDir = repoRoot; - let isGitRepo = false; - while (checkDir) { - if (fs.existsSync(path.join(checkDir, '.git'))) { - isGitRepo = true; - break; - } - const parent = path.dirname(checkDir); - if (parent === checkDir) break; - checkDir = parent; - } - if (!isGitRepo) { + if (!findGitRoot(repoRoot)) { return { error: `Not a git repository: ${repoRoot}` }; } - let diffOutput; - try { - const args = opts.staged - ? 
['diff', '--cached', '--unified=0', '--no-color'] - : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; - diffOutput = execFileSync('git', args, { - cwd: repoRoot, - encoding: 'utf-8', - maxBuffer: 10 * 1024 * 1024, - stdio: ['pipe', 'pipe', 'pipe'], - }); - } catch (e) { - return { error: `Failed to run git diff: ${e.message}` }; - } + const gitResult = runGitDiff(repoRoot, opts); + if (gitResult.error) return { error: gitResult.error }; - if (!diffOutput.trim()) { + if (!gitResult.output.trim()) { return { changedFiles: 0, newFiles: [], @@ -188,34 +409,7 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const changedRanges = new Map(); - const newFiles = new Set(); - let currentFile = null; - let prevIsDevNull = false; - for (const line of diffOutput.split('\n')) { - if (line.startsWith('--- /dev/null')) { - prevIsDevNull = true; - continue; - } - if (line.startsWith('--- ')) { - prevIsDevNull = false; - continue; - } - const fileMatch = line.match(/^\+\+\+ b\/(.+)/); - if (fileMatch) { - currentFile = fileMatch[1]; - if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); - if (prevIsDevNull) newFiles.add(currentFile); - prevIsDevNull = false; - continue; - } - const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); - if (hunkMatch && currentFile) { - const start = parseInt(hunkMatch[1], 10); - const count = parseInt(hunkMatch[2] || '1', 10); - changedRanges.get(currentFile).push({ start, end: start + count - 1 }); - } - } + const { changedRanges, newFiles } = parseGitDiff(gitResult.output); if (changedRanges.size === 0) { return { @@ -227,106 +421,26 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const affectedFunctions = []; - for (const [file, ranges] of changedRanges) { - if (noTests && isTestFile(file)) continue; - const defs = db - .prepare( - `SELECT * FROM nodes WHERE file = ? 
AND kind IN ('function', 'method', 'class') ORDER BY line`, - ) - .all(file); - for (let i = 0; i < defs.length; i++) { - const def = defs[i]; - const endLine = def.end_line || (defs[i + 1] ? defs[i + 1].line - 1 : 999999); - for (const range of ranges) { - if (range.start <= endLine && range.end >= def.line) { - affectedFunctions.push(def); - break; - } - } - } - } - - const allAffected = new Set(); - const functionResults = affectedFunctions.map((fn) => { - const edges = []; - const idToKey = new Map(); - idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); - - const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { - noTests, - maxDepth, - onVisit(c, parentId) { - allAffected.add(`${c.file}:${c.name}`); - const callerKey = `${c.file}::${c.name}:${c.line}`; - idToKey.set(c.id, callerKey); - edges.push({ from: idToKey.get(parentId), to: callerKey }); - }, - }); - - return { - name: fn.name, - kind: fn.kind, - file: fn.file, - line: fn.line, - transitiveCallers: totalDependents, - levels, - edges, - }; - }); + const affectedFunctions = findAffectedFunctions(db, changedRanges, noTests); + const { functionResults, allAffected } = buildFunctionImpactResults( + db, + affectedFunctions, + noTests, + maxDepth, + ); const affectedFiles = new Set(); for (const key of allAffected) affectedFiles.add(key.split(':')[0]); - // Look up historically coupled files from co-change data - let historicallyCoupled = []; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - const changedFilesList = [...changedRanges.keys()]; - const coResults = coChangeForFiles(changedFilesList, db, { - minJaccard: 0.3, - limit: 20, - noTests, - }); - // Exclude files already found via static analysis - historicallyCoupled = coResults.filter((r) => !affectedFiles.has(r.file)); - } catch (e) { - debug(`co_changes lookup skipped: ${e.message}`); - } - - // Look up CODEOWNERS for changed + affected files - let ownership = null; - try { - const allFilePaths = [...new 
Set([...changedRanges.keys(), ...affectedFiles])]; - const ownerResult = ownersForFiles(allFilePaths, repoRoot); - if (ownerResult.affectedOwners.length > 0) { - ownership = { - owners: Object.fromEntries(ownerResult.owners), - affectedOwners: ownerResult.affectedOwners, - suggestedReviewers: ownerResult.suggestedReviewers, - }; - } - } catch (e) { - debug(`CODEOWNERS lookup skipped: ${e.message}`); - } - - // Check boundary violations scoped to changed files - let boundaryViolations = []; - let boundaryViolationCount = 0; - try { - const cfg = opts.config || loadConfig(repoRoot); - const boundaryConfig = cfg.manifesto?.boundaries; - if (boundaryConfig) { - const result = evaluateBoundaries(db, boundaryConfig, { - scopeFiles: [...changedRanges.keys()], - noTests, - }); - boundaryViolations = result.violations; - boundaryViolationCount = result.violationCount; - } - } catch (e) { - debug(`boundary check skipped: ${e.message}`); - } + const historicallyCoupled = lookupCoChanges(db, changedRanges, affectedFiles, noTests); + const ownership = lookupOwnership(changedRanges, affectedFiles, repoRoot); + const { boundaryViolations, boundaryViolationCount } = checkBoundaryViolations( + db, + changedRanges, + noTests, + opts, + repoRoot, + ); const base = { changedFiles: changedRanges.size, diff --git a/src/domain/analysis/module-map.js b/src/domain/analysis/module-map.js index d2bc613b..daf09b33 100644 --- a/src/domain/analysis/module-map.js +++ b/src/domain/analysis/module-map.js @@ -37,6 +37,241 @@ export const FALSE_POSITIVE_NAMES = new Set([ ]); export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; +// --------------------------------------------------------------------------- +// Section helpers +// --------------------------------------------------------------------------- + +function buildTestFileIds(db) { + const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); + const testFileIds = new Set(); + const testFiles = new Set(); + for 
(const n of allFileNodes) { + if (isTestFile(n.file)) { + testFileIds.add(n.id); + testFiles.add(n.file); + } + } + const allNodes = db.prepare('SELECT id, file FROM nodes').all(); + for (const n of allNodes) { + if (testFiles.has(n.file)) testFileIds.add(n.id); + } + return testFileIds; +} + +function countNodesByKind(db, testFileIds) { + let nodeRows; + if (testFileIds) { + const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); + const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); + const counts = {}; + for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; + nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of nodeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countEdgesByKind(db, testFileIds) { + let edgeRows; + if (testFileIds) { + const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); + const filtered = allEdges.filter( + (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), + ); + const counts = {}; + for (const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; + edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of edgeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countFilesByLanguage(db, noTests) { + const extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); + const byLanguage = {}; 
+ for (const row of fileNodes) { + const ext = path.extname(row.file).toLowerCase(); + const lang = extToLang.get(ext) || 'other'; + byLanguage[lang] = (byLanguage[lang] || 0) + 1; + } + return { total: fileNodes.length, languages: Object.keys(byLanguage).length, byLanguage }; +} + +function findHotspots(db, noTests, limit) { + const testFilter = testFilterSQL('n.file', noTests); + const hotspotRows = db + .prepare(` + SELECT n.file, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out + FROM nodes n + WHERE n.kind = 'file' ${testFilter} + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) + + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC + `) + .all(); + const filtered = noTests ? hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; + return filtered.slice(0, limit).map((r) => ({ + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + })); +} + +function getEmbeddingsInfo(db) { + try { + const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); + if (count && count.c > 0) { + const meta = {}; + const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); + for (const r of metaRows) meta[r.key] = r.value; + return { + count: count.c, + model: meta.model || null, + dim: meta.dim ? 
parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, + }; + } + } catch (e) { + debug(`embeddings lookup skipped: ${e.message}`); + } + return null; +} + +function computeQualityMetrics(db, testFilter) { + const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); + + const totalCallable = db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, + ) + .get().c; + const callableWithCallers = db + .prepare(` + SELECT COUNT(DISTINCT e.target_id) as c FROM edges e + JOIN nodes n ON e.target_id = n.id + WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} + `) + .get().c; + const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; + + const totalCallEdges = db.prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'").get().c; + const highConfCallEdges = db + .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") + .get().c; + const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; + + const fpRows = db + .prepare(` + SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count + FROM nodes n + LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' + WHERE n.kind IN ('function', 'method') + GROUP BY n.id + HAVING caller_count > ? + ORDER BY caller_count DESC + `) + .all(FALSE_POSITIVE_CALLER_THRESHOLD); + const falsePositiveWarnings = fpRows + .filter((r) => + FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), + ) + .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); + + let fpEdgeCount = 0; + for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; + const falsePositiveRatio = totalCallEdges > 0 ? 
fpEdgeCount / totalCallEdges : 0; + + const score = Math.round( + callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, + ); + + return { + score, + callerCoverage: { + ratio: callerCoverage, + covered: callableWithCallers, + total: totalCallable, + }, + callConfidence: { + ratio: callConfidence, + highConf: highConfCallEdges, + total: totalCallEdges, + }, + falsePositiveWarnings, + }; +} + +function countRoles(db, noTests) { + let roleRows; + if (noTests) { + const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); + const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); + const counts = {}; + for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; + roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); + } else { + roleRows = db + .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') + .all(); + } + const roles = {}; + for (const r of roleRows) roles[r.role] = r.c; + return roles; +} + +function getComplexitySummary(db, testFilter) { + try { + const cRows = db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') ${testFilter}`, + ) + .all(); + if (cRows.length > 0) { + const miValues = cRows.map((r) => r.maintainability_index || 0); + return { + analyzed: cRows.length, + avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / cRows.length).toFixed(1), + avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), + maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), + maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), + avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), + minMI: +Math.min(...miValues).toFixed(1), + }; + } + } catch (e) { + debug(`complexity summary skipped: ${e.message}`); + } + return null; +} + +// 
--------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + export function moduleMapData(customDbPath, limit = 20, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -79,237 +314,27 @@ export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { const noTests = opts.noTests || false; + const testFilter = testFilterSQL('n.file', noTests); - // Build set of test file IDs for filtering nodes and edges - let testFileIds = null; - if (noTests) { - const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); - testFileIds = new Set(); - const testFiles = new Set(); - for (const n of allFileNodes) { - if (isTestFile(n.file)) { - testFileIds.add(n.id); - testFiles.add(n.file); - } - } - - // Also collect non-file node IDs that belong to test files - const allNodes = db.prepare('SELECT id, file FROM nodes').all(); - for (const n of allNodes) { - if (testFiles.has(n.file)) testFileIds.add(n.id); - } - } - - // Node breakdown by kind - let nodeRows; - if (noTests) { - const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); - const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); - const counts = {}; - for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; - nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); - } - const nodesByKind = {}; - let totalNodes = 0; - for (const r of nodeRows) { - nodesByKind[r.kind] = r.c; - totalNodes += r.c; - } - - // Edge breakdown by kind - let edgeRows; - if (noTests) { - const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); - const filtered = allEdges.filter( - (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), - ); - const counts = {}; - for 
(const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; - edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); - } - const edgesByKind = {}; - let totalEdges = 0; - for (const r of edgeRows) { - edgesByKind[r.kind] = r.c; - totalEdges += r.c; - } + const testFileIds = noTests ? buildTestFileIds(db) : null; - // File/language distribution — map extensions via LANGUAGE_REGISTRY - const extToLang = new Map(); - for (const entry of LANGUAGE_REGISTRY) { - for (const ext of entry.extensions) { - extToLang.set(ext, entry.id); - } - } - let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); - if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); - const byLanguage = {}; - for (const row of fileNodes) { - const ext = path.extname(row.file).toLowerCase(); - const lang = extToLang.get(ext) || 'other'; - byLanguage[lang] = (byLanguage[lang] || 0) + 1; - } - const langCount = Object.keys(byLanguage).length; + const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, testFileIds); + const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, testFileIds); + const files = countFilesByLanguage(db, noTests); - // Cycles const fileCycles = findCycles(db, { fileLevel: true, noTests }); const fnCycles = findCycles(db, { fileLevel: false, noTests }); - // Top 5 coupling hotspots (fan-in + fan-out, file nodes) - const testFilter = testFilterSQL('n.file', noTests); - const hotspotRows = db - .prepare(` - SELECT n.file, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out - FROM nodes n - WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) - + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC - `) - .all(); - const filteredHotspots = noTests ? 
hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; - const hotspots = filteredHotspots.slice(0, 5).map((r) => ({ - file: r.file, - fanIn: r.fan_in, - fanOut: r.fan_out, - })); - - // Embeddings metadata - let embeddings = null; - try { - const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); - if (count && count.c > 0) { - const meta = {}; - const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); - for (const r of metaRows) meta[r.key] = r.value; - embeddings = { - count: count.c, - model: meta.model || null, - dim: meta.dim ? parseInt(meta.dim, 10) : null, - builtAt: meta.built_at || null, - }; - } - } catch (e) { - debug(`embeddings lookup skipped: ${e.message}`); - } - - // Graph quality metrics - const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); - const totalCallable = db - .prepare( - `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, - ) - .get().c; - const callableWithCallers = db - .prepare(` - SELECT COUNT(DISTINCT e.target_id) as c FROM edges e - JOIN nodes n ON e.target_id = n.id - WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} - `) - .get().c; - const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; - - const totalCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'") - .get().c; - const highConfCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") - .get().c; - const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; - - // False-positive warnings: generic names with > threshold callers - const fpRows = db - .prepare(` - SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count - FROM nodes n - LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' - WHERE n.kind IN ('function', 'method') - GROUP BY n.id - HAVING caller_count > ? 
- ORDER BY caller_count DESC - `) - .all(FALSE_POSITIVE_CALLER_THRESHOLD); - const falsePositiveWarnings = fpRows - .filter((r) => - FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), - ) - .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); - - // Edges from suspicious nodes - let fpEdgeCount = 0; - for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; - const falsePositiveRatio = totalCallEdges > 0 ? fpEdgeCount / totalCallEdges : 0; - - const score = Math.round( - callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, - ); - - const quality = { - score, - callerCoverage: { - ratio: callerCoverage, - covered: callableWithCallers, - total: totalCallable, - }, - callConfidence: { - ratio: callConfidence, - highConf: highConfCallEdges, - total: totalCallEdges, - }, - falsePositiveWarnings, - }; - - // Role distribution - let roleRows; - if (noTests) { - const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); - const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); - const counts = {}; - for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; - roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); - } else { - roleRows = db - .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') - .all(); - } - const roles = {}; - for (const r of roleRows) roles[r.role] = r.c; - - // Complexity summary - let complexity = null; - try { - const cRows = db - .prepare( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') ${testFilter}`, - ) - .all(); - if (cRows.length > 0) { - const miValues = cRows.map((r) => r.maintainability_index || 0); - complexity = { - analyzed: cRows.length, - avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / 
cRows.length).toFixed(1), - avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), - maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - }; - } - } catch (e) { - debug(`complexity summary skipped: ${e.message}`); - } + const hotspots = findHotspots(db, noTests, 5); + const embeddings = getEmbeddingsInfo(db); + const quality = computeQualityMetrics(db, testFilter); + const roles = countRoles(db, noTests); + const complexity = getComplexitySummary(db, testFilter); return { nodes: { total: totalNodes, byKind: nodesByKind }, edges: { total: totalEdges, byKind: edgesByKind }, - files: { total: fileNodes.length, languages: langCount, byLanguage }, + files, cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length }, hotspots, embeddings, From b2f89f13448946ade2876e5f7ac0dd05534bf010 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:35:59 -0600 Subject: [PATCH 14/21] refactor: decompose buildComplexityMetrics Impact: 5 functions changed, 3 affected --- src/features/complexity.js | 246 +++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/src/features/complexity.js b/src/features/complexity.js index 12f5acf1..4f82e5ef 100644 --- a/src/features/complexity.js +++ b/src/features/complexity.js @@ -330,41 +330,138 @@ export function computeAllMetrics(functionNode, langId) { */ export { _findFunctionNode as findFunctionNode }; -/** - * Re-parse changed files with WASM tree-sitter, find function AST subtrees, - * compute complexity, and upsert into function_complexity table. 
- * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { - // Only initialize WASM parsers if some files lack both a cached tree AND pre-computed complexity - let parsers = null; - let extToLang = null; - let needsFallback = false; +async function initWasmParsersIfNeeded(fileSymbols) { for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { - // Only consider files whose language actually has complexity rules const ext = path.extname(relPath).toLowerCase(); if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; - // Check if all function/method defs have pre-computed complexity (native engine) const hasPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, ); if (!hasPrecomputed) { - needsFallback = true; - break; + const { createParsers } = await import('../domain/parser.js'); + const parsers = await createParsers(); + const extToLang = buildExtToLangMap(); + return { parsers, extToLang }; } } } - if (needsFallback) { - const { createParsers } = await import('../domain/parser.js'); - parsers = await createParsers(); - extToLang = buildExtToLangMap(); + return { parsers: null, extToLang: null }; +} + +function getTreeForFile(symbols, relPath, rootDir, parsers, extToLang, getParser) { + let tree = symbols._tree; + let langId = symbols._langId; + + const allPrecomputed = symbols.definitions.every( + (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, + ); + + if (!allPrecomputed && !tree) { + const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) return null; + if (!extToLang) return null; + langId = extToLang.get(ext); + if (!langId) return null; + + const absPath = 
path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`complexity: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParser(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`complexity: parse failed for ${relPath}: ${e.message}`); + return null; + } } + return { tree, langId }; +} + +function upsertPrecomputedComplexity(db, upsert, def, relPath) { + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; + upsert.run( + nodeId, + def.complexity.cognitive, + def.complexity.cyclomatic, + def.complexity.maxNesting ?? 0, + cl ? cl.loc : 0, + cl ? cl.sloc : 0, + cl ? cl.commentLines : 0, + ch ? ch.n1 : 0, + ch ? ch.n2 : 0, + ch ? ch.bigN1 : 0, + ch ? ch.bigN2 : 0, + ch ? ch.vocabulary : 0, + ch ? ch.length : 0, + ch ? ch.volume : 0, + ch ? ch.difficulty : 0, + ch ? ch.effort : 0, + ch ? ch.bugs : 0, + def.complexity.maintainabilityIndex ?? 0, + ); + return 1; +} + +function upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules) { + if (!tree || !rules) return 0; + + const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); + if (!funcNode) return 0; + + const metrics = computeAllMetrics(funcNode, langId); + if (!metrics) return 0; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + + const h = metrics.halstead; + upsert.run( + nodeId, + metrics.cognitive, + metrics.cyclomatic, + metrics.maxNesting, + metrics.loc.loc, + metrics.loc.sloc, + metrics.loc.commentLines, + h ? h.n1 : 0, + h ? h.n2 : 0, + h ? h.bigN1 : 0, + h ? h.bigN2 : 0, + h ? h.vocabulary : 0, + h ? h.length : 0, + h ? h.volume : 0, + h ? h.difficulty : 0, + h ? h.effort : 0, + h ? 
h.bugs : 0, + metrics.mi, + ); + return 1; +} + +/** + * Re-parse changed files with WASM tree-sitter, find function AST subtrees, + * compute complexity, and upsert into function_complexity table. + * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { + const { parsers, extToLang } = await initWasmParsersIfNeeded(fileSymbols); const { getParser } = await import('../domain/parser.js'); const upsert = db.prepare( @@ -381,113 +478,20 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp const tx = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { - // Check if all function/method defs have pre-computed complexity - const allPrecomputed = symbols.definitions.every( - (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, - ); - - let tree = symbols._tree; - let langId = symbols._langId; - - // Only attempt WASM fallback if we actually need AST-based computation - if (!allPrecomputed && !tree) { - const ext = path.extname(relPath).toLowerCase(); - if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Language has no complexity rules - if (!extToLang) continue; // No WASM parsers available - langId = extToLang.get(ext); - if (!langId) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`complexity: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParser(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`complexity: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - + const result = getTreeForFile(symbols, relPath, rootDir, parsers, 
extToLang, getParser); + const tree = result ? result.tree : null; + const langId = result ? result.langId : null; const rules = langId ? COMPLEXITY_RULES.get(langId) : null; for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; if (!def.line) continue; - // Use pre-computed complexity from native engine if available if (def.complexity) { - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - const ch = def.complexity.halstead; - const cl = def.complexity.loc; - upsert.run( - nodeId, - def.complexity.cognitive, - def.complexity.cyclomatic, - def.complexity.maxNesting ?? 0, - cl ? cl.loc : 0, - cl ? cl.sloc : 0, - cl ? cl.commentLines : 0, - ch ? ch.n1 : 0, - ch ? ch.n2 : 0, - ch ? ch.bigN1 : 0, - ch ? ch.bigN2 : 0, - ch ? ch.vocabulary : 0, - ch ? ch.length : 0, - ch ? ch.volume : 0, - ch ? ch.difficulty : 0, - ch ? ch.effort : 0, - ch ? ch.bugs : 0, - def.complexity.maintainabilityIndex ?? 0, - ); - analyzed++; - continue; + analyzed += upsertPrecomputedComplexity(db, upsert, def, relPath); + } else { + analyzed += upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules); } - - // Fallback: compute from AST tree - if (!tree || !rules) continue; - - const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); - if (!funcNode) continue; - - // Single-pass: complexity + Halstead + LOC + MI in one DFS walk - const metrics = computeAllMetrics(funcNode, langId); - if (!metrics) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - const h = metrics.halstead; - upsert.run( - nodeId, - metrics.cognitive, - metrics.cyclomatic, - metrics.maxNesting, - metrics.loc.loc, - metrics.loc.sloc, - metrics.loc.commentLines, - h ? h.n1 : 0, - h ? h.n2 : 0, - h ? h.bigN1 : 0, - h ? h.bigN2 : 0, - h ? h.vocabulary : 0, - h ? h.length : 0, - h ? h.volume : 0, - h ? h.difficulty : 0, - h ? h.effort : 0, - h ? 
h.bugs : 0, - metrics.mi, - ); - analyzed++; } } }); From cb822587f285542e09fa4d9d0a4486ece5d01683 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:05 -0600 Subject: [PATCH 15/21] refactor: decompose buildStructure into traversal, cohesion, and classification Impact: 8 functions changed, 3 affected --- src/features/structure.js | 208 ++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 97 deletions(-) diff --git a/src/features/structure.js b/src/features/structure.js index 4ba9ee0a..7f582076 100644 --- a/src/features/structure.js +++ b/src/features/structure.js @@ -5,73 +5,41 @@ import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; -// ─── Build-time: insert directory nodes, contains edges, and metrics ──── +// ─── Build-time helpers ─────────────────────────────────────────────── -/** - * Build directory structure nodes, containment edges, and compute metrics. - * Called from builder.js after edge building. - * - * @param {import('better-sqlite3').Database} db - Open read-write database - * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } - * @param {string} rootDir - Absolute root directory - * @param {Map} lineCountMap - Map of relPath → line count - * @param {Set} directories - Set of relative directory paths - */ -export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { - const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', - ); - const getNodeIdStmt = { - get: (name, kind, file, line) => { - const id = getNodeId(db, name, kind, file, line); - return id != null ? 
{ id } : undefined; - }, - }; - const insertEdge = db.prepare( - 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', - ); - const upsertMetric = db.prepare(` - INSERT OR REPLACE INTO node_metrics - (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - - const isIncremental = changedFiles != null && changedFiles.length > 0; +function getAncestorDirs(filePaths) { + const dirs = new Set(); + for (const f of filePaths) { + let d = normalizePath(path.dirname(f)); + while (d && d !== '.') { + dirs.add(d); + d = normalizePath(path.dirname(d)); + } + } + return dirs; +} +function cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles) { if (isIncremental) { - // Incremental: only clean up data for changed files and their ancestor directories - const affectedDirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - affectedDirs.add(d); - d = normalizePath(path.dirname(d)); - } - } + const affectedDirs = getAncestorDirs(changedFiles); const deleteContainsForDir = db.prepare( "DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE name = ? 
AND kind = 'directory')", ); const deleteMetricForNode = db.prepare('DELETE FROM node_metrics WHERE node_id = ?'); db.transaction(() => { - // Delete contains edges only from affected directories for (const dir of affectedDirs) { deleteContainsForDir.run(dir); } - // Delete metrics for changed files for (const f of changedFiles) { const fileRow = getNodeIdStmt.get(f, 'file', f, 0); if (fileRow) deleteMetricForNode.run(fileRow.id); } - // Delete metrics for affected directories for (const dir of affectedDirs) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (dirRow) deleteMetricForNode.run(dirRow.id); } })(); } else { - // Full rebuild: clean previous directory nodes/edges (idempotent) - // Scope contains-edge delete to directory-sourced edges only, - // preserving symbol-level contains edges (file→def, class→method, etc.) db.exec(` DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); @@ -79,8 +47,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director DELETE FROM nodes WHERE kind = 'directory'; `); } +} - // Step 1: Ensure all directories are represented (including intermediate parents) +function collectAllDirectories(directories, fileSymbols) { const allDirs = new Set(); for (const dir of directories) { let d = dir; @@ -89,7 +58,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } - // Also add dirs derived from file paths for (const relPath of fileSymbols.keys()) { let d = normalizePath(path.dirname(relPath)); while (d && d !== '.') { @@ -97,37 +65,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } + return allDirs; +} - // Step 2: Insert directory nodes (INSERT OR IGNORE — safe for incremental) - const insertDirs = db.transaction(() => { - for (const dir of allDirs) { - insertNode.run(dir, 'directory', dir, 0, null); 
- } - }); - insertDirs(); - - // Step 3: Insert 'contains' edges (dir → file, dir → subdirectory) - // On incremental, only re-insert for affected directories (others are intact) - const affectedDirs = isIncremental - ? (() => { - const dirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - dirs.add(d); - d = normalizePath(path.dirname(d)); - } - } - return dirs; - })() - : null; +function insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles) { + const isIncremental = changedFiles != null && changedFiles.length > 0; + const affectedDirs = isIncremental ? getAncestorDirs(changedFiles) : null; - const insertContains = db.transaction(() => { - // dir → file + db.transaction(() => { for (const relPath of fileSymbols.keys()) { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') continue; - // On incremental, skip dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(dir)) continue; const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); @@ -135,11 +83,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(dirRow.id, fileRow.id, 'contains', 1.0, 0); } } - // dir → subdirectory for (const dir of allDirs) { const parent = normalizePath(path.dirname(dir)); if (!parent || parent === '.' 
|| parent === dir) continue; - // On incremental, skip parent dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(parent)) continue; const parentRow = getNodeIdStmt.get(parent, 'directory', parent, 0); const childRow = getNodeIdStmt.get(dir, 'directory', dir, 0); @@ -147,11 +93,10 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(parentRow.id, childRow.id, 'contains', 1.0, 0); } } - }); - insertContains(); + })(); +} - // Step 4: Compute per-file metrics - // Pre-compute fan-in/fan-out per file from import edges +function computeImportEdgeMaps(db) { const fanInMap = new Map(); const fanOutMap = new Map(); const importEdges = db @@ -169,14 +114,24 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fanOutMap.set(source_file, (fanOutMap.get(source_file) || 0) + 1); fanInMap.set(target_file, (fanInMap.get(target_file) || 0) + 1); } + return { fanInMap, fanOutMap, importEdges }; +} - const computeFileMetrics = db.transaction(() => { +function computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, +) { + db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); if (!fileRow) continue; const lineCount = lineCountMap.get(relPath) || 0; - // Deduplicate definitions by name+kind+line const seen = new Set(); let symbolCount = 0; for (const d of symbols.definitions) { @@ -203,11 +158,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director null, ); } - }); - computeFileMetrics(); + })(); +} - // Step 5: Compute per-directory metrics - // Build a map of dir → descendant files +function computeDirectoryMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + allDirs, + importEdges, +) { const dirFiles = new Map(); for (const dir of allDirs) { dirFiles.set(dir, []); @@ -222,7 +183,6 @@ export function 
buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Build reverse index: file → set of ancestor directories (O(files × depth)) const fileToAncestorDirs = new Map(); for (const [dir, files] of dirFiles) { for (const f of files) { @@ -231,7 +191,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Single O(E) pass: pre-aggregate edge counts per directory const dirEdgeCounts = new Map(); for (const dir of allDirs) { dirEdgeCounts.set(dir, { intra: 0, fanIn: 0, fanOut: 0 }); @@ -241,7 +200,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director const tgtDirs = fileToAncestorDirs.get(target_file); if (!srcDirs && !tgtDirs) continue; - // For each directory that contains the source file if (srcDirs) { for (const dir of srcDirs) { const counts = dirEdgeCounts.get(dir); @@ -253,10 +211,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } } - // For each directory that contains the target but NOT the source if (tgtDirs) { for (const dir of tgtDirs) { - if (srcDirs?.has(dir)) continue; // already counted as intra + if (srcDirs?.has(dir)) continue; const counts = dirEdgeCounts.get(dir); if (!counts) continue; counts.fanIn++; @@ -264,7 +221,7 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - const computeDirMetrics = db.transaction(() => { + db.transaction(() => { for (const [dir, files] of dirFiles) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (!dirRow) continue; @@ -286,7 +243,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // O(1) lookup from pre-aggregated edge counts const counts = dirEdgeCounts.get(dir) || { intra: 0, fanIn: 0, fanOut: 0 }; const totalEdges = counts.intra + counts.fanIn + counts.fanOut; const cohesion = totalEdges > 0 ? 
counts.intra / totalEdges : null; @@ -303,11 +259,69 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fileCount, ); } - }); - computeDirMetrics(); + })(); +} + +// ─── Build-time: insert directory nodes, contains edges, and metrics ──── + +/** + * Build directory structure nodes, containment edges, and compute metrics. + * Called from builder.js after edge building. + * + * @param {import('better-sqlite3').Database} db - Open read-write database + * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } + * @param {string} rootDir - Absolute root directory + * @param {Map} lineCountMap - Map of relPath → line count + * @param {Set} directories - Set of relative directory paths + */ +export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { + const insertNode = db.prepare( + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + const getNodeIdStmt = { + get: (name, kind, file, line) => { + const id = getNodeId(db, name, kind, file, line); + return id != null ? { id } : undefined; + }, + }; + const insertEdge = db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', + ); + const upsertMetric = db.prepare(` + INSERT OR REPLACE INTO node_metrics + (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + + const isIncremental = changedFiles != null && changedFiles.length > 0; + + cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles); + + const allDirs = collectAllDirectories(directories, fileSymbols); + + db.transaction(() => { + for (const dir of allDirs) { + insertNode.run(dir, 'directory', dir, 0, null); + } + })(); + + insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles); + + const { fanInMap, fanOutMap, importEdges } = computeImportEdgeMaps(db); + + computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, + ); + + computeDirectoryMetrics(db, upsertMetric, getNodeIdStmt, fileSymbols, allDirs, importEdges); - const dirCount = allDirs.size; - debug(`Structure: ${dirCount} directories, ${fileSymbols.size} files with metrics`); + debug(`Structure: ${allDirs.size} directories, ${fileSymbols.size} files with metrics`); } // ─── Node role classification ───────────────────────────────────────── From 54b0067b0e27e9ec97ec0270b1cd34516c3c319c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:16 -0600 Subject: [PATCH 16/21] refactor: decompose buildCFGData and buildDataflowEdges Impact: 10 functions changed, 5 affected --- src/features/cfg.js | 246 ++++++++++++++++++++------------------- src/features/dataflow.js | 240 +++++++++++++++++++------------------- 2 files changed, 244 insertions(+), 242 deletions(-) diff --git a/src/features/cfg.js b/src/features/cfg.js index ae1b8564..3f029274 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -68,30 +68,15 @@ export function buildFunctionCFG(functionNode, langId) { return { blocks: r.blocks, edges: r.edges, cyclomatic: r.cyclomatic }; } -// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── +// ─── Build-Time Helpers ───────────────────────────────────────────────── -/** - * Build CFG data for all function/method 
definitions and persist to DB. - * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initCfgParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. - const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { const ext = path.extname(relPath).toLowerCase(); if (CFG_EXTENSIONS.has(ext)) { - // Check if all function/method defs already have native CFG data const hasNativeCfg = symbols.definitions .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) .every((d) => d.cfg === null || d.cfg?.blocks?.length); @@ -103,18 +88,131 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } - // findFunctionNode imported from ./ast-analysis/shared.js at module level + return { parsers, getParserFn }; +} + +function getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + const ext = path.extname(relPath).toLowerCase(); + let tree = symbols._tree; + let langId = symbols._langId; + + const allNative = symbols.definitions + .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) + .every((d) => d.cfg === null || d.cfg?.blocks?.length); + + if (!tree && !allNative) { + if 
(!getParserFn) return null; + langId = extToLang.get(ext); + if (!langId || !CFG_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`cfg: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`cfg: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + langId = extToLang.get(ext); + if (!langId) return null; + } + + return { tree, langId }; +} + +function buildVisitorCfgMap(tree, cfgRules, symbols, langId) { + const needsVisitor = + tree && + symbols.definitions.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !d.cfg?.blocks?.length, + ); + if (!needsVisitor) return null; + + const visitor = createCfgVisitor(cfgRules); + const walkerOpts = { + functionNodeTypes: new Set(cfgRules.functionNodes), + nestingNodeTypes: new Set(), + getFunctionName: (node) => { + const nameNode = node.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + }, + }; + const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); + const cfgResults = walkResults.cfg || []; + const visitorCfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); + visitorCfgByLine.get(line).push(r); + } + } + return visitorCfgByLine; +} + +function persistCfg(cfg, nodeId, insertBlock, insertEdge) { + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + nodeId, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); + } + } +} + +// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── + +/** + * Build CFG data for all function/method definitions and persist to DB. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initCfgParsers(fileSymbols); const insertBlock = db.prepare( `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) @@ -131,81 +229,14 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const ext = path.extname(relPath).toLowerCase(); if (!CFG_EXTENSIONS.has(ext)) continue; - let tree = symbols._tree; - let langId = symbols._langId; - - // Check if all defs already have native CFG — skip WASM parse if so - const allNative = symbols.definitions - .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) - .every((d) => d.cfg === null || d.cfg?.blocks?.length); - - // WASM fallback if no cached tree and not all native - if (!tree && !allNative) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !CFG_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`cfg: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`cfg: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } + const treeLang = getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!treeLang) continue; + const { tree, langId } = treeLang; const cfgRules = CFG_RULES.get(langId); if (!cfgRules) continue; - // WASM fallback: run 
file-level visitor walk to compute CFG for all functions - // that don't already have pre-computed data (from native engine or unified walk) - let visitorCfgByLine = null; - const needsVisitor = - tree && - symbols.definitions.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !d.cfg?.blocks?.length, - ); - if (needsVisitor) { - const visitor = createCfgVisitor(cfgRules); - const walkerOpts = { - functionNodeTypes: new Set(cfgRules.functionNodes), - nestingNodeTypes: new Set(), - getFunctionName: (node) => { - const nameNode = node.childForFieldName('name'); - return nameNode ? nameNode.text : null; - }, - }; - const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); - const cfgResults = walkResults.cfg || []; - visitorCfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); - visitorCfgByLine.get(line).push(r); - } - } - } + const visitorCfgByLine = buildVisitorCfgMap(tree, cfgRules, symbols, langId); for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; @@ -214,7 +245,6 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); if (!nodeId) continue; - // Use pre-computed CFG (native engine or unified walk), then visitor fallback let cfg = null; if (def.cfg?.blocks?.length) { cfg = def.cfg; @@ -233,36 +263,10 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { if (!cfg || cfg.blocks.length === 0) continue; - // Clear old CFG data for this function deleteCfgForNode(db, nodeId); - - // Insert blocks and build index→dbId mapping - const blockDbIds = new Map(); - for (const block of cfg.blocks) { - const result = insertBlock.run( - nodeId, - block.index, - block.type, - block.startLine, - 
block.endLine, - block.label, - ); - blockDbIds.set(block.index, result.lastInsertRowid); - } - - // Insert edges - for (const edge of cfg.edges) { - const sourceDbId = blockDbIds.get(edge.sourceIndex); - const targetDbId = blockDbIds.get(edge.targetIndex); - if (sourceDbId && targetDbId) { - insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); - } - } - + persistCfg(cfg, nodeId, insertBlock, insertEdge); analyzed++; } - - // Don't release _tree here — complexity/dataflow may still need it } }); diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 695afa95..2dee25b6 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -58,26 +58,11 @@ export function extractDataflow(tree, _filePath, _definitions, langId = 'javascr return results.dataflow; } -// ── buildDataflowEdges ────────────────────────────────────────────────────── +// ── Build-Time Helpers ────────────────────────────────────────────────────── -/** - * Build dataflow edges and insert them into the database. - * Called during graph build when --dataflow is enabled. - * - * @param {object} db - better-sqlite3 database instance - * @param {Map} fileSymbols - map of relPath → symbols - * @param {string} rootDir - absolute root directory - * @param {object} engineOpts - engine options - */ -export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initDataflowParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. 
- const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree && !symbols.dataflow) { const ext = path.extname(relPath).toLowerCase(); @@ -88,25 +73,130 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } + return { parsers, getParserFn }; +} + +function getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + if (symbols.dataflow) return symbols.dataflow; + + let tree = symbols._tree; + let langId = symbols._langId; + + if (!tree) { + if (!getParserFn) return null; + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId || !DATAFLOW_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`dataflow: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`dataflow: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId) return null; + } + + if (!DATAFLOW_RULES.has(langId)) return null; + + return extractDataflow(tree, relPath, symbols.definitions, langId); +} + +function insertDataflowEdges(insert, data, resolveNode) { + let edgeCount = 0; + + for (const flow of data.argFlows) { + const sourceNode = resolveNode(flow.callerFunc); + const targetNode = resolveNode(flow.calleeName); + if (sourceNode && targetNode) { + insert.run( + sourceNode.id, + 
targetNode.id, + 'flows_to', + flow.argIndex, + flow.expression, + flow.line, + flow.confidence, + ); + edgeCount++; + } + } + + for (const assignment of data.assignments) { + const producerNode = resolveNode(assignment.sourceCallName); + const consumerNode = resolveNode(assignment.callerFunc); + if (producerNode && consumerNode) { + insert.run( + producerNode.id, + consumerNode.id, + 'returns', + null, + assignment.expression, + assignment.line, + 1.0, + ); + edgeCount++; + } + } + + for (const mut of data.mutations) { + const mutatorNode = resolveNode(mut.funcName); + if (mutatorNode && mut.binding?.type === 'param') { + insert.run(mutatorNode.id, mutatorNode.id, 'mutates', null, mut.mutatingExpr, mut.line, 1.0); + edgeCount++; + } + } + + return edgeCount; +} + +// ── buildDataflowEdges ────────────────────────────────────────────────────── + +/** + * Build dataflow edges and insert them into the database. + * Called during graph build when --dataflow is enabled. + * + * @param {object} db - better-sqlite3 database instance + * @param {Map} fileSymbols - map of relPath → symbols + * @param {string} rootDir - absolute root directory + * @param {object} engineOpts - engine options + */ +export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initDataflowParsers(fileSymbols); + const insert = db.prepare( `INSERT INTO dataflow (source_id, target_id, kind, param_index, expression, line, confidence) VALUES (?, ?, ?, ?, ?, ?, ?)`, ); - // MVP scope: only resolve function/method nodes for dataflow edges. - // Future expansion: add 'parameter', 'property', 'constant' kinds to track - // data flow through property accessors or constant references. const getNodeByNameAndFile = db.prepare( `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND file = ? 
AND kind IN ('function', 'method')`, @@ -125,109 +215,17 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) const ext = path.extname(relPath).toLowerCase(); if (!DATAFLOW_EXTENSIONS.has(ext)) continue; - // Use native dataflow data if available — skip WASM extraction - let data = symbols.dataflow; - if (!data) { - let tree = symbols._tree; - let langId = symbols._langId; - - // WASM fallback if no cached tree - if (!tree) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !DATAFLOW_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`dataflow: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`dataflow: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } - - if (!DATAFLOW_RULES.has(langId)) continue; - - data = extractDataflow(tree, relPath, symbols.definitions, langId); - } + const data = getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!data) continue; - // Resolve function names to node IDs in this file first, then globally - function resolveNode(funcName) { + const resolveNode = (funcName) => { const local = getNodeByNameAndFile.all(funcName, relPath); if (local.length > 0) return local[0]; const global = getNodeByName.all(funcName); return global.length > 0 ? 
global[0] : null; - } - - // flows_to: parameter/variable passed as argument to another function - for (const flow of data.argFlows) { - const sourceNode = resolveNode(flow.callerFunc); - const targetNode = resolveNode(flow.calleeName); - if (sourceNode && targetNode) { - insert.run( - sourceNode.id, - targetNode.id, - 'flows_to', - flow.argIndex, - flow.expression, - flow.line, - flow.confidence, - ); - totalEdges++; - } - } - - // returns: call return value captured in caller - for (const assignment of data.assignments) { - const producerNode = resolveNode(assignment.sourceCallName); - const consumerNode = resolveNode(assignment.callerFunc); - if (producerNode && consumerNode) { - insert.run( - producerNode.id, - consumerNode.id, - 'returns', - null, - assignment.expression, - assignment.line, - 1.0, - ); - totalEdges++; - } - } + }; - // mutates: parameter-derived value is mutated - for (const mut of data.mutations) { - const mutatorNode = resolveNode(mut.funcName); - if (mutatorNode && mut.binding?.type === 'param') { - // The mutation in this function affects the parameter source - insert.run( - mutatorNode.id, - mutatorNode.id, - 'mutates', - null, - mut.mutatingExpr, - mut.line, - 1.0, - ); - totalEdges++; - } - } + totalEdges += insertDataflowEdges(insert, data, resolveNode); } }); From 7030e7f369312ce9bb0cb3e164bc1b78023f3026 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:21 -0600 Subject: [PATCH 17/21] refactor: decompose sequenceData into BFS and message construction Impact: 5 functions changed, 2 affected --- src/features/sequence.js | 293 ++++++++++++++++++++------------------- 1 file changed, 152 insertions(+), 141 deletions(-) diff --git a/src/features/sequence.js b/src/features/sequence.js index 271d2ea2..cf59ddc3 100644 --- a/src/features/sequence.js +++ b/src/features/sequence.js @@ -68,6 +68,148 @@ function buildAliases(files) { return aliases; } +// ─── Helpers 
───────────────────────────────────────────────────────── + +function findEntryNode(repo, name, opts) { + let matchNode = findMatchingNodes(repo, name, opts)[0] ?? null; + if (!matchNode) { + for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { + matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; + if (matchNode) break; + } + } + return matchNode; +} + +function bfsCallees(repo, matchNode, maxDepth, noTests) { + const visited = new Set([matchNode.id]); + let frontier = [matchNode.id]; + const messages = []; + const fileSet = new Set([matchNode.file]); + const idToNode = new Map(); + idToNode.set(matchNode.id, matchNode); + let truncated = false; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier = []; + + for (const fid of frontier) { + const callees = repo.findCallees(fid); + const caller = idToNode.get(fid); + + for (const c of callees) { + if (noTests && isTestFile(c.file)) continue; + + fileSet.add(c.file); + messages.push({ + from: caller.file, + to: c.file, + label: c.name, + type: 'call', + depth: d, + }); + + if (visited.has(c.id)) continue; + + visited.add(c.id); + nextFrontier.push(c.id); + idToNode.set(c.id, c); + } + } + + frontier = nextFrontier; + if (frontier.length === 0) break; + + if (d === maxDepth && frontier.length > 0) { + const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); + if (hasMoreCalls) truncated = true; + } + } + + return { messages, fileSet, idToNode, truncated }; +} + +function annotateDataflow(repo, messages, idToNode) { + const hasTable = repo.hasDataflowTable(); + + if (!hasTable || !(repo instanceof SqliteRepository)) return; + + const db = repo.db; + const nodeByNameFile = new Map(); + for (const n of idToNode.values()) { + nodeByNameFile.set(`${n.name}|${n.file}`, n); + } + + const getReturns = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.source_id = ? 
AND d.kind = 'returns'`, + ); + const getFlowsTo = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.target_id = ? AND d.kind = 'flows_to' + ORDER BY d.param_index`, + ); + + const seenReturns = new Set(); + for (const msg of [...messages]) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const returnKey = `${msg.to}->${msg.from}:${msg.label}`; + if (seenReturns.has(returnKey)) continue; + + const returns = getReturns.all(targetNode.id); + + if (returns.length > 0) { + seenReturns.add(returnKey); + const expr = returns[0].expression || 'result'; + messages.push({ + from: msg.to, + to: msg.from, + label: expr, + type: 'return', + depth: msg.depth, + }); + } + } + + for (const msg of messages) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const params = getFlowsTo.all(targetNode.id); + + if (params.length > 0) { + const paramNames = params + .map((p) => p.expression) + .filter(Boolean) + .slice(0, 3); + if (paramNames.length > 0) { + msg.label = `${msg.label}(${paramNames.join(', ')})`; + } + } + } +} + +function buildParticipants(fileSet, entryFile) { + const aliases = buildAliases([...fileSet]); + const participants = [...fileSet].map((file) => ({ + id: aliases.get(file), + label: file.split('/').pop(), + file, + })); + + participants.sort((a, b) => { + if (a.file === entryFile) return -1; + if (b.file === entryFile) return 1; + return a.file.localeCompare(b.file); + }); + + return { participants, aliases }; +} + // ─── Core data function ────────────────────────────────────────────── /** @@ -90,19 +232,8 @@ export function sequenceData(name, dbPath, opts = {}) { try { const maxDepth = opts.depth || 10; const noTests = opts.noTests || false; - const withDataflow = opts.dataflow || false; - - // Phase 1: Direct LIKE match - let matchNode = findMatchingNodes(repo, name, 
opts)[0] ?? null; - - // Phase 2: Prefix-stripped matching - if (!matchNode) { - for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { - matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; - if (matchNode) break; - } - } + const matchNode = findEntryNode(repo, name, opts); if (!matchNode) { return { entry: null, @@ -121,123 +252,17 @@ export function sequenceData(name, dbPath, opts = {}) { line: matchNode.line, }; - // BFS forward — track edges, not just nodes - const visited = new Set([matchNode.id]); - let frontier = [matchNode.id]; - const messages = []; - const fileSet = new Set([matchNode.file]); - const idToNode = new Map(); - idToNode.set(matchNode.id, matchNode); - let truncated = false; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier = []; - - for (const fid of frontier) { - const callees = repo.findCallees(fid); - - const caller = idToNode.get(fid); - - for (const c of callees) { - if (noTests && isTestFile(c.file)) continue; - - // Always record the message (even for visited nodes — different caller path) - fileSet.add(c.file); - messages.push({ - from: caller.file, - to: c.file, - label: c.name, - type: 'call', - depth: d, - }); - - if (visited.has(c.id)) continue; - - visited.add(c.id); - nextFrontier.push(c.id); - idToNode.set(c.id, c); - } - } - - frontier = nextFrontier; - if (frontier.length === 0) break; - - if (d === maxDepth && frontier.length > 0) { - // Only mark truncated if at least one frontier node has further callees - const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); - if (hasMoreCalls) truncated = true; - } - } - - // Dataflow annotations: add return arrows - if (withDataflow && messages.length > 0) { - const hasTable = repo.hasDataflowTable(); - - if (hasTable && repo instanceof SqliteRepository) { - const db = repo.db; - // Build name|file lookup for O(1) target node access - const nodeByNameFile = new Map(); - for (const n of idToNode.values()) { - 
nodeByNameFile.set(`${n.name}|${n.file}`, n); - } - - const getReturns = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.source_id = ? AND d.kind = 'returns'`, - ); - const getFlowsTo = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.target_id = ? AND d.kind = 'flows_to' - ORDER BY d.param_index`, - ); - - // For each called function, check if it has return edges - const seenReturns = new Set(); - for (const msg of [...messages]) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const returnKey = `${msg.to}->${msg.from}:${msg.label}`; - if (seenReturns.has(returnKey)) continue; - - const returns = getReturns.all(targetNode.id); - - if (returns.length > 0) { - seenReturns.add(returnKey); - const expr = returns[0].expression || 'result'; - messages.push({ - from: msg.to, - to: msg.from, - label: expr, - type: 'return', - depth: msg.depth, - }); - } - } + const { messages, fileSet, idToNode, truncated } = bfsCallees( + repo, + matchNode, + maxDepth, + noTests, + ); - // Annotate call messages with parameter names - for (const msg of messages) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const params = getFlowsTo.all(targetNode.id); - - if (params.length > 0) { - const paramNames = params - .map((p) => p.expression) - .filter(Boolean) - .slice(0, 3); - if (paramNames.length > 0) { - msg.label = `${msg.label}(${paramNames.join(', ')})`; - } - } - } - } + if (opts.dataflow && messages.length > 0) { + annotateDataflow(repo, messages, idToNode); } - // Sort messages by depth, then call before return messages.sort((a, b) => { if (a.depth !== b.depth) return a.depth - b.depth; if (a.type === 'call' && b.type === 'return') return -1; @@ -245,22 +270,8 @@ export function sequenceData(name, dbPath, opts = {}) { return 0; }); - // Build participant list from files - const 
aliases = buildAliases([...fileSet]); - const participants = [...fileSet].map((file) => ({ - id: aliases.get(file), - label: file.split('/').pop(), - file, - })); - - // Sort participants: entry file first, then alphabetically - participants.sort((a, b) => { - if (a.file === entry.file) return -1; - if (b.file === entry.file) return 1; - return a.file.localeCompare(b.file); - }); + const { participants, aliases } = buildParticipants(fileSet, entry.file); - // Replace file paths with alias IDs in messages for (const msg of messages) { msg.from = aliases.get(msg.from); msg.to = aliases.get(msg.to); From b4d8a0dc642a7eefc5df717644bd4fdaf28f02a1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:38 -0600 Subject: [PATCH 18/21] refactor: decompose explain() into section renderers Impact: 5 functions changed, 2 affected --- src/presentation/queries-cli/inspect.js | 373 ++++++++++++------------ 1 file changed, 184 insertions(+), 189 deletions(-) diff --git a/src/presentation/queries-cli/inspect.js b/src/presentation/queries-cli/inspect.js index 5a3ddcb7..59b85d63 100644 --- a/src/presentation/queries-cli/inspect.js +++ b/src/presentation/queries-cli/inspect.js @@ -96,96 +96,7 @@ export function context(name, customDbPath, opts = {}) { } for (const r of data.results) { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const roleTag = r.role ? 
` [${r.role}]` : ''; - console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - - // Signature - if (r.signature) { - console.log('## Type/Shape Info'); - if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); - console.log(); - } - - // Children - if (r.children && r.children.length > 0) { - console.log(`## Children (${r.children.length})`); - for (const c of r.children) { - console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); - } - console.log(); - } - - // Complexity - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; - console.log('## Complexity'); - console.log( - ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, - ); - console.log(); - } - - // Source - if (r.source) { - console.log('## Source'); - for (const line of r.source.split('\n')) { - console.log(` ${line}`); - } - console.log(); - } - - // Callees - if (r.callees.length > 0) { - console.log(`## Direct Dependencies (${r.callees.length})`); - for (const c of r.callees) { - const summary = c.summary ? ` — ${c.summary}` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); - if (c.source) { - for (const line of c.source.split('\n').slice(0, 10)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - // Callers - if (r.callers.length > 0) { - console.log(`## Callers (${r.callers.length})`); - for (const c of r.callers) { - const via = c.viaHierarchy ? 
` (via ${c.viaHierarchy})` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); - } - console.log(); - } - - // Related tests - if (r.relatedTests.length > 0) { - console.log('## Related Tests'); - for (const t of r.relatedTests) { - console.log(` ${t.file} — ${t.testCount} tests`); - for (const tn of t.testNames) { - console.log(` - ${tn}`); - } - if (t.source) { - console.log(' Source:'); - for (const line of t.source.split('\n').slice(0, 20)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { - console.log( - ' (no call edges or tests found — may be invoked dynamically or via re-exports)', - ); - console.log(); - } + renderContextResult(r); } } @@ -209,126 +120,210 @@ export function children(name, customDbPath, opts = {}) { } } -export function explain(target, customDbPath, opts = {}) { - const data = explainData(target, customDbPath, opts); - if (outputResult(data, 'results', opts)) return; +function renderContextResult(r) { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const roleTag = r.role ? ` [${r.role}]` : ''; + console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - if (data.results.length === 0) { - console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); - return; + if (r.signature) { + console.log('## Type/Shape Info'); + if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); + console.log(); } - if (data.kind === 'file') { - for (const r of data.results) { - const publicCount = r.publicApi.length; - const internalCount = r.internal.length; - const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; - console.log(`\n# ${r.file}`); - console.log( - ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, - ); + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } - if (r.imports.length > 0) { - console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); - } - if (r.importedBy.length > 0) { - console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); - } + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; + console.log('## Complexity'); + console.log( + ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, + ); + console.log(); + } - if (r.publicApi.length > 0) { - console.log(`\n## Exported`); - for (const s of r.publicApi) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); - } - } + if (r.source) { + console.log('## Source'); + for (const line of r.source.split('\n')) { + console.log(` ${line}`); + } + console.log(); + } - if (r.internal.length > 0) { - console.log(`\n## Internal`); - for (const s of r.internal) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + if (r.callees.length > 0) { + console.log(`## Direct Dependencies (${r.callees.length})`); + for (const c of r.callees) { + const summary = c.summary ? 
` — ${c.summary}` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); + if (c.source) { + for (const line of c.source.split('\n').slice(0, 10)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.dataFlow.length > 0) { - console.log(`\n## Data Flow`); - for (const df of r.dataFlow) { - console.log(` ${df.caller} -> ${df.callees.join(', ')}`); - } - } - console.log(); + if (r.callers.length > 0) { + console.log(`## Callers (${r.callers.length})`); + for (const c of r.callers) { + const via = c.viaHierarchy ? ` (via ${c.viaHierarchy})` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); } - } else { - function printFunctionExplain(r, indent = '') { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; - const summaryPart = r.summary ? ` | ${r.summary}` : ''; - const roleTag = r.role ? ` [${r.role}]` : ''; - const depthLevel = r._depth || 0; - const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); - console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); - if (lineInfo || r.summary) { - console.log(`${indent} ${lineInfo}${summaryPart}`); - } - if (r.signature) { - if (r.signature.params != null) - console.log(`${indent} Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); - } + console.log(); + } - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? 
` MI=${cx.maintainabilityIndex}` : ''; - console.log( - `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, - ); + if (r.relatedTests.length > 0) { + console.log('## Related Tests'); + for (const t of r.relatedTests) { + console.log(` ${t.file} — ${t.testCount} tests`); + for (const tn of t.testNames) { + console.log(` - ${tn}`); } - - if (r.callees.length > 0) { - console.log(`\n${indent} Calls (${r.callees.length}):`); - for (const c of r.callees) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + if (t.source) { + console.log(' Source:'); + for (const line of t.source.split('\n').slice(0, 20)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.callers.length > 0) { - console.log(`\n${indent} Called by (${r.callers.length}):`); - for (const c of r.callers) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } + if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { + console.log(' (no call edges or tests found — may be invoked dynamically or via re-exports)'); + console.log(); + } +} - if (r.relatedTests.length > 0) { - const label = r.relatedTests.length === 1 ? 'file' : 'files'; - console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); - for (const t of r.relatedTests) { - console.log(`${indent} ${t.file}`); - } - } +function renderFileExplain(r) { + const publicCount = r.publicApi.length; + const internalCount = r.internal.length; + const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; + console.log(`\n# ${r.file}`); + console.log( + ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, + ); + + if (r.imports.length > 0) { + console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); + } + if (r.importedBy.length > 0) { + console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); + } - if (r.callees.length === 0 && r.callers.length === 0) { - console.log( - `${indent} (no call edges found -- may be invoked dynamically or via re-exports)`, - ); - } + if (r.publicApi.length > 0) { + console.log(`\n## Exported`); + for (const s of r.publicApi) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } - // Render recursive dependency details - if (r.depDetails && r.depDetails.length > 0) { - console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); - for (const dep of r.depDetails) { - printFunctionExplain(dep, `${indent} `); - } - } - console.log(); + if (r.internal.length > 0) { + console.log(`\n## Internal`); + for (const s of r.internal) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } + + if (r.dataFlow.length > 0) { + console.log(`\n## Data Flow`); + for (const df of r.dataFlow) { + console.log(` ${df.caller} -> ${df.callees.join(', ')}`); + } + } + console.log(); +} + +function renderFunctionExplain(r, indent = '') { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; + const summaryPart = r.summary ? 
` | ${r.summary}` : ''; + const roleTag = r.role ? ` [${r.role}]` : ''; + const depthLevel = r._depth || 0; + const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); + console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); + if (lineInfo || r.summary) { + console.log(`${indent} ${lineInfo}${summaryPart}`); + } + if (r.signature) { + if (r.signature.params != null) console.log(`${indent} Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); + } + + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` MI=${cx.maintainabilityIndex}` : ''; + console.log( + `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, + ); + } + + if (r.callees.length > 0) { + console.log(`\n${indent} Calls (${r.callees.length}):`); + for (const c of r.callees) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.callers.length > 0) { + console.log(`\n${indent} Called by (${r.callers.length}):`); + for (const c of r.callers) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.relatedTests.length > 0) { + const label = r.relatedTests.length === 1 ? 
'file' : 'files'; + console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); + for (const t of r.relatedTests) { + console.log(`${indent} ${t.file}`); } + } + + if (r.callees.length === 0 && r.callers.length === 0) { + console.log(`${indent} (no call edges found -- may be invoked dynamically or via re-exports)`); + } + if (r.depDetails && r.depDetails.length > 0) { + console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); + for (const dep of r.depDetails) { + renderFunctionExplain(dep, `${indent} `); + } + } + console.log(); +} + +export function explain(target, customDbPath, opts = {}) { + const data = explainData(target, customDbPath, opts); + if (outputResult(data, 'results', opts)) return; + + if (data.results.length === 0) { + console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); + return; + } + + if (data.kind === 'file') { + for (const r of data.results) { + renderFileExplain(r); + } + } else { for (const r of data.results) { - printFunctionExplain(r); + renderFunctionExplain(r); } } } From ae805d5fc65a45b36532cea6b50ab3fa67f27c0f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:44 -0600 Subject: [PATCH 19/21] refactor: decompose stats() into section printers Impact: 12 functions changed, 6 affected --- src/presentation/queries-cli/overview.js | 115 +++++++++++------------ 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 88409da2..29a4f6e9 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -2,64 +2,42 @@ import path from 'node:path'; import { kindIcon, moduleMapData, rolesData, statsData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; -export async function stats(customDbPath, opts = {}) { - const data = 
statsData(customDbPath, { noTests: opts.noTests }); - - // Community detection summary (async import for lazy-loading) - try { - const { communitySummaryForStats } = await import('../../features/communities.js'); - data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); - } catch { - /* graphology may not be available */ - } - - if (outputResult(data, null, opts)) return; - - // Human-readable output - console.log('\n# Codegraph Stats\n'); - - // Nodes - console.log(`Nodes: ${data.nodes.total} total`); - const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); - const kindParts = kindEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < kindParts.length; i += 3) { - const row = kindParts +function printCountGrid(entries, padWidth) { + const parts = entries.map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < parts.length; i += 3) { + const row = parts .slice(i, i + 3) - .map((p) => p.padEnd(18)) + .map((p) => p.padEnd(padWidth)) .join(''); console.log(` ${row}`); } +} - // Edges +function printNodes(data) { + console.log(`Nodes: ${data.nodes.total} total`); + const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); + printCountGrid(kindEntries, 18); +} + +function printEdges(data) { console.log(`\nEdges: ${data.edges.total} total`); const edgeEntries = Object.entries(data.edges.byKind).sort((a, b) => b[1] - a[1]); - const edgeParts = edgeEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < edgeParts.length; i += 3) { - const row = edgeParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(edgeEntries, 18); +} - // Files +function printFiles(data) { console.log(`\nFiles: ${data.files.total} (${data.files.languages} languages)`); const langEntries = Object.entries(data.files.byLanguage).sort((a, b) => b[1] - a[1]); - const langParts = langEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < langParts.length; i 
+= 3) { - const row = langParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(langEntries, 18); +} - // Cycles +function printCycles(data) { console.log( `\nCycles: ${data.cycles.fileLevel} file-level, ${data.cycles.functionLevel} function-level`, ); +} - // Hotspots +function printHotspots(data) { if (data.hotspots.length > 0) { console.log(`\nTop ${data.hotspots.length} coupling hotspots:`); for (let i = 0; i < data.hotspots.length; i++) { @@ -69,8 +47,9 @@ export async function stats(customDbPath, opts = {}) { ); } } +} - // Embeddings +function printEmbeddings(data) { if (data.embeddings) { const e = data.embeddings; console.log( @@ -79,8 +58,9 @@ export async function stats(customDbPath, opts = {}) { } else { console.log('\nEmbeddings: not built'); } +} - // Quality +function printQuality(data) { if (data.quality) { const q = data.quality; const cc = q.callerCoverage; @@ -99,24 +79,18 @@ export async function stats(customDbPath, opts = {}) { } } } +} - // Roles +function printRoles(data) { if (data.roles && Object.keys(data.roles).length > 0) { const total = Object.values(data.roles).reduce((a, b) => a + b, 0); console.log(`\nRoles: ${total} classified symbols`); - const roleParts = Object.entries(data.roles) - .sort((a, b) => b[1] - a[1]) - .map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < roleParts.length; i += 3) { - const row = roleParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + const roleEntries = Object.entries(data.roles).sort((a, b) => b[1] - a[1]); + printCountGrid(roleEntries, 18); } +} - // Complexity +function printComplexity(data) { if (data.complexity) { const cx = data.complexity; const miPart = cx.avgMI != null ? 
` | avg MI: ${cx.avgMI} | min MI: ${cx.minMI}` : ''; @@ -124,15 +98,40 @@ export async function stats(customDbPath, opts = {}) { `\nComplexity: ${cx.analyzed} functions | avg cognitive: ${cx.avgCognitive} | avg cyclomatic: ${cx.avgCyclomatic} | max cognitive: ${cx.maxCognitive}${miPart}`, ); } +} - // Communities +function printCommunities(data) { if (data.communities) { const cm = data.communities; console.log( `\nCommunities: ${cm.communityCount} detected | modularity: ${cm.modularity} | drift: ${cm.driftScore}%`, ); } +} + +export async function stats(customDbPath, opts = {}) { + const data = statsData(customDbPath, { noTests: opts.noTests }); + + try { + const { communitySummaryForStats } = await import('../../features/communities.js'); + data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); + } catch { + /* graphology may not be available */ + } + if (outputResult(data, null, opts)) return; + + console.log('\n# Codegraph Stats\n'); + printNodes(data); + printEdges(data); + printFiles(data); + printCycles(data); + printHotspots(data); + printEmbeddings(data); + printQuality(data); + printRoles(data); + printComplexity(data); + printCommunities(data); console.log(); } From 3aa2e4b69d20e8798c9bd8b4892812c87d11bce2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:53:11 -0600 Subject: [PATCH 20/21] fix: address quality issues in features (boundaries, communities, triage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract per-section validators from validateBoundaryConfig (cog 101→2). Extract buildCommunityObjects and analyzeDrift from communitiesData (cog 32→4). Extract buildTriageItems and computeTriageSummary from triageData (bugs 1.4→0.48). 
Impact: 13 functions changed, 11 affected --- src/features/boundaries.js | 181 +++++++++++++++++++----------------- src/features/communities.js | 121 +++++++++++++----------- src/features/triage.js | 151 ++++++++++++++++-------------- 3 files changed, 249 insertions(+), 204 deletions(-) diff --git a/src/features/boundaries.js b/src/features/boundaries.js index 7a357ebd..536dbafa 100644 --- a/src/features/boundaries.js +++ b/src/features/boundaries.js @@ -94,104 +94,119 @@ export function resolveModules(boundaryConfig) { // ─── Validation ────────────────────────────────────────────────────── /** - * Validate a boundary configuration object. - * @param {object} config - The `manifesto.boundaries` config - * @returns {{ valid: boolean, errors: string[] }} + * Validate the `modules` section of a boundary config. + * @param {object} modules + * @param {string[]} errors - Mutated: push any validation errors */ -export function validateBoundaryConfig(config) { - const errors = []; +function validateModules(modules, errors) { + if (!modules || typeof modules !== 'object' || Object.keys(modules).length === 0) { + errors.push('boundaries.modules must be a non-empty object'); + return; + } + for (const [name, value] of Object.entries(modules)) { + if (typeof value === 'string') continue; + if (value && typeof value === 'object' && typeof value.match === 'string') continue; + errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); + } +} - if (!config || typeof config !== 'object') { - return { valid: false, errors: ['boundaries config must be an object'] }; +/** + * Validate the `preset` field of a boundary config. 
+ * @param {string|null|undefined} preset + * @param {string[]} errors - Mutated: push any validation errors + */ +function validatePreset(preset, errors) { + if (preset == null) return; + if (typeof preset !== 'string' || !PRESETS[preset]) { + errors.push( + `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${preset}")`, + ); } +} - // Validate modules - if ( - !config.modules || - typeof config.modules !== 'object' || - Object.keys(config.modules).length === 0 - ) { - errors.push('boundaries.modules must be a non-empty object'); - } else { - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'string') continue; - if (value && typeof value === 'object' && typeof value.match === 'string') continue; - errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); +/** + * Validate a single rule's target list (`notTo` or `onlyTo`). + * @param {*} list - The target list value + * @param {string} field - "notTo" or "onlyTo" + * @param {number} idx - Rule index for error messages + * @param {Set} moduleNames + * @param {string[]} errors - Mutated + */ +function validateTargetList(list, field, idx, moduleNames, errors) { + if (!Array.isArray(list)) { + errors.push(`boundaries.rules[${idx}]: "${field}" must be an array`); + return; + } + for (const target of list) { + if (!moduleNames.has(target)) { + errors.push(`boundaries.rules[${idx}]: "${field}" references unknown module "${target}"`); } } +} - // Validate preset - if (config.preset != null) { - if (typeof config.preset !== 'string' || !PRESETS[config.preset]) { - errors.push( - `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${config.preset}")`, - ); +/** + * Validate the `rules` array of a boundary config. 
+ * @param {Array} rules + * @param {object|undefined} modules - The modules config (for cross-referencing names) + * @param {string[]} errors - Mutated + */ +function validateRules(rules, modules, errors) { + if (!rules) return; + if (!Array.isArray(rules)) { + errors.push('boundaries.rules must be an array'); + return; + } + const moduleNames = modules ? new Set(Object.keys(modules)) : new Set(); + for (let i = 0; i < rules.length; i++) { + const rule = rules[i]; + if (!rule.from) { + errors.push(`boundaries.rules[${i}]: missing "from" field`); + } else if (!moduleNames.has(rule.from)) { + errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); + } + if (rule.notTo && rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); + } + if (!rule.notTo && !rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); } + if (rule.notTo) validateTargetList(rule.notTo, 'notTo', i, moduleNames, errors); + if (rule.onlyTo) validateTargetList(rule.onlyTo, 'onlyTo', i, moduleNames, errors); } +} - // Validate rules - if (config.rules) { - if (!Array.isArray(config.rules)) { - errors.push('boundaries.rules must be an array'); - } else { - const moduleNames = config.modules ? 
new Set(Object.keys(config.modules)) : new Set(); - for (let i = 0; i < config.rules.length; i++) { - const rule = config.rules[i]; - if (!rule.from) { - errors.push(`boundaries.rules[${i}]: missing "from" field`); - } else if (!moduleNames.has(rule.from)) { - errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); - } - if (rule.notTo && rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); - } - if (!rule.notTo && !rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); - } - if (rule.notTo) { - if (!Array.isArray(rule.notTo)) { - errors.push(`boundaries.rules[${i}]: "notTo" must be an array`); - } else { - for (const target of rule.notTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "notTo" references unknown module "${target}"`, - ); - } - } - } - } - if (rule.onlyTo) { - if (!Array.isArray(rule.onlyTo)) { - errors.push(`boundaries.rules[${i}]: "onlyTo" must be an array`); - } else { - for (const target of rule.onlyTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "onlyTo" references unknown module "${target}"`, - ); - } - } - } - } - } +/** + * Validate that module layer assignments match preset layers. 
+ * @param {object} config + * @param {string[]} errors - Mutated + */ +function validateLayerAssignments(config, errors) { + if (!config.preset || !PRESETS[config.preset] || !config.modules) return; + const presetLayers = new Set(PRESETS[config.preset].layers); + for (const [name, value] of Object.entries(config.modules)) { + if (typeof value === 'object' && value.layer && !presetLayers.has(value.layer)) { + errors.push( + `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, + ); } } +} - // Validate preset + layer assignments - if (config.preset && PRESETS[config.preset] && config.modules) { - const presetLayers = new Set(PRESETS[config.preset].layers); - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'object' && value.layer) { - if (!presetLayers.has(value.layer)) { - errors.push( - `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, - ); - } - } - } +/** + * Validate a boundary configuration object. + * @param {object} config - The `manifesto.boundaries` config + * @returns {{ valid: boolean, errors: string[] }} + */ +export function validateBoundaryConfig(config) { + if (!config || typeof config !== 'object') { + return { valid: false, errors: ['boundaries config must be an object'] }; } + const errors = []; + validateModules(config.modules, errors); + validatePreset(config.preset, errors); + validateRules(config.rules, config.modules, errors); + validateLayerAssignments(config, errors); return { valid: errors.length === 0, errors }; } diff --git a/src/features/communities.js b/src/features/communities.js index 062a89b5..f850dc8d 100644 --- a/src/features/communities.js +++ b/src/features/communities.js @@ -11,48 +11,18 @@ function getDirectory(filePath) { return dir === '.' ? 
'(root)' : dir; } -// ─── Core Analysis ──────────────────────────────────────────────────── +// ─── Community Building ────────────────────────────────────────────── /** - * Run Louvain community detection and return structured data. - * - * @param {string} [customDbPath] - Path to graph.db - * @param {object} [opts] - * @param {boolean} [opts.functions] - Function-level instead of file-level - * @param {number} [opts.resolution] - Louvain resolution (default 1.0) - * @param {boolean} [opts.noTests] - Exclude test files - * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) - * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) - * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + * Group graph nodes by Louvain community assignment and build structured objects. + * @param {object} graph - The dependency graph + * @param {Map} assignments - Node key → community ID + * @param {object} opts + * @param {boolean} [opts.drift] - If true, omit member lists + * @returns {{ communities: object[], communityDirs: Map> }} */ -export function communitiesData(customDbPath, opts = {}) { - const { repo, close } = openRepo(customDbPath, opts); - let graph; - try { - graph = buildDependencyGraph(repo, { - fileLevel: !opts.functions, - noTests: opts.noTests, - }); - } finally { - close(); - } - - // Handle empty or trivial graphs - if (graph.nodeCount === 0 || graph.edgeCount === 0) { - return { - communities: [], - modularity: 0, - drift: { splitCandidates: [], mergeCandidates: [] }, - summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, - }; - } - - // Run Louvain - const resolution = opts.resolution ?? 
1.0; - const { assignments, modularity } = louvainCommunities(graph, { resolution }); - - // Group nodes by community - const communityMap = new Map(); // community id → node keys[] +function buildCommunityObjects(graph, assignments, opts) { + const communityMap = new Map(); for (const [key] of graph.nodes()) { const cid = assignments.get(key); if (cid == null) continue; @@ -60,9 +30,8 @@ export function communitiesData(customDbPath, opts = {}) { communityMap.get(cid).push(key); } - // Build community objects const communities = []; - const communityDirs = new Map(); // community id → Set + const communityDirs = new Map(); for (const [cid, members] of communityMap) { const dirCounts = {}; @@ -88,19 +57,27 @@ export function communitiesData(customDbPath, opts = {}) { }); } - // Sort by size descending communities.sort((a, b) => b.size - a.size); + return { communities, communityDirs }; +} - // ─── Drift Analysis ───────────────────────────────────────────── +// ─── Drift Analysis ────────────────────────────────────────────────── - // Split candidates: directories with members in 2+ communities - const dirToCommunities = new Map(); // dir → Set +/** + * Compute split/merge candidates and drift score from community directory data. 
+ * @param {object[]} communities - Community objects with `directories` + * @param {Map>} communityDirs - Community ID → directory set + * @returns {{ splitCandidates: object[], mergeCandidates: object[], driftScore: number }} + */ +function analyzeDrift(communities, communityDirs) { + const dirToCommunities = new Map(); for (const [cid, dirs] of communityDirs) { for (const dir of dirs) { if (!dirToCommunities.has(dir)) dirToCommunities.set(dir, new Set()); dirToCommunities.get(dir).add(cid); } } + const splitCandidates = []; for (const [dir, cids] of dirToCommunities) { if (cids.size >= 2) { @@ -109,7 +86,6 @@ export function communitiesData(customDbPath, opts = {}) { } splitCandidates.sort((a, b) => b.communityCount - a.communityCount); - // Merge candidates: communities spanning 2+ directories const mergeCandidates = []; for (const c of communities) { const dirCount = Object.keys(c.directories).length; @@ -124,17 +100,56 @@ export function communitiesData(customDbPath, opts = {}) { } mergeCandidates.sort((a, b) => b.directoryCount - a.directoryCount); - // Drift score: 0-100 based on how much directory structure diverges from communities const totalDirs = dirToCommunities.size; - const splitDirs = splitCandidates.length; - const splitRatio = totalDirs > 0 ? splitDirs / totalDirs : 0; - + const splitRatio = totalDirs > 0 ? splitCandidates.length / totalDirs : 0; const totalComms = communities.length; - const mergeComms = mergeCandidates.length; - const mergeRatio = totalComms > 0 ? mergeComms / totalComms : 0; - + const mergeRatio = totalComms > 0 ? mergeCandidates.length / totalComms : 0; const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100); + return { splitCandidates, mergeCandidates, driftScore }; +} + +// ─── Core Analysis ──────────────────────────────────────────────────── + +/** + * Run Louvain community detection and return structured data. 
+ * + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] + * @param {boolean} [opts.functions] - Function-level instead of file-level + * @param {number} [opts.resolution] - Louvain resolution (default 1.0) + * @param {boolean} [opts.noTests] - Exclude test files + * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) + * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) + * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + */ +export function communitiesData(customDbPath, opts = {}) { + const { repo, close } = openRepo(customDbPath, opts); + let graph; + try { + graph = buildDependencyGraph(repo, { + fileLevel: !opts.functions, + noTests: opts.noTests, + }); + } finally { + close(); + } + + if (graph.nodeCount === 0 || graph.edgeCount === 0) { + return { + communities: [], + modularity: 0, + drift: { splitCandidates: [], mergeCandidates: [] }, + summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, + }; + } + + const resolution = opts.resolution ?? 1.0; + const { assignments, modularity } = louvainCommunities(graph, { resolution }); + + const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); + const { splitCandidates, mergeCandidates, driftScore } = analyzeDrift(communities, communityDirs); + const base = { communities: opts.drift ? 
[] : communities, modularity: +modularity.toFixed(4), diff --git a/src/features/triage.js b/src/features/triage.js index 00b35ccd..8c23875a 100644 --- a/src/features/triage.js +++ b/src/features/triage.js @@ -4,8 +4,83 @@ import { warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +// ─── Scoring ───────────────────────────────────────────────────────── + +const SORT_FNS = { + risk: (a, b) => b.riskScore - a.riskScore, + complexity: (a, b) => b.cognitive - a.cognitive, + churn: (a, b) => b.churn - a.churn, + 'fan-in': (a, b) => b.fanIn - a.fanIn, + mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, +}; + +/** + * Build scored triage items from raw rows and risk metrics. + * @param {object[]} rows - Raw DB rows + * @param {object[]} riskMetrics - Per-row risk metric objects from scoreRisk + * @returns {object[]} + */ +function buildTriageItems(rows, riskMetrics) { + return rows.map((r, i) => ({ + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + role: r.role || null, + fanIn: r.fan_in, + cognitive: r.cognitive, + churn: r.churn, + maintainabilityIndex: r.mi, + normFanIn: riskMetrics[i].normFanIn, + normComplexity: riskMetrics[i].normComplexity, + normChurn: riskMetrics[i].normChurn, + normMI: riskMetrics[i].normMI, + roleWeight: riskMetrics[i].roleWeight, + riskScore: riskMetrics[i].riskScore, + })); +} + +/** + * Compute signal coverage and summary statistics. 
+ * @param {object[]} filtered - All filtered rows + * @param {object[]} scored - Scored and filtered items + * @param {object} weights - Active weights + * @returns {object} + */ +function computeTriageSummary(filtered, scored, weights) { + const signalCoverage = { + complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), + churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), + fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), + mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), + }; + + const scores = scored.map((it) => it.riskScore); + const avgScore = + scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; + const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + + return { + total: filtered.length, + analyzed: scored.length, + avgScore, + maxScore, + weights, + signalCoverage, + }; +} + // ─── Data Function ──────────────────────────────────────────────────── +const EMPTY_SUMMARY = (weights) => ({ + total: 0, + analyzed: 0, + avgScore: 0, + maxScore: 0, + weights, + signalCoverage: {}, +}); + /** * Compute composite risk scores for all symbols. * @@ -17,9 +92,6 @@ export function triageData(customDbPath, opts = {}) { const { repo, close } = openRepo(customDbPath, opts); try { const noTests = opts.noTests || false; - const fileFilter = opts.file || null; - const kindFilter = opts.kind || null; - const roleFilter = opts.role || null; const minScore = opts.minScore != null ? 
Number(opts.minScore) : null; const sort = opts.sort || 'risk'; const weights = { ...DEFAULT_WEIGHTS, ...(opts.weights || {}) }; @@ -28,86 +100,29 @@ export function triageData(customDbPath, opts = {}) { try { rows = repo.findNodesForTriage({ noTests, - file: fileFilter, - kind: kindFilter, - role: roleFilter, + file: opts.file || null, + kind: opts.kind || null, + role: opts.role || null, }); } catch (err) { warn(`triage query failed: ${err.message}`); - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Post-filter test files (belt-and-suspenders) const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - if (filtered.length === 0) { - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Delegate scoring to classifier const riskMetrics = scoreRisk(filtered, weights); + const items = buildTriageItems(filtered, riskMetrics); - // Compute risk scores - const items = filtered.map((r, i) => ({ - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - role: r.role || null, - fanIn: r.fan_in, - cognitive: r.cognitive, - churn: r.churn, - maintainabilityIndex: r.mi, - normFanIn: riskMetrics[i].normFanIn, - normComplexity: riskMetrics[i].normComplexity, - normChurn: riskMetrics[i].normChurn, - normMI: riskMetrics[i].normMI, - roleWeight: riskMetrics[i].roleWeight, - riskScore: riskMetrics[i].riskScore, - })); - - // Apply minScore filter const scored = minScore != null ? 
items.filter((it) => it.riskScore >= minScore) : items; - - // Sort - const sortFns = { - risk: (a, b) => b.riskScore - a.riskScore, - complexity: (a, b) => b.cognitive - a.cognitive, - churn: (a, b) => b.churn - a.churn, - 'fan-in': (a, b) => b.fanIn - a.fanIn, - mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, - }; - scored.sort(sortFns[sort] || sortFns.risk); - - // Signal coverage: % of items with non-zero signal - const signalCoverage = { - complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), - churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), - fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), - mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), - }; - - const scores = scored.map((it) => it.riskScore); - const avgScore = - scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; - const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + scored.sort(SORT_FNS[sort] || SORT_FNS.risk); const result = { items: scored, - summary: { - total: filtered.length, - analyzed: scored.length, - avgScore, - maxScore, - weights, - signalCoverage, - }, + summary: computeTriageSummary(filtered, scored, weights), }; return paginateResult(result, 'items', { From 330d33aea3cb5e9182e062019737e9a4b0088351 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 06:18:03 -0600 Subject: [PATCH 21/21] fix: include remaining merge changes from main --- docs/examples/claude-code-skills/README.md | 31 ++++++++++++-------- src/cli/commands/roles.js | 3 +- src/mcp/tool-registry.js | 2 +- src/shared/kinds.js | 2 +- tests/graph/classifiers/roles.test.js | 25 ++++++++++++++++ tests/integration/roles.test.js | 34 ++++++++++++++++++++++ tests/unit/roles.test.js | 10 ++++++- 7 files changed, 91 insertions(+), 16 deletions(-) diff --git a/docs/examples/claude-code-skills/README.md 
b/docs/examples/claude-code-skills/README.md index 1c38adb2..ed25abab 100644 --- a/docs/examples/claude-code-skills/README.md +++ b/docs/examples/claude-code-skills/README.md @@ -22,6 +22,12 @@ A single AI agent cannot hold an entire large codebase in context. The Titan Par /titan-sync → sync.json (execution plan) │ ▼ +/titan-forge → executes sync.json (one phase per invocation, resumable) + │ ├─ codegraph context/fn-impact before each change + │ ├─ /titan-gate validates each commit + │ └─ advances titan-state.json + │ + ▼ /titan-gate (validates each commit: codegraph + lint/build/test) /titan-reset (escape hatch: clean up everything) @@ -34,6 +40,7 @@ A single AI agent cannot hold an entire large codebase in context. The Titan Par | `/titan-recon` | RECON | Builds graph + embeddings, complexity health baseline, domains, priority queue, work batches, `GLOBAL_ARCH.md`, baseline snapshot | `titan-state.json` | | `/titan-gauntlet` | GAUNTLET | 4-pillar audit (17 rules) using full codegraph metrics (`cognitive`, `cyclomatic`, `halstead.bugs`, `halstead.effort`, `mi`, `loc.sloc`). Batches of 5, NDJSON writes, session resume | `gauntlet.ndjson` | | `/titan-sync` | GLOBAL SYNC | Dependency clusters, code ownership, shared abstractions, ordered execution plan with logical commits | `sync.json` | +| `/titan-forge` | FORGE | Executes `sync.json` one phase at a time — makes code changes, validates with `/titan-gate`, commits, tracks progress. Resumable across sessions | `titan-state.json` (execution block) | | `/titan-gate` | STATE MACHINE | `codegraph check --staged --cycles --blast-radius 30 --boundaries` + lint/build/test. Snapshot restore on failure | `gate-log.ndjson` | | `/titan-reset` | ESCAPE HATCH | Restores baseline snapshot, deletes all artifacts and snapshots, rebuilds graph | — | @@ -62,8 +69,8 @@ codegraph build . 
/titan-recon # Map the codebase, produce priority queue + embeddings /titan-gauntlet 5 # Audit top targets in batches of 5 /titan-sync # Plan shared abstractions and execution order -# ... make changes based on sync plan ... -/titan-gate # Validate before each commit +/titan-forge # Execute next phase (re-run for each phase) + # (calls /titan-gate automatically per commit) ``` If GAUNTLET runs out of context, just re-invoke `/titan-gauntlet` — it resumes from the next pending batch. @@ -73,6 +80,7 @@ If GAUNTLET runs out of context, just re-invoke `/titan-gauntlet` — it resumes - `/titan-recon` always works standalone (builds graph fresh) - `/titan-gauntlet` falls back to `codegraph triage` if no RECON artifact exists - `/titan-sync` requires GAUNTLET artifacts (warns if missing) +- `/titan-forge` requires SYNC artifacts (`sync.json`); supports `--phase N`, `--target `, `--dry-run` - `/titan-gate` works with or without prior artifacts (uses default thresholds) - `/titan-reset` cleans up everything — use when you want to start over @@ -83,11 +91,10 @@ If GAUNTLET runs out of context, just re-invoke `/titan-gauntlet` — it resumes /titan-gauntlet # Once (or multiple sessions): audit everything /titan-sync # Once: plan the work -# Then for each fix: -# 1. Make changes based on sync plan -# 2. Stage changes -/titan-gate # Validate -# 3. 
Commit if PASS +# Then for each phase: +/titan-forge # Executes one phase, validates, commits +/titan-forge # Re-run for next phase +/titan-forge # ...until all phases complete ``` ## Artifacts @@ -100,7 +107,7 @@ All artifacts are written to `.codegraph/titan/` (6 files, no redundancy): | `GLOBAL_ARCH.md` | Markdown | RECON | GAUNTLET, SYNC | | `gauntlet.ndjson` | NDJSON | GAUNTLET | SYNC | | `gauntlet-summary.json` | JSON | GAUNTLET | SYNC, GATE | -| `sync.json` | JSON | SYNC | GATE | +| `sync.json` | JSON | SYNC | FORGE, GATE | | `gate-log.ndjson` | NDJSON | GATE | Audit trail | NDJSON format (one JSON object per line) means partial results survive crashes mid-batch. @@ -170,21 +177,21 @@ All skills enforce worktree isolation as their first step. If invoked from the m | `codegraph communities` | RECON | Module boundaries and drift | | `codegraph roles` | RECON, GAUNTLET | Core/dead/entry symbol classification | | `codegraph structure` | RECON | Directory cohesion | -| `codegraph complexity --health` | RECON, GAUNTLET, GATE | Full metrics: cognitive, cyclomatic, nesting, Halstead, MI | +| `codegraph complexity --health` | RECON, GAUNTLET, GATE, FORGE | Full metrics: cognitive, cyclomatic, nesting, Halstead, MI | | `codegraph complexity --above-threshold` | RECON | Only functions exceeding thresholds | | `codegraph batch complexity` | GAUNTLET | Multi-target complexity in one call | | `codegraph batch context` | GAUNTLET | Multi-target context in one call | | `codegraph check --staged --cycles --blast-radius --boundaries` | GATE | Full validation predicates | | `codegraph ast --kind call\|await\|string` | GAUNTLET | AST pattern detection | | `codegraph dataflow` | GAUNTLET | Data flow and mutation analysis | -| `codegraph exports` | GAUNTLET | Per-symbol export consumers | -| `codegraph fn-impact` | GAUNTLET, SYNC | Blast radius | +| `codegraph exports` | GAUNTLET, FORGE | Per-symbol export consumers | +| `codegraph fn-impact` | GAUNTLET, SYNC, FORGE | Blast 
radius | | `codegraph search` | GAUNTLET | Duplicate code detection (needs embeddings) | | `codegraph co-change` | GAUNTLET, SYNC | Git history coupling | | `codegraph path` | SYNC | Dependency paths between targets | | `codegraph cycles` | SYNC, GATE | Circular dependency detection | | `codegraph deps` | SYNC | File-level dependency map | -| `codegraph context` | SYNC | Full function context | +| `codegraph context` | SYNC, FORGE | Full function context | | `codegraph owners` | SYNC | CODEOWNERS mapping for cross-team coordination | | `codegraph branch-compare` | SYNC, GATE | Structural diff between refs | | `codegraph diff-impact` | GATE | Impact of staged changes | diff --git a/src/cli/commands/roles.js b/src/cli/commands/roles.js index df756333..cb5a66c1 100644 --- a/src/cli/commands/roles.js +++ b/src/cli/commands/roles.js @@ -3,7 +3,8 @@ import { roles } from '../../presentation/queries-cli.js'; export const command = { name: 'roles', - description: 'Show node role classification: entry, core, utility, adapter, dead, leaf', + description: + 'Show node role classification: entry, core, utility, adapter, dead, test-only, leaf', options: [ ['-d, --db ', 'Path to graph.db'], ['--role ', `Filter by role (${VALID_ROLES.join(', ')})`], diff --git a/src/mcp/tool-registry.js b/src/mcp/tool-registry.js index c81baee8..08a2d260 100644 --- a/src/mcp/tool-registry.js +++ b/src/mcp/tool-registry.js @@ -362,7 +362,7 @@ const BASE_TOOLS = [ { name: 'node_roles', description: - 'Show node role classification (entry, core, utility, adapter, dead, leaf) based on connectivity patterns', + 'Show node role classification (entry, core, utility, adapter, dead, test-only, leaf) based on connectivity patterns', inputSchema: { type: 'object', properties: { diff --git a/src/shared/kinds.js b/src/shared/kinds.js index 3f469c43..498ad210 100644 --- a/src/shared/kinds.js +++ b/src/shared/kinds.js @@ -47,4 +47,4 @@ export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; // Full 
set for MCP enum and validation export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; -export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'test-only', 'leaf']; diff --git a/tests/graph/classifiers/roles.test.js b/tests/graph/classifiers/roles.test.js index b790996c..e76cc539 100644 --- a/tests/graph/classifiers/roles.test.js +++ b/tests/graph/classifiers/roles.test.js @@ -60,4 +60,29 @@ describe('classifyRoles', () => { const roles = classifyRoles(nodes); expect(roles.get('1')).toBe('leaf'); }); + + it('classifies test-only when fanIn is 0 but testOnlyFanIn > 0', () => { + const nodes = [ + { id: '1', name: 'helperForTests', fanIn: 0, fanOut: 0, isExported: false, testOnlyFanIn: 3 }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).toBe('test-only'); + }); + + it('classifies dead when fanIn is 0 and testOnlyFanIn is 0', () => { + const nodes = [ + { id: '1', name: 'reallyDead', fanIn: 0, fanOut: 0, isExported: false, testOnlyFanIn: 0 }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).toBe('dead'); + }); + + it('ignores testOnlyFanIn when fanIn > 0', () => { + const nodes = [ + { id: '1', name: 'normalLeaf', fanIn: 1, fanOut: 0, isExported: false, testOnlyFanIn: 2 }, + { id: '2', name: 'hub', fanIn: 10, fanOut: 10, isExported: true }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).toBe('leaf'); + }); }); diff --git a/tests/integration/roles.test.js b/tests/integration/roles.test.js index 2a92b16a..04dca9b1 100644 --- a/tests/integration/roles.test.js +++ b/tests/integration/roles.test.js @@ -60,6 +60,7 @@ beforeAll(() => { const helper = insertNode(db, 'helper', 'function', 'lib.js', 1); const format = insertNode(db, 'format', 'function', 'lib.js', 10); insertNode(db, 'unused', 'function', 'lib.js', 20); + const testHelper = insertNode(db, 'testHelper', 'function', 'lib.js', 
30); const testFn = insertNode(db, 'testMain', 'function', 'app.test.js', 1); // Import edges @@ -72,11 +73,13 @@ beforeAll(() => { // processData → format (cross-file) → makes format exported // helper → format (same file) // testFn → main (cross-file) → makes main exported + // testFn → testHelper (cross-file) → testHelper only called from test insertEdge(db, main, process_, 'calls'); insertEdge(db, main, helper, 'calls'); insertEdge(db, process_, format, 'calls'); insertEdge(db, helper, format, 'calls'); insertEdge(db, testFn, main, 'calls'); + insertEdge(db, testFn, testHelper, 'calls'); // unused has no callers and no cross-file callers → dead @@ -133,6 +136,37 @@ describe('rolesData', () => { expect(s.file).not.toMatch(/\.test\./); } }); + + test('reclassifies test-only-called symbols when noTests is true', () => { + const data = rolesData(dbPath, { noTests: true }); + const th = data.symbols.find((s) => s.name === 'testHelper'); + expect(th).toBeDefined(); + expect(th.role).toBe('test-only'); + }); + + test('does not reclassify test-only-called symbols when noTests is false', () => { + const data = rolesData(dbPath); + const th = data.symbols.find((s) => s.name === 'testHelper'); + expect(th).toBeDefined(); + expect(th.role).not.toBe('test-only'); + }); + + test('filters by role=test-only with noTests', () => { + const data = rolesData(dbPath, { noTests: true, role: 'test-only' }); + expect(data.count).toBeGreaterThan(0); + for (const s of data.symbols) { + expect(s.role).toBe('test-only'); + } + const names = data.symbols.map((s) => s.name); + expect(names).toContain('testHelper'); + }); + + test('unused symbol stays dead with noTests', () => { + const data = rolesData(dbPath, { noTests: true }); + const unused = data.symbols.find((s) => s.name === 'unused'); + expect(unused).toBeDefined(); + expect(unused.role).toBe('dead'); + }); }); // ─── statsData includes roles ─────────────────────────────────────────── diff --git a/tests/unit/roles.test.js 
b/tests/unit/roles.test.js index b703f899..1e8702a2 100644 --- a/tests/unit/roles.test.js +++ b/tests/unit/roles.test.js @@ -151,7 +151,15 @@ describe('classifyNodeRoles', () => { it('handles empty graph without crashing', () => { const summary = classifyNodeRoles(db); - expect(summary).toEqual({ entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }); + expect(summary).toEqual({ + entry: 0, + core: 0, + utility: 0, + adapter: 0, + dead: 0, + 'test-only': 0, + leaf: 0, + }); }); it('adapts median thresholds to data', () => {