From 66d67f799a0cfb30f86175d3531463efb665fee6 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 01:11:41 -0600 Subject: [PATCH 01/10] refactor(ast): remove redundant call AST node extraction from WASM visitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call sites are already captured in FileSymbols.calls during extraction and stored as graph edges — the separate `call` entries in ast_nodes were written but never queried. This removes ~100 lines of call-specific subtree walking logic (walkSubtree, walkCallArguments, collectNode, extractCallName) and the call_expression entry from JS/TS astTypes. --- src/ast-analysis/rules/javascript.ts | 1 - .../visitors/ast-store-visitor.ts | 135 +++++------------- 2 files changed, 33 insertions(+), 103 deletions(-) diff --git a/src/ast-analysis/rules/javascript.ts b/src/ast-analysis/rules/javascript.ts index b4cec274..8140abc4 100644 --- a/src/ast-analysis/rules/javascript.ts +++ b/src/ast-analysis/rules/javascript.ts @@ -237,7 +237,6 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({ // ─── AST Node Types ─────────────────────────────────────────────────────── export const astTypes: Record | null = { - call_expression: 'call', new_expression: 'new', throw_statement: 'throw', await_expression: 'await', diff --git a/src/ast-analysis/visitors/ast-store-visitor.ts b/src/ast-analysis/visitors/ast-store-visitor.ts index a7cdcddd..08c54a4e 100644 --- a/src/ast-analysis/visitors/ast-store-visitor.ts +++ b/src/ast-analysis/visitors/ast-store-visitor.ts @@ -44,14 +44,6 @@ function extractExpressionText(node: TreeSitterNode): string | null { return truncate(node.text); } -function extractCallName(node: TreeSitterNode): string { - for (const field of ['function', 'method', 'name']) { - const fn = node.childForFieldName(field); - if (fn) return fn.text; - } - return node.text?.split('(')[0] || '?'; -} - function 
extractName(kind: string, node: TreeSitterNode): string | null { if (kind === 'throw') { for (let i = 0; i < node.childCount; i++) { @@ -110,93 +102,6 @@ export function createAstStoreVisitor( return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } - /** Recursively walk a subtree collecting AST nodes — used for arguments-only traversal. */ - function walkSubtree(node: TreeSitterNode | null): void { - if (!node) return; - if (matched.has(node.id)) return; - - const kind = astTypeMap[node.type]; - if (kind === 'call') { - // Capture this call and recurse only into its arguments - collectNode(node, kind); - walkCallArguments(node); - return; - } - if (kind) { - collectNode(node, kind); - if (kind !== 'string' && kind !== 'regex') return; // skipChildren for non-leaf kinds - } - for (let i = 0; i < node.childCount; i++) { - walkSubtree(node.child(i)); - } - } - - /** - * Recurse into only the arguments of a call node — mirrors the native engine's - * strategy that prevents double-counting nested calls in the function field - * (e.g. chained calls like `a().b()`). - */ - function walkCallArguments(callNode: TreeSitterNode): void { - // Try field-based lookup first, fall back to kind-based matching - const argsNode = - callNode.childForFieldName('arguments') ?? 
- findChildByKind(callNode, ['arguments', 'argument_list', 'method_arguments']); - if (!argsNode) return; - for (let i = 0; i < argsNode.childCount; i++) { - walkSubtree(argsNode.child(i)); - } - } - - function findChildByKind(node: TreeSitterNode, kinds: string[]): TreeSitterNode | null { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child && kinds.includes(child.type)) return child; - } - return null; - } - - function collectNode(node: TreeSitterNode, kind: string): void { - if (matched.has(node.id)) return; - - const line = node.startPosition.row + 1; - let name: string | null | undefined; - let text: string | null = null; - - if (kind === 'call') { - name = extractCallName(node); - text = truncate(node.text); - } else if (kind === 'new') { - name = extractNewName(node); - text = truncate(node.text); - } else if (kind === 'throw') { - name = extractName('throw', node); - text = extractExpressionText(node); - } else if (kind === 'await') { - name = extractName('await', node); - text = extractExpressionText(node); - } else if (kind === 'string') { - const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; - if (content.length < 2) return; - name = truncate(content, 100); - text = truncate(node.text); - } else if (kind === 'regex') { - name = node.text || '?'; - text = truncate(node.text); - } - - rows.push({ - file: relPath, - line, - kind, - name, - text, - receiver: null, - parentNodeId: resolveParentNodeId(line), - }); - - matched.add(node.id); - } - return { name: 'ast-store', @@ -206,15 +111,41 @@ export function createAstStoreVisitor( const kind = astTypeMap[node.type]; if (!kind) return; - collectNode(node, kind); - - if (kind === 'call') { - // Mirror native: skip full subtree, recurse only into arguments. - // Prevents double-counting chained calls like service.getUser().getName(). 
- walkCallArguments(node); - return { skipChildren: true }; + const line = node.startPosition.row + 1; + let name: string | null | undefined; + let text: string | null = null; + + if (kind === 'new') { + name = extractNewName(node); + text = truncate(node.text); + } else if (kind === 'throw') { + name = extractName('throw', node); + text = extractExpressionText(node); + } else if (kind === 'await') { + name = extractName('await', node); + text = extractExpressionText(node); + } else if (kind === 'string') { + const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; + if (content.length < 2) return; + name = truncate(content, 100); + text = truncate(node.text); + } else if (kind === 'regex') { + name = node.text || '?'; + text = truncate(node.text); } + rows.push({ + file: relPath, + line, + kind, + name, + text, + receiver: null, + parentNodeId: resolveParentNodeId(line), + }); + + matched.add(node.id); + if (kind !== 'string' && kind !== 'regex') { return { skipChildren: true }; } From fa56fa07239f4a71562fc91970862d9d3d17a89b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 01:25:07 -0600 Subject: [PATCH 02/10] refactor(edges): remove pre-3.2.0 receiver edge supplement shim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The supplementReceiverEdges fallback was added for native binaries older than 3.2.0 that didn't emit receiver or type-resolved method-call edges. Current version is 3.5.0 — all published binaries handle receivers natively. Removes ~70 lines of dead compat code. 
--- .../graph/builder/stages/build-edges.ts | 70 +------------------ 1 file changed, 2 insertions(+), 68 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 626d5e6d..20652a84 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -61,12 +61,6 @@ interface NativeEdge { dynamic: number; } -/** TypeMap entry used in receiver supplement (normalized from native format). */ -interface NormalizedTypeEntry { - type: string; - confidence: number; -} - // ── Node lookup setup ─────────────────────────────────────────────────── function makeGetNodeIdStmt(db: BetterSqlite3Database): NodeIdStmt { @@ -210,14 +204,6 @@ function buildCallEdgesNative( for (const e of nativeEdges) { allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); } - - // Older native binaries (< 3.2.0) don't emit receiver or type-resolved method-call - // edges. Supplement them on the JS side if the native binary missed them. 
- // TODO: Remove once all published native binaries handle receivers (>= 3.2.0) - const hasReceiver = nativeEdges.some((e) => e.kind === 'receiver'); - if (!hasReceiver) { - supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows); - } } function buildImportedNamesForNative( @@ -242,58 +228,6 @@ function buildImportedNamesForNative( return importedNames; } -// ── Receiver edge supplement for older native binaries ────────────────── - -function supplementReceiverEdges( - ctx: PipelineContext, - nativeFiles: NativeFileEntry[], - getNodeIdStmt: NodeIdStmt, - allEdgeRows: EdgeRowTuple[], -): void { - const seenCallEdges = new Set(); - // Collect existing edges to avoid duplicates - for (const row of allEdgeRows) { - seenCallEdges.add(`${row[0]}|${row[1]}|${row[2]}`); - } - - for (const nf of nativeFiles) { - const relPath = nf.file; - const typeMap = new Map( - nf.typeMap.map((t) => [t.name, { type: t.typeName, confidence: t.confidence ?? 0.9 }]), - ); - const fileNodeRow = { id: nf.fileNodeId }; - - for (const call of nf.calls) { - if (!call.receiver || BUILTIN_RECEIVERS.has(call.receiver)) continue; - if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') - continue; - - const caller = findCaller(call, nf.definitions, relPath, getNodeIdStmt, fileNodeRow); - - // Receiver edge: caller → receiver type node - buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap); - - // Type-resolved method call: caller → Type.method - const typeEntry = typeMap.get(call.receiver); - const typeName = typeEntry ? 
typeEntry.type : null; - if (typeName) { - const qualifiedName = `${typeName}.${call.name}`; - const targets = (ctx.nodesByName.get(qualifiedName) || []).filter( - (n) => n.kind === 'method', - ); - for (const t of targets) { - const key = `${caller.id}|${t.id}|calls`; - if (t.id !== caller.id && !seenCallEdges.has(key)) { - seenCallEdges.add(key); - const confidence = computeConfidence(relPath, t.file, null); - allEdgeRows.push([caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0]); - } - } - } - } - } -} - // ── Call edges (JS fallback) ──────────────────────────────────────────── function buildCallEdgesJS( @@ -495,7 +429,7 @@ function buildReceiverEdge( relPath: string, seenCallEdges: Set, allEdgeRows: EdgeRowTuple[], - typeMap: Map, + typeMap: Map, ): void { const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); const typeEntry = typeMap?.get(call.receiver!); @@ -608,7 +542,7 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea /** * For scoped node loading, patch nodesByName.get with a lazy SQL fallback - * so global name-only lookups (resolveByMethodOrGlobal, supplementReceiverEdges) + * so global name-only lookups (resolveByMethodOrGlobal) * can still find nodes outside the scoped set. */ function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void { From 8e6a9e88b2bd261080326e648eb519687f74fb04 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 02:01:02 -0600 Subject: [PATCH 03/10] perf(db): native rusqlite bulk-insert paths for complexity, CFG, and dataflow Add three new NativeDatabase methods (bulk_insert_complexity, bulk_insert_cfg, bulk_insert_dataflow) that write pre-computed Rust analysis results directly to SQLite via rusqlite, bypassing better-sqlite3 JS round-trips. 
Wire native fast paths in complexity.ts, cfg.ts, and dataflow.ts that try the rusqlite bulk insert first, falling back to better-sqlite3 when native data is unavailable. Update README with a table explaining what Rust handles on the native path end-to-end. --- README.md | 13 ++ crates/codegraph-core/src/native_db.rs | 236 +++++++++++++++++++++++++ src/features/cfg.ts | 47 ++++- src/features/complexity.ts | 57 +++++- src/features/dataflow.ts | 106 ++++++++++- src/types.ts | 50 ++++++ 6 files changed, 506 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9b0b5c81..aec62699 100644 --- a/README.md +++ b/README.md @@ -531,6 +531,19 @@ Codegraph ships with two parsing engines: Both engines produce identical output. Use `--engine native|wasm|auto` to control selection (default: `auto`). +On the native path, Rust handles the entire hot pipeline end-to-end: + +| Phase | What Rust does | +|-------|---------------| +| **Parse** | Parallel multi-file tree-sitter parsing via rayon (3.5× faster than WASM) | +| **Extract** | Symbols, imports, calls, classes, type maps, AST nodes — all in one pass | +| **Analyze** | Complexity (cognitive, cyclomatic, Halstead), CFG, and dataflow pre-computed per function during parse | +| **Resolve** | Import resolution with 6-level priority system and confidence scoring | +| **Edges** | Call, receiver, extends, and implements edge inference | +| **DB writes** | All inserts (nodes, edges, AST nodes, complexity, CFG, dataflow) via rusqlite — `better-sqlite3` is lazy-loaded only for the WASM fallback path | + +The Rust crate (`crates/codegraph-core/`) exposes a `NativeDatabase` napi-rs class that holds a persistent `rusqlite::Connection` for the full build lifecycle, eliminating JS↔SQLite round-trips on every operation. 
+ ### Call Resolution Calls are resolved with **qualified resolution** — method calls (`obj.method()`) are distinguished from standalone function calls, and built-in receivers (`console`, `Math`, `JSON`, `Array`, `Promise`, etc.) are filtered out automatically. Import scope is respected: a call to `foo()` only resolves to functions that are actually imported or defined in the same file, eliminating false positives from name collisions. diff --git a/crates/codegraph-core/src/native_db.rs b/crates/codegraph-core/src/native_db.rs index 8f55b00c..ce58d28f 100644 --- a/crates/codegraph-core/src/native_db.rs +++ b/crates/codegraph-core/src/native_db.rs @@ -281,6 +281,74 @@ pub struct BuildMetaEntry { pub value: String, } +// ── Bulk-insert input types ──────────────────────────────────────────── + +/// A single complexity metrics row for bulk insertion. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct ComplexityRow { + pub node_id: i64, + pub cognitive: u32, + pub cyclomatic: u32, + pub max_nesting: u32, + pub loc: u32, + pub sloc: u32, + pub comment_lines: u32, + pub halstead_n1: u32, + pub halstead_n2: u32, + pub halstead_big_n1: u32, + pub halstead_big_n2: u32, + pub halstead_vocabulary: u32, + pub halstead_length: u32, + pub halstead_volume: f64, + pub halstead_difficulty: f64, + pub halstead_effort: f64, + pub halstead_bugs: f64, + pub maintainability_index: f64, +} + +/// A CFG entry for a single function: blocks + edges. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct CfgEntry { + pub node_id: i64, + pub blocks: Vec, + pub edges: Vec, +} + +/// A single CFG block for bulk insertion. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct CfgBlockRow { + pub index: u32, + pub block_type: String, + pub start_line: Option, + pub end_line: Option, + pub label: Option, +} + +/// A single CFG edge for bulk insertion. 
+#[napi(object)] +#[derive(Debug, Clone)] +pub struct CfgEdgeRow { + pub source_index: u32, + pub target_index: u32, + pub kind: String, +} + +/// A single dataflow edge for bulk insertion. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct DataflowEdge { + pub source_id: i64, + pub target_id: i64, + pub kind: String, + pub param_index: Option, + pub expression: Option, + pub line: Option, + pub confidence: f64, +} + // ── NativeDatabase class ──────────────────────────────────────────────── /// Persistent rusqlite Connection wrapper exposed to JS via napi-rs. @@ -698,6 +766,174 @@ impl NativeDatabase { Ok(ast_db::do_insert_ast_nodes(conn, &batches).unwrap_or(0)) } + /// Bulk-insert complexity metrics for functions/methods. + /// Each row maps a node_id to its complexity metrics. + /// Returns the number of rows inserted (0 on failure). + #[napi] + pub fn bulk_insert_complexity(&self, rows: Vec) -> napi::Result { + if rows.is_empty() { + return Ok(0); + } + let conn = self.conn()?; + if !has_table(conn, "function_complexity") { + return Ok(0); + } + let tx = conn + .unchecked_transaction() + .map_err(|e| napi::Error::from_reason(format!("complexity tx failed: {e}")))?; + let mut total = 0u32; + { + let mut stmt = tx.prepare( + "INSERT OR REPLACE INTO function_complexity \ + (node_id, cognitive, cyclomatic, max_nesting, \ + loc, sloc, comment_lines, \ + halstead_n1, halstead_n2, halstead_big_n1, halstead_big_n2, \ + halstead_vocabulary, halstead_length, halstead_volume, \ + halstead_difficulty, halstead_effort, halstead_bugs, \ + maintainability_index) \ + VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18)", + ) + .map_err(|e| napi::Error::from_reason(format!("complexity prepare failed: {e}")))?; + + for r in &rows { + if stmt + .execute(params![ + r.node_id, + r.cognitive, + r.cyclomatic, + r.max_nesting, + r.loc, + r.sloc, + r.comment_lines, + r.halstead_n1, + r.halstead_n2, + r.halstead_big_n1, + r.halstead_big_n2, + r.halstead_vocabulary, 
+ r.halstead_length, + r.halstead_volume, + r.halstead_difficulty, + r.halstead_effort, + r.halstead_bugs, + r.maintainability_index, + ]) + .is_ok() + { + total += 1; + } + } + } + tx.commit() + .map_err(|e| napi::Error::from_reason(format!("complexity commit failed: {e}")))?; + Ok(total) + } + + /// Bulk-insert CFG blocks and edges for functions/methods. + /// Returns the number of blocks inserted (0 on failure). + #[napi] + pub fn bulk_insert_cfg(&self, entries: Vec) -> napi::Result { + if entries.is_empty() { + return Ok(0); + } + let conn = self.conn()?; + if !has_table(conn, "cfg_blocks") { + return Ok(0); + } + let tx = conn + .unchecked_transaction() + .map_err(|e| napi::Error::from_reason(format!("cfg tx failed: {e}")))?; + let mut total = 0u32; + { + let mut block_stmt = tx.prepare( + "INSERT INTO cfg_blocks \ + (function_node_id, block_index, block_type, start_line, end_line, label) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + ) + .map_err(|e| napi::Error::from_reason(format!("cfg_blocks prepare failed: {e}")))?; + + let mut edge_stmt = tx.prepare( + "INSERT INTO cfg_edges \ + (function_node_id, source_block_id, target_block_id, kind) \ + VALUES (?1, ?2, ?3, ?4)", + ) + .map_err(|e| napi::Error::from_reason(format!("cfg_edges prepare failed: {e}")))?; + + for entry in &entries { + let mut block_db_ids: std::collections::HashMap = + std::collections::HashMap::new(); + for block in &entry.blocks { + if let Ok(()) = block_stmt.execute(params![ + entry.node_id, + block.index, + &block.block_type, + block.start_line, + block.end_line, + &block.label, + ]) { + block_db_ids.insert(block.index, tx.last_insert_rowid()); + total += 1; + } + } + for edge in &entry.edges { + if let (Some(&src), Some(&tgt)) = ( + block_db_ids.get(&edge.source_index), + block_db_ids.get(&edge.target_index), + ) { + let _ = edge_stmt.execute(params![entry.node_id, src, tgt, &edge.kind]); + } + } + } + } + tx.commit() + .map_err(|e| napi::Error::from_reason(format!("cfg commit failed: 
{e}")))?; + Ok(total) + } + + /// Bulk-insert dataflow edges (flows_to, returns, mutates). + /// Returns the number of edges inserted (0 on failure). + #[napi] + pub fn bulk_insert_dataflow(&self, edges: Vec) -> napi::Result { + if edges.is_empty() { + return Ok(0); + } + let conn = self.conn()?; + if !has_table(conn, "dataflow") { + return Ok(0); + } + let tx = conn + .unchecked_transaction() + .map_err(|e| napi::Error::from_reason(format!("dataflow tx failed: {e}")))?; + let mut total = 0u32; + { + let mut stmt = tx.prepare( + "INSERT INTO dataflow \ + (source_id, target_id, kind, param_index, expression, line, confidence) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + ) + .map_err(|e| napi::Error::from_reason(format!("dataflow prepare failed: {e}")))?; + + for e in &edges { + if stmt + .execute(params![ + e.source_id, + e.target_id, + &e.kind, + e.param_index, + &e.expression, + e.line, + e.confidence, + ]) + .is_ok() + { + total += 1; + } + } + } + tx.commit() + .map_err(|e| napi::Error::from_reason(format!("dataflow commit failed: {e}")))?; + Ok(total) + } + /// Full role classification: queries all nodes, computes fan-in/fan-out, /// classifies roles, and batch-updates the `role` column. #[napi] diff --git a/src/features/cfg.ts b/src/features/cfg.ts index 389ee3c2..d6999486 100644 --- a/src/features/cfg.ts +++ b/src/features/cfg.ts @@ -279,12 +279,57 @@ export async function buildCFGData( db: BetterSqlite3Database, fileSymbols: Map, rootDir: string, - _engineOpts?: unknown, + engineOpts?: { nativeDb?: { bulkInsertCfg?(entries: Array>): number } }, ): Promise { // Fast path: when all function/method defs already have native CFG data, // skip WASM parser init, tree parsing, and JS visitor entirely — just persist. 
const allNative = allCfgNative(fileSymbols); + // ── Native bulk-insert fast path ────────────────────────────────────── + const nativeDb = engineOpts?.nativeDb; + if (allNative && nativeDb?.bulkInsertCfg) { + const entries: Array> = []; + + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!CFG_EXTENSIONS.has(ext)) continue; + + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + + deleteCfgForNode(db, nodeId); + if (!def.cfg?.blocks?.length) continue; + + const cfg = def.cfg as unknown as { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] }; + entries.push({ + nodeId, + blocks: cfg.blocks.map((b) => ({ + index: b.index, + blockType: b.type, + startLine: b.startLine ?? null, + endLine: b.endLine ?? null, + label: b.label ?? null, + })), + edges: cfg.edges.map((e) => ({ + sourceIndex: e.sourceIndex, + targetIndex: e.targetIndex, + kind: e.kind, + })), + }); + } + } + + if (entries.length > 0) { + const inserted = nativeDb.bulkInsertCfg(entries); + info(`CFG (native bulk): ${inserted} blocks across ${entries.length} functions`); + } + return; + } + const extToLang = buildExtToLangMap(); let parsers: unknown = null; let getParserFn: unknown = null; diff --git a/src/features/complexity.ts b/src/features/complexity.ts index b0a1d05e..ca950234 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -502,8 +502,63 @@ export async function buildComplexityMetrics( db: BetterSqlite3Database, fileSymbols: Map, rootDir: string, - _engineOpts?: unknown, + engineOpts?: { + nativeDb?: { bulkInsertComplexity?(rows: Array>): number }; + }, ): Promise { + // ── Native bulk-insert fast path ────────────────────────────────────── + const nativeDb = engineOpts?.nativeDb; + if (nativeDb?.bulkInsertComplexity) { + const rows: Array> = []; 
+ let needsJsFallback = false; + + for (const [relPath, symbols] of fileSymbols) { + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + if (!def.complexity) { + needsJsFallback = true; + break; + } + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; + rows.push({ + nodeId, + cognitive: def.complexity.cognitive ?? 0, + cyclomatic: def.complexity.cyclomatic ?? 0, + maxNesting: def.complexity.maxNesting ?? 0, + loc: cl ? cl.loc : 0, + sloc: cl ? cl.sloc : 0, + commentLines: cl ? cl.commentLines : 0, + halsteadN1: ch ? ch.n1 : 0, + halsteadN2: ch ? ch.n2 : 0, + halsteadBigN1: ch ? ch.bigN1 : 0, + halsteadBigN2: ch ? ch.bigN2 : 0, + halsteadVocabulary: ch ? ch.vocabulary : 0, + halsteadLength: ch ? ch.length : 0, + halsteadVolume: ch ? ch.volume : 0, + halsteadDifficulty: ch ? ch.difficulty : 0, + halsteadEffort: ch ? ch.effort : 0, + halsteadBugs: ch ? ch.bugs : 0, + maintainabilityIndex: def.complexity.maintainabilityIndex ?? 
0, + }); + } + if (needsJsFallback) break; + } + + if (!needsJsFallback && rows.length > 0) { + const inserted = nativeDb.bulkInsertComplexity(rows); + if (inserted === rows.length) { + info(`Complexity (native bulk): ${inserted} functions analyzed`); + return; + } + debug(`Native bulkInsertComplexity partial: ${inserted}/${rows.length} — falling back to JS`); + } + } + + // ── JS fallback path ───────────────────────────────────────────────── const { parsers, extToLang } = await initWasmParsersIfNeeded(fileSymbols); const { getParser } = await import('../domain/parser.js'); diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index 8315b524..fda56c17 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -241,9 +241,113 @@ export async function buildDataflowEdges( db: BetterSqlite3Database, fileSymbols: Map, rootDir: string, - _engineOpts?: unknown, + engineOpts?: { + nativeDb?: { bulkInsertDataflow?(edges: Array>): number }; + }, ): Promise { const extToLang = buildExtToLangMap(); + + // ── Native bulk-insert fast path ────────────────────────────────────── + const nativeDb = engineOpts?.nativeDb; + if (nativeDb?.bulkInsertDataflow) { + let needsJsFallback = false; + const nativeEdges: Array> = []; + + const getNodeByNameAndFile = db.prepare<{ + id: number; + name: string; + kind: string; + file: string; + line: number; + }>( + `SELECT id, name, kind, file, line FROM nodes + WHERE name = ? AND file = ? AND kind IN ('function', 'method')`, + ); + const getNodeByName = db.prepare<{ + id: number; + name: string; + kind: string; + file: string; + line: number; + }>( + `SELECT id, name, kind, file, line FROM nodes + WHERE name = ? 
AND kind IN ('function', 'method') + ORDER BY file, line LIMIT 10`, + ); + + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!DATAFLOW_EXTENSIONS.has(ext)) continue; + if (!symbols.dataflow) { + needsJsFallback = true; + break; + } + + const resolveNode = (funcName: string): { id: number } | null => { + const local = getNodeByNameAndFile.all(funcName, relPath); + if (local.length > 0) return local[0]!; + const global = getNodeByName.all(funcName); + return global.length > 0 ? global[0]! : null; + }; + + const data = symbols.dataflow; + for (const flow of data.argFlows as ArgFlow[]) { + const sourceNode = resolveNode(flow.callerFunc); + const targetNode = resolveNode(flow.calleeName); + if (sourceNode && targetNode) { + nativeEdges.push({ + sourceId: sourceNode.id, + targetId: targetNode.id, + kind: 'flows_to', + paramIndex: flow.argIndex, + expression: flow.expression, + line: flow.line, + confidence: flow.confidence, + }); + } + } + for (const assignment of data.assignments as Assignment[]) { + const producerNode = resolveNode(assignment.sourceCallName); + const consumerNode = resolveNode(assignment.callerFunc); + if (producerNode && consumerNode) { + nativeEdges.push({ + sourceId: producerNode.id, + targetId: consumerNode.id, + kind: 'returns', + paramIndex: null, + expression: assignment.expression, + line: assignment.line, + confidence: 1.0, + }); + } + } + for (const mut of data.mutations as Mutation[]) { + const mutatorNode = resolveNode(mut.funcName); + if (mutatorNode && mut.binding?.type === 'param') { + nativeEdges.push({ + sourceId: mutatorNode.id, + targetId: mutatorNode.id, + kind: 'mutates', + paramIndex: null, + expression: mut.mutatingExpr, + line: mut.line, + confidence: 1.0, + }); + } + } + } + + if (!needsJsFallback) { + if (nativeEdges.length > 0) { + const inserted = nativeDb.bulkInsertDataflow(nativeEdges); + info(`Dataflow (native bulk): ${inserted} edges inserted`); + } + return; + } + 
debug('Dataflow: some files lack pre-computed data — falling back to JS'); + } + + // ── JS fallback path ───────────────────────────────────────────────── const { parsers, getParserFn } = await initDataflowParsers(fileSymbols); const insert = db.prepare( diff --git a/src/types.ts b/src/types.ts index 00870ef9..0245a1d6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -2050,6 +2050,56 @@ export interface NativeDatabase { }>; }>, ): number; + bulkInsertComplexity?( + rows: Array<{ + nodeId: number; + cognitive: number; + cyclomatic: number; + maxNesting: number; + loc: number; + sloc: number; + commentLines: number; + halsteadN1: number; + halsteadN2: number; + halsteadBigN1: number; + halsteadBigN2: number; + halsteadVocabulary: number; + halsteadLength: number; + halsteadVolume: number; + halsteadDifficulty: number; + halsteadEffort: number; + halsteadBugs: number; + maintainabilityIndex: number; + }>, + ): number; + bulkInsertCfg?( + entries: Array<{ + nodeId: number; + blocks: Array<{ + index: number; + blockType: string; + startLine?: number | null; + endLine?: number | null; + label?: string | null; + }>; + edges: Array<{ + sourceIndex: number; + targetIndex: number; + kind: string; + }>; + }>, + ): number; + bulkInsertDataflow?( + edges: Array<{ + sourceId: number; + targetId: number; + kind: string; + paramIndex?: number | null; + expression?: string | null; + line?: number | null; + confidence: number; + }>, + ): number; classifyRolesFull(): { entry: number; core: number; From 8d8b38b86319f9727182bb79448ad38efd2cb35a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 02:49:31 -0600 Subject: [PATCH 04/10] fix(rust): use wildcard pattern for execute() return type (#686) block_stmt.execute() returns Result<usize>, not Result<()> --- crates/codegraph-core/src/native_db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/native_db.rs
b/crates/codegraph-core/src/native_db.rs index ce58d28f..418c63cd 100644 --- a/crates/codegraph-core/src/native_db.rs +++ b/crates/codegraph-core/src/native_db.rs @@ -862,7 +862,7 @@ impl NativeDatabase { let mut block_db_ids: std::collections::HashMap = std::collections::HashMap::new(); for block in &entry.blocks { - if let Ok(()) = block_stmt.execute(params![ + if let Ok(_) = block_stmt.execute(params![ entry.node_id, block.index, &block.block_type, From 03ebe9f91a9b8d3a1d1342c9ae11eba4816e34fb Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:15:07 -0600 Subject: [PATCH 05/10] refactor(extractors): add parser abstraction layer (Phase 7.1) Extract shared patterns from 9 language extractors into 4 reusable helpers in helpers.ts, reducing per-language boilerplate by ~30 lines: - findParentNode: replaces 6 findParent*/findCurrentImpl functions - extractBodyMembers: replaces 5 body-iteration patterns for enums/structs - stripQuotes: replaces inline .replace(/"/g,'') across 3 extractors - lastPathSegment: replaces inline .split('.').pop() across 6 extractors Net: +77 helper lines, -159 extractor lines = -82 lines total. --- docs/roadmap/ROADMAP.md | 15 ++++--- src/extractors/csharp.ts | 54 +++++++++++-------------- src/extractors/go.ts | 13 ++++-- src/extractors/hcl.ts | 12 +++--- src/extractors/helpers.ts | 77 +++++++++++++++++++++++++++++++++++- src/extractors/java.ts | 43 ++++++++------------ src/extractors/javascript.ts | 14 ++----- src/extractors/php.ts | 26 ++++++------ src/extractors/python.ts | 19 +++++---- src/extractors/ruby.ts | 19 +++------ src/extractors/rust.ts | 56 +++++++++----------------- 11 files changed, 189 insertions(+), 159 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 88966106..442e1023 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1297,17 +1297,22 @@ Structure building is unchanged — at 22ms it's already fast. 
**Why after Phase 6:** The native analysis acceleration work (Phase 6) establishes the dual-engine pipeline that new language grammars plug into. Adding languages before the engine is complete would mean porting extractors twice. With Phase 6 done, each new language needs only a `LANGUAGE_REGISTRY` entry + extractor function, and both engines support it automatically. -### 7.1 -- Parser Abstraction Layer +### 7.1 -- Parser Abstraction Layer ✅ Extract shared patterns from existing extractors into reusable helpers to reduce per-language boilerplate from ~200 lines to ~80 lines. | Helper | Purpose | |--------|---------| -| `findParentNode(node, typeNames)` | Walk parent chain to find enclosing class/struct | -| `extractBodyMethods(bodyNode, parentName)` | Extract method definitions from a body block | -| `normalizeImportPath(importText)` | Cross-language import path normalization | +| ✅ `findParentNode(node, typeNames, nameField?)` | Walk parent chain to find enclosing class/struct | +| ✅ `extractBodyMembers(node, bodyFields, memberType, kind, nameField?, visibility?)` | Extract child declarations from a body block | +| ✅ `stripQuotes(text)` | Strip leading/trailing quotes from string literals | +| ✅ `lastPathSegment(path, separator?)` | Extract last segment of a delimited import path | -**New file:** `src/parser-utils.js` +**File:** `src/extractors/helpers.ts` (extended existing helper module) + +- `findParentNode` replaces 6 per-language `findParent*` functions (JS, Python, Java, C#, Ruby, Rust) +- `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum) +- `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` patterns across 7 extractors ### 7.2 -- Batch 1: High Demand diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 3a79bb28..16ed0b90 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -6,7 +6,15 @@ import type { TreeSitterNode, 
TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + findParentNode, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, +} from './helpers.js'; /** * Extract symbols from C# files. @@ -208,7 +216,7 @@ function handleCsUsingDirective(node: TreeSitterNode, ctx: ExtractorOutput): voi findChild(node, 'identifier'); if (!nameNode) return; const fullPath = nameNode.text; - const lastName = fullPath.split('.').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '.'); ctx.imports.push({ source: fullPath, names: [lastName], @@ -246,22 +254,15 @@ function handleCsObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): voi if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } +const CS_PARENT_TYPES = [ + 'class_declaration', + 'struct_declaration', + 'interface_declaration', + 'enum_declaration', + 'record_declaration', +] as const; function findCSharpParentType(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'struct_declaration' || - current.type === 'interface_declaration' || - current.type === 'enum_declaration' || - current.type === 'record_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, CS_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -307,19 +308,12 @@ function extractCSharpClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractCSharpEnumMembers(enumNode: TreeSitterNode): SubDeclaration[] { - const constants: SubDeclaration[] = []; - const body = - enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); - if (!body) return constants; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_member_declaration') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return constants; + return extractBodyMembers( + enumNode, + ['body', 'enum_member_declaration_list'], + 'enum_member_declaration', + 'constant', + ); } // ── Type map extraction ────────────────────────────────────────────────────── diff --git a/src/extractors/go.ts b/src/extractors/go.ts index 3e832b37..3e857b28 100644 --- a/src/extractors/go.ts +++ b/src/extractors/go.ts @@ -6,7 +6,14 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, goVisibility, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + findChild, + goVisibility, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, + stripQuotes, +} from './helpers.js'; /** * Extract symbols from Go files. 
@@ -170,9 +177,9 @@ function handleGoImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { function extractGoImportSpec(spec: TreeSitterNode, ctx: ExtractorOutput): void { const pathNode = spec.childForFieldName('path'); if (pathNode) { - const importPath = pathNode.text.replace(/"/g, ''); + const importPath = stripQuotes(pathNode.text); const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? nameNode.text : (importPath.split('/').pop() ?? importPath); + const alias = nameNode ? nameNode.text : lastPathSegment(importPath); ctx.imports.push({ source: importPath, names: [alias], diff --git a/src/extractors/hcl.ts b/src/extractors/hcl.ts index a37792f9..cf69687a 100644 --- a/src/extractors/hcl.ts +++ b/src/extractors/hcl.ts @@ -6,7 +6,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { nodeEndLine } from './helpers.js'; +import { nodeEndLine, stripQuotes } from './helpers.js'; /** * Extract symbols from HCL (Terraform) files. @@ -80,18 +80,18 @@ function resolveHclBlockName(blockType: string, strings: TreeSitterNode[]): stri const s0 = strings[0]; const s1 = strings[1]; if (blockType === 'resource' && s0 && s1) { - return `${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; + return `${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; } if (blockType === 'data' && s0 && s1) { - return `data.${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; + return `data.${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; } if ((blockType === 'variable' || blockType === 'output' || blockType === 'module') && s0) { - return `${blockType}.${s0.text.replace(/"/g, '')}`; + return `${blockType}.${stripQuotes(s0.text)}`; } if (blockType === 'locals') return 'locals'; if (blockType === 'terraform' || blockType === 'provider') { let name = blockType; - if (s0) name += `.${s0.text.replace(/"/g, '')}`; + if (s0) name += `.${stripQuotes(s0.text)}`; return name; } return ''; @@ -126,7 +126,7 @@ function 
extractHclModuleSource( const key = attr.childForFieldName('key') || attr.child(0); const val = attr.childForFieldName('val') || attr.child(2); if (key && key.text === 'source' && val) { - const src = val.text.replace(/"/g, ''); + const src = stripQuotes(val.text); if (src.startsWith('./') || src.startsWith('../')) { ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); } diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts index 56b05543..1c146277 100644 --- a/src/extractors/helpers.ts +++ b/src/extractors/helpers.ts @@ -1,4 +1,4 @@ -import type { TreeSitterNode } from '../types.js'; +import type { SubDeclaration, TreeSitterNode } from '../types.js'; /** * Maximum recursion depth for tree-sitter AST walkers. @@ -70,6 +70,81 @@ export function rustVisibility(node: TreeSitterNode): 'public' | 'private' { return 'private'; } +// ── Parser abstraction helpers ───────────────────────────────────────────── + +/** + * Walk up the parent chain to find an enclosing node whose type is in `typeNames`. + * Returns the text of `nameField` (default `'name'`) on the matching ancestor, or null. + * + * Replaces per-language `findParentClass` / `findParentType` / `findCurrentImpl` helpers. + */ +export function findParentNode( + node: TreeSitterNode, + typeNames: readonly string[], + nameField: string = 'name', +): string | null { + let current = node.parent; + while (current) { + if (typeNames.includes(current.type)) { + const nameNode = current.childForFieldName(nameField); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; +} + +/** + * Extract child declarations from a container node's body. + * Finds the body via `bodyFields` (tries childForFieldName then findChild for each), + * iterates its children, filters by `memberType`, extracts `nameField`, and returns SubDeclarations. 
+ * + * Replaces per-language extractStructFields / extractEnumVariants / extractEnumConstants helpers + * for the common case where each member has a direct name field. + */ +export function extractBodyMembers( + containerNode: TreeSitterNode, + bodyFields: readonly string[], + memberType: string, + kind: SubDeclaration['kind'], + nameField: string = 'name', + visibility?: (member: TreeSitterNode) => SubDeclaration['visibility'], +): SubDeclaration[] { + const members: SubDeclaration[] = []; + let body: TreeSitterNode | null = null; + for (const field of bodyFields) { + body = containerNode.childForFieldName(field) || findChild(containerNode, field); + if (body) break; + } + if (!body) return members; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== memberType) continue; + const nn = member.childForFieldName(nameField); + if (nn) { + const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 }; + if (visibility) entry.visibility = visibility(member); + members.push(entry); + } + } + return members; +} + +/** + * Strip leading/trailing quotes (single, double, or backtick) from a string. + */ +export function stripQuotes(text: string): string { + return text.replace(/^['"`]|['"`]$/g, ''); +} + +/** + * Extract the last segment of a delimited path. + * e.g. `lastPathSegment('java.util.List', '.')` → `'List'` + */ +export function lastPathSegment(path: string, separator: string = '/'): string { + return path.split(separator).pop() ?? 
path; +} + export function extractModifierVisibility( node: TreeSitterNode, modifierTypes: Set = DEFAULT_MODIFIER_TYPES, diff --git a/src/extractors/java.ts b/src/extractors/java.ts index 6277ff02..b29d053c 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -6,7 +6,14 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + findParentNode, + lastPathSegment, + nodeEndLine, +} from './helpers.js'; /** * Extract symbols from Java files. @@ -218,7 +225,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void const child = node.child(i); if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { const fullPath = child.text; - const lastName = fullPath.split('.').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '.'); ctx.imports.push({ source: fullPath, names: [lastName], @@ -263,20 +270,13 @@ function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): v if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } +const JAVA_PARENT_TYPES = [ + 'class_declaration', + 'enum_declaration', + 'interface_declaration', +] as const; function findJavaParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if ( - current.type === 'class_declaration' || - current.type === 'enum_declaration' || - current.type === 'interface_declaration' - ) { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, JAVA_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -333,16 +333,5 @@ function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractEnumConstants(enumNode: TreeSitterNode): SubDeclaration[] { - const constants: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); - if (!body) return constants; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_constant') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return constants; + return extractBodyMembers(enumNode, ['body', 'enum_body'], 'enum_constant', 'constant'); } diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index fc32576c..3b083ed7 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -12,7 +12,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS: Set = new Set([ @@ -1191,17 +1191,9 @@ function extractSuperclass(heritage: TreeSitterNode): string | null { return null; } +const JS_CLASS_TYPES = ['class_declaration', 'class'] as const; function findParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - const t = current.type; - if (t === 'class_declaration' || t === 'class') { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, JS_CLASS_TYPES); } function extractImportNames(node: TreeSitterNode): string[] { diff --git a/src/extractors/php.ts b/src/extractors/php.ts index 653971ee..dc2820fd 100644 --- a/src/extractors/php.ts +++ b/src/extractors/php.ts @@ -5,7 +5,14 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { + extractBodyMembers, + extractModifierVisibility, + findChild, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, +} from './helpers.js'; function extractPhpParameters(fnNode: TreeSitterNode): SubDeclaration[] { const params: SubDeclaration[] = []; @@ -65,18 +72,7 @@ function extractPhpClassChildren(classNode: TreeSitterNode): SubDeclaration[] { } function extractPhpEnumCases(enumNode: TreeSitterNode): SubDeclaration[] { - const children: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); - if (!body) return children; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== 'enum_case') continue; - const nameNode = member.childForFieldName('name'); - if (nameNode) { - children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); - } - } - return children; + return extractBodyMembers(enumNode, ['body', 'enum_declaration_list'], 'enum_case', 'constant'); } /** @@ -272,7 +268,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); if (nameNode) { const fullPath = nameNode.text; - const lastName = fullPath.split('\\').pop() ?? 
fullPath; + const lastName = lastPathSegment(fullPath, '\\'); const alias = child.childForFieldName('alias'); ctx.imports.push({ source: fullPath, @@ -284,7 +280,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void } if (child && (child.type === 'qualified_name' || child.type === 'name')) { const fullPath = child.text; - const lastName = fullPath.split('\\').pop() ?? fullPath; + const lastName = lastPathSegment(fullPath, '\\'); ctx.imports.push({ source: fullPath, names: [lastName], diff --git a/src/extractors/python.ts b/src/extractors/python.ts index b1d8804a..8f98ca34 100644 --- a/src/extractors/python.ts +++ b/src/extractors/python.ts @@ -6,7 +6,13 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine, pythonVisibility } from './helpers.js'; +import { + findChild, + findParentNode, + MAX_WALK_DEPTH, + nodeEndLine, + pythonVisibility, +} from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS_PY: Set = new Set([ @@ -441,14 +447,7 @@ function extractPythonTypeName(typeNode: TreeSitterNode): string | null { return null; } +const PY_CLASS_TYPES = ['class_definition'] as const; function findPythonParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'class_definition') { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, PY_CLASS_TYPES); } diff --git a/src/extractors/ruby.ts b/src/extractors/ruby.ts index 6b7ba20a..2c9bb2d5 100644 --- a/src/extractors/ruby.ts +++ b/src/extractors/ruby.ts @@ -5,7 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { findChild, nodeEndLine } from './helpers.js'; +import { findChild, findParentNode, lastPathSegment, nodeEndLine, stripQuotes } from './helpers.js'; /** * Extract symbols from Ruby files. @@ -176,10 +176,10 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { for (let i = 0; i < args.childCount; i++) { const arg = args.child(i); if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = arg.text.replace(/^['"]|['"]$/g, ''); + const strContent = stripQuotes(arg.text); ctx.imports.push({ source: strContent, - names: [strContent.split('/').pop() ?? strContent], + names: [lastPathSegment(strContent)], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -190,7 +190,7 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { if (content) { ctx.imports.push({ source: content.text, - names: [content.text.split('/').pop() ?? content.text], + names: [lastPathSegment(content.text)], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -221,16 +221,9 @@ function handleRubyModuleInclusion( } } +const RUBY_PARENT_TYPES = ['class', 'module'] as const; function findRubyParentClass(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'class' || current.type === 'module') { - const nameNode = current.childForFieldName('name'); - return nameNode ? 
nameNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, RUBY_PARENT_TYPES); } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index 3f40737e..81657d0a 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -5,7 +5,14 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { findChild, MAX_WALK_DEPTH, nodeEndLine, rustVisibility } from './helpers.js'; +import { + extractBodyMembers, + findParentNode, + lastPathSegment, + MAX_WALK_DEPTH, + nodeEndLine, + rustVisibility, +} from './helpers.js'; /** * Extract symbols from Rust files. @@ -206,16 +213,9 @@ function handleRustMacroInvocation(node: TreeSitterNode, ctx: ExtractorOutput): } } +const RUST_IMPL_TYPES = ['impl_item'] as const; function findCurrentImpl(node: TreeSitterNode): string | null { - let current = node.parent; - while (current) { - if (current.type === 'impl_item') { - const typeNode = current.childForFieldName('type'); - return typeNode ? 
typeNode.text : null; - } - current = current.parent; - } - return null; + return findParentNode(node, RUST_IMPL_TYPES, 'type'); } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -227,8 +227,6 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara const param = paramListNode.child(i); if (!param) continue; if (param.type === 'self_parameter') { - // Skip self parameters — matches native engine behaviour - continue; } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { @@ -240,34 +238,16 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara } function extractStructFields(structNode: TreeSitterNode): SubDeclaration[] { - const fields: SubDeclaration[] = []; - const fieldList = - structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); - if (!fieldList) return fields; - for (let i = 0; i < fieldList.childCount; i++) { - const field = fieldList.child(i); - if (!field || field.type !== 'field_declaration') continue; - const nameNode = field.childForFieldName('name'); - if (nameNode) { - fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); - } - } - return fields; + return extractBodyMembers( + structNode, + ['body', 'field_declaration_list'], + 'field_declaration', + 'property', + ); } function extractEnumVariants(enumNode: TreeSitterNode): SubDeclaration[] { - const variants: SubDeclaration[] = []; - const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); - if (!body) return variants; - for (let i = 0; i < body.childCount; i++) { - const variant = body.child(i); - if (!variant || variant.type !== 'enum_variant') continue; - const nameNode = variant.childForFieldName('name'); - if (nameNode) { - variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); - } - } - return variants; + 
return extractBodyMembers(enumNode, ['body', 'enum_variant_list'], 'enum_variant', 'constant'); } function extractRustTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void { @@ -375,7 +355,7 @@ function extractRustUsePath(node: TreeSitterNode | null): { source: string; name if (node.type === 'scoped_identifier' || node.type === 'identifier') { const text = node.text; - const lastName = text.split('::').pop() ?? text; + const lastName = lastPathSegment(text, '::'); return [{ source: text, names: [lastName] }]; } From 0db912ab22a1c36f7c8180abe081e9ae70db859a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:15:18 -0600 Subject: [PATCH 06/10] refactor(ast): remove dead matched Set from ast-store-visitor The matched Set was a deduplication guard left over from the removed manual walkSubtree logic. The DFS framework visits each node exactly once, so the guard never fires. Removing it eliminates a no-op allocation. --- src/ast-analysis/visitors/ast-store-visitor.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/ast-analysis/visitors/ast-store-visitor.ts b/src/ast-analysis/visitors/ast-store-visitor.ts index 08c54a4e..8f173313 100644 --- a/src/ast-analysis/visitors/ast-store-visitor.ts +++ b/src/ast-analysis/visitors/ast-store-visitor.ts @@ -82,7 +82,6 @@ export function createAstStoreVisitor( nodeIdMap: Map, ): Visitor { const rows: AstStoreRow[] = []; - const matched = new Set(); function findParentDef(line: number): Definition | null { let best: Definition | null = null; @@ -106,8 +105,6 @@ export function createAstStoreVisitor( name: 'ast-store', enterNode(node: TreeSitterNode, _context: VisitorContext): EnterNodeResult | undefined { - if (matched.has(node.id)) return; - const kind = astTypeMap[node.type]; if (!kind) return; @@ -144,8 +141,6 @@ export function createAstStoreVisitor( parentNodeId: resolveParentNodeId(line), }); - matched.add(node.id); - if (kind !== 'string' && kind !== 
'regex') { return { skipChildren: true }; } From 1deaa1c5c5504ae66b5d50d5e282c95a4c35f671 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:15:29 -0600 Subject: [PATCH 07/10] fix(ast): remove call kind from ast_nodes to restore engine parity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WASM visitor no longer extracts call AST nodes (removed in this PR), but the native engine and the symbols.calls fallback path still inserted them. This broke the build-parity test. Remove call kind insertion from both the native bulk-insert filter and the WASM symbols.calls path. Call AST nodes were dead — never queried by any feature or command. --- src/features/ast.ts | 35 +++++++++++---------------------- tests/parsers/ast-nodes.test.ts | 7 ++----- 2 files changed, 13 insertions(+), 29 deletions(-) diff --git a/src/features/ast.ts b/src/features/ast.ts index 01cc984c..ea2be11a 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -101,9 +101,11 @@ export async function buildAstNodes( for (const [relPath, symbols] of fileSymbols) { if (Array.isArray(symbols.astNodes)) { + // Filter out 'call' kind — dead AST node type, see JS fallback path comment. + const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); batches.push({ file: relPath, - nodes: symbols.astNodes.map((n) => ({ + nodes: filtered.map((n) => ({ line: n.line, kind: n.kind, name: n.name, @@ -158,32 +160,17 @@ export async function buildAstNodes( nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } - // When native astNodes includes call entries, skip separate symbols.calls processing - // to avoid duplication. Fall back to symbols.calls for WASM or older native binaries. + // Call AST nodes were removed — 'call' kind entries in ast_nodes are dead + // (never queried by any feature or command). symbols.calls are still used + // for call *edges* but no longer written to ast_nodes. 
const nativeProvidedAstNodes = Array.isArray(symbols.astNodes); - if (symbols.calls && !nativeProvidedAstNodes) { - for (const call of symbols.calls) { - const parentDef = findParentDef(defs, call.line); - let parentNodeId: number | null = null; - if (parentDef) { - parentNodeId = - nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; - } - allRows.push({ - file: relPath, - line: call.line, - kind: 'call', - name: call.name, - text: call.dynamic ? `[dynamic] ${call.name}` : null, - receiver: call.receiver || null, - parentNodeId, - }); - } - } if (Array.isArray(symbols.astNodes)) { - // Native engine provided AST nodes (may be empty for files with no AST content) - for (const n of symbols.astNodes) { + // Native engine provided AST nodes (may be empty for files with no AST content). + // Filter out 'call' kind — call AST nodes are dead (never queried by any feature). + // The WASM visitor no longer extracts them; native binaries still emit them until + // the next Rust release strips them from the extractor. 
+ for (const n of symbols.astNodes.filter((n) => n.kind !== 'call')) { const parentDef = findParentDef(defs, n.line); let parentNodeId: number | null = null; if (parentDef) { diff --git a/tests/parsers/ast-nodes.test.ts b/tests/parsers/ast-nodes.test.ts index 5f3e9179..5e26a77d 100644 --- a/tests/parsers/ast-nodes.test.ts +++ b/tests/parsers/ast-nodes.test.ts @@ -100,12 +100,9 @@ function queryAllAstNodes() { // ─── Tests ──────────────────────────────────────────────────────────── describe('buildAstNodes — JS extraction', () => { - test('captures call nodes from symbols.calls', () => { + test('call kind AST nodes are no longer stored (dead code removed)', () => { const calls = queryAstNodes('call'); - expect(calls.length).toBeGreaterThanOrEqual(1); - const callNames = calls.map((c) => c.name); - // eval, fetch, console.log should be among calls (depending on parser extraction) - expect(callNames.some((n) => n === 'eval' || n === 'fetch' || n === 'console.log')).toBe(true); + expect(calls.length).toBe(0); }); test('captures new_expression as kind:new', () => { From fa28ea13601ef3732431b554ee4f5d724ac18007 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:16:28 -0600 Subject: [PATCH 08/10] Revert "refactor(extractors): add parser abstraction layer (Phase 7.1)" This reverts commit 03ebe9f91a9b8d3a1d1342c9ae11eba4816e34fb. 
--- docs/roadmap/ROADMAP.md | 15 +++---- src/extractors/csharp.ts | 54 ++++++++++++++----------- src/extractors/go.ts | 13 ++---- src/extractors/hcl.ts | 12 +++--- src/extractors/helpers.ts | 77 +----------------------------------- src/extractors/java.ts | 43 ++++++++++++-------- src/extractors/javascript.ts | 14 +++++-- src/extractors/php.ts | 26 ++++++------ src/extractors/python.ts | 19 ++++----- src/extractors/ruby.ts | 19 ++++++--- src/extractors/rust.ts | 56 +++++++++++++++++--------- 11 files changed, 159 insertions(+), 189 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 442e1023..88966106 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1297,22 +1297,17 @@ Structure building is unchanged — at 22ms it's already fast. **Why after Phase 6:** The native analysis acceleration work (Phase 6) establishes the dual-engine pipeline that new language grammars plug into. Adding languages before the engine is complete would mean porting extractors twice. With Phase 6 done, each new language needs only a `LANGUAGE_REGISTRY` entry + extractor function, and both engines support it automatically. -### 7.1 -- Parser Abstraction Layer ✅ +### 7.1 -- Parser Abstraction Layer Extract shared patterns from existing extractors into reusable helpers to reduce per-language boilerplate from ~200 lines to ~80 lines. 
| Helper | Purpose | |--------|---------| -| ✅ `findParentNode(node, typeNames, nameField?)` | Walk parent chain to find enclosing class/struct | -| ✅ `extractBodyMembers(node, bodyFields, memberType, kind, nameField?, visibility?)` | Extract child declarations from a body block | -| ✅ `stripQuotes(text)` | Strip leading/trailing quotes from string literals | -| ✅ `lastPathSegment(path, separator?)` | Extract last segment of a delimited import path | +| `findParentNode(node, typeNames)` | Walk parent chain to find enclosing class/struct | +| `extractBodyMethods(bodyNode, parentName)` | Extract method definitions from a body block | +| `normalizeImportPath(importText)` | Cross-language import path normalization | -**File:** `src/extractors/helpers.ts` (extended existing helper module) - -- `findParentNode` replaces 6 per-language `findParent*` functions (JS, Python, Java, C#, Ruby, Rust) -- `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum) -- `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` patterns across 7 extractors +**New file:** `src/parser-utils.js` ### 7.2 -- Batch 1: High Demand diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 16ed0b90..3a79bb28 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -6,15 +6,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { - extractBodyMembers, - extractModifierVisibility, - findChild, - findParentNode, - lastPathSegment, - MAX_WALK_DEPTH, - nodeEndLine, -} from './helpers.js'; +import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; /** * Extract symbols from C# files. 
@@ -216,7 +208,7 @@ function handleCsUsingDirective(node: TreeSitterNode, ctx: ExtractorOutput): voi findChild(node, 'identifier'); if (!nameNode) return; const fullPath = nameNode.text; - const lastName = lastPathSegment(fullPath, '.'); + const lastName = fullPath.split('.').pop() ?? fullPath; ctx.imports.push({ source: fullPath, names: [lastName], @@ -254,15 +246,22 @@ function handleCsObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): voi if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } -const CS_PARENT_TYPES = [ - 'class_declaration', - 'struct_declaration', - 'interface_declaration', - 'enum_declaration', - 'record_declaration', -] as const; function findCSharpParentType(node: TreeSitterNode): string | null { - return findParentNode(node, CS_PARENT_TYPES); + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'struct_declaration' || + current.type === 'interface_declaration' || + current.type === 'enum_declaration' || + current.type === 'record_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -308,12 +307,19 @@ function extractCSharpClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractCSharpEnumMembers(enumNode: TreeSitterNode): SubDeclaration[] { - return extractBodyMembers( - enumNode, - ['body', 'enum_member_declaration_list'], - 'enum_member_declaration', - 'constant', - ); + const constants: SubDeclaration[] = []; + const body = + enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_member_declaration') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; } // ── Type map extraction ────────────────────────────────────────────────────── diff --git a/src/extractors/go.ts b/src/extractors/go.ts index 3e857b28..3e832b37 100644 --- a/src/extractors/go.ts +++ b/src/extractors/go.ts @@ -6,14 +6,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { - findChild, - goVisibility, - lastPathSegment, - MAX_WALK_DEPTH, - nodeEndLine, - stripQuotes, -} from './helpers.js'; +import { findChild, goVisibility, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; /** * Extract symbols from Go files. @@ -177,9 +170,9 @@ function handleGoImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { function extractGoImportSpec(spec: TreeSitterNode, ctx: ExtractorOutput): void { const pathNode = spec.childForFieldName('path'); if (pathNode) { - const importPath = stripQuotes(pathNode.text); + const importPath = pathNode.text.replace(/"/g, ''); const nameNode = spec.childForFieldName('name'); - const alias = nameNode ? 
nameNode.text : lastPathSegment(importPath); + const alias = nameNode ? nameNode.text : (importPath.split('/').pop() ?? importPath); ctx.imports.push({ source: importPath, names: [alias], diff --git a/src/extractors/hcl.ts b/src/extractors/hcl.ts index cf69687a..a37792f9 100644 --- a/src/extractors/hcl.ts +++ b/src/extractors/hcl.ts @@ -6,7 +6,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { nodeEndLine, stripQuotes } from './helpers.js'; +import { nodeEndLine } from './helpers.js'; /** * Extract symbols from HCL (Terraform) files. @@ -80,18 +80,18 @@ function resolveHclBlockName(blockType: string, strings: TreeSitterNode[]): stri const s0 = strings[0]; const s1 = strings[1]; if (blockType === 'resource' && s0 && s1) { - return `${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; + return `${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; } if (blockType === 'data' && s0 && s1) { - return `data.${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`; + return `data.${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`; } if ((blockType === 'variable' || blockType === 'output' || blockType === 'module') && s0) { - return `${blockType}.${stripQuotes(s0.text)}`; + return `${blockType}.${s0.text.replace(/"/g, '')}`; } if (blockType === 'locals') return 'locals'; if (blockType === 'terraform' || blockType === 'provider') { let name = blockType; - if (s0) name += `.${stripQuotes(s0.text)}`; + if (s0) name += `.${s0.text.replace(/"/g, '')}`; return name; } return ''; @@ -126,7 +126,7 @@ function extractHclModuleSource( const key = attr.childForFieldName('key') || attr.child(0); const val = attr.childForFieldName('val') || attr.child(2); if (key && key.text === 'source' && val) { - const src = stripQuotes(val.text); + const src = val.text.replace(/"/g, ''); if (src.startsWith('./') || src.startsWith('../')) { ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 }); } diff --git a/src/extractors/helpers.ts 
b/src/extractors/helpers.ts index 1c146277..56b05543 100644 --- a/src/extractors/helpers.ts +++ b/src/extractors/helpers.ts @@ -1,4 +1,4 @@ -import type { SubDeclaration, TreeSitterNode } from '../types.js'; +import type { TreeSitterNode } from '../types.js'; /** * Maximum recursion depth for tree-sitter AST walkers. @@ -70,81 +70,6 @@ export function rustVisibility(node: TreeSitterNode): 'public' | 'private' { return 'private'; } -// ── Parser abstraction helpers ───────────────────────────────────────────── - -/** - * Walk up the parent chain to find an enclosing node whose type is in `typeNames`. - * Returns the text of `nameField` (default `'name'`) on the matching ancestor, or null. - * - * Replaces per-language `findParentClass` / `findParentType` / `findCurrentImpl` helpers. - */ -export function findParentNode( - node: TreeSitterNode, - typeNames: readonly string[], - nameField: string = 'name', -): string | null { - let current = node.parent; - while (current) { - if (typeNames.includes(current.type)) { - const nameNode = current.childForFieldName(nameField); - return nameNode ? nameNode.text : null; - } - current = current.parent; - } - return null; -} - -/** - * Extract child declarations from a container node's body. - * Finds the body via `bodyFields` (tries childForFieldName then findChild for each), - * iterates its children, filters by `memberType`, extracts `nameField`, and returns SubDeclarations. - * - * Replaces per-language extractStructFields / extractEnumVariants / extractEnumConstants helpers - * for the common case where each member has a direct name field. 
- */ -export function extractBodyMembers( - containerNode: TreeSitterNode, - bodyFields: readonly string[], - memberType: string, - kind: SubDeclaration['kind'], - nameField: string = 'name', - visibility?: (member: TreeSitterNode) => SubDeclaration['visibility'], -): SubDeclaration[] { - const members: SubDeclaration[] = []; - let body: TreeSitterNode | null = null; - for (const field of bodyFields) { - body = containerNode.childForFieldName(field) || findChild(containerNode, field); - if (body) break; - } - if (!body) return members; - for (let i = 0; i < body.childCount; i++) { - const member = body.child(i); - if (!member || member.type !== memberType) continue; - const nn = member.childForFieldName(nameField); - if (nn) { - const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 }; - if (visibility) entry.visibility = visibility(member); - members.push(entry); - } - } - return members; -} - -/** - * Strip leading/trailing quotes (single, double, or backtick) from a string. - */ -export function stripQuotes(text: string): string { - return text.replace(/^['"`]|['"`]$/g, ''); -} - -/** - * Extract the last segment of a delimited path. - * e.g. `lastPathSegment('java.util.List', '.')` → `'List'` - */ -export function lastPathSegment(path: string, separator: string = '/'): string { - return path.split(separator).pop() ?? 
path; -} - export function extractModifierVisibility( node: TreeSitterNode, modifierTypes: Set = DEFAULT_MODIFIER_TYPES, diff --git a/src/extractors/java.ts b/src/extractors/java.ts index b29d053c..6277ff02 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -6,14 +6,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { - extractBodyMembers, - extractModifierVisibility, - findChild, - findParentNode, - lastPathSegment, - nodeEndLine, -} from './helpers.js'; +import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Java files. @@ -225,7 +218,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void const child = node.child(i); if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) { const fullPath = child.text; - const lastName = lastPathSegment(fullPath, '.'); + const lastName = fullPath.split('.').pop() ?? fullPath; ctx.imports.push({ source: fullPath, names: [lastName], @@ -270,13 +263,20 @@ function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): v if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 }); } -const JAVA_PARENT_TYPES = [ - 'class_declaration', - 'enum_declaration', - 'interface_declaration', -] as const; function findJavaParentClass(node: TreeSitterNode): string | null { - return findParentNode(node, JAVA_PARENT_TYPES); + let current = node.parent; + while (current) { + if ( + current.type === 'class_declaration' || + current.type === 'enum_declaration' || + current.type === 'interface_declaration' + ) { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -333,5 +333,16 @@ function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] { } function extractEnumConstants(enumNode: TreeSitterNode): SubDeclaration[] { - return extractBodyMembers(enumNode, ['body', 'enum_body'], 'enum_constant', 'constant'); + const constants: SubDeclaration[] = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_constant') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; } diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 3b083ed7..fc32576c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -12,7 +12,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; +import { findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS: Set = new Set([ @@ -1191,9 +1191,17 @@ function extractSuperclass(heritage: TreeSitterNode): string | null { return null; } -const JS_CLASS_TYPES = ['class_declaration', 'class'] as const; function findParentClass(node: TreeSitterNode): string | null { - return findParentNode(node, JS_CLASS_TYPES); + let current = node.parent; + while (current) { + const t = current.type; + if (t === 'class_declaration' || t === 'class') { + const nameNode = current.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + } + current = current.parent; + } + return null; } function extractImportNames(node: TreeSitterNode): string[] { diff --git a/src/extractors/php.ts b/src/extractors/php.ts index dc2820fd..653971ee 100644 --- a/src/extractors/php.ts +++ b/src/extractors/php.ts @@ -5,14 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { - extractBodyMembers, - extractModifierVisibility, - findChild, - lastPathSegment, - MAX_WALK_DEPTH, - nodeEndLine, -} from './helpers.js'; +import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js'; function extractPhpParameters(fnNode: TreeSitterNode): SubDeclaration[] { const params: SubDeclaration[] = []; @@ -72,7 +65,18 @@ function extractPhpClassChildren(classNode: TreeSitterNode): SubDeclaration[] { } function extractPhpEnumCases(enumNode: TreeSitterNode): SubDeclaration[] { - return extractBodyMembers(enumNode, ['body', 'enum_declaration_list'], 'enum_case', 'constant'); + const children: SubDeclaration[] = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_case') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return children; } /** @@ -268,7 +272,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void const nameNode = findChild(child, 'qualified_name') || findChild(child, 'name'); if (nameNode) { const fullPath = nameNode.text; - const lastName = lastPathSegment(fullPath, '\\'); + const lastName = fullPath.split('\\').pop() ?? 
fullPath; const alias = child.childForFieldName('alias'); ctx.imports.push({ source: fullPath, @@ -280,7 +284,7 @@ function handlePhpNamespaceUse(node: TreeSitterNode, ctx: ExtractorOutput): void } if (child && (child.type === 'qualified_name' || child.type === 'name')) { const fullPath = child.text; - const lastName = lastPathSegment(fullPath, '\\'); + const lastName = fullPath.split('\\').pop() ?? fullPath; ctx.imports.push({ source: fullPath, names: [lastName], diff --git a/src/extractors/python.ts b/src/extractors/python.ts index 8f98ca34..b1d8804a 100644 --- a/src/extractors/python.ts +++ b/src/extractors/python.ts @@ -6,13 +6,7 @@ import type { TreeSitterTree, TypeMapEntry, } from '../types.js'; -import { - findChild, - findParentNode, - MAX_WALK_DEPTH, - nodeEndLine, - pythonVisibility, -} from './helpers.js'; +import { findChild, MAX_WALK_DEPTH, nodeEndLine, pythonVisibility } from './helpers.js'; /** Built-in globals that start with uppercase but are not user-defined types. */ const BUILTIN_GLOBALS_PY: Set = new Set([ @@ -447,7 +441,14 @@ function extractPythonTypeName(typeNode: TreeSitterNode): string | null { return null; } -const PY_CLASS_TYPES = ['class_definition'] as const; function findPythonParentClass(node: TreeSitterNode): string | null { - return findParentNode(node, PY_CLASS_TYPES); + let current = node.parent; + while (current) { + if (current.type === 'class_definition') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } diff --git a/src/extractors/ruby.ts b/src/extractors/ruby.ts index 2c9bb2d5..6b7ba20a 100644 --- a/src/extractors/ruby.ts +++ b/src/extractors/ruby.ts @@ -5,7 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { findChild, findParentNode, lastPathSegment, nodeEndLine, stripQuotes } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Ruby files. 
@@ -176,10 +176,10 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { for (let i = 0; i < args.childCount; i++) { const arg = args.child(i); if (arg && (arg.type === 'string' || arg.type === 'string_content')) { - const strContent = stripQuotes(arg.text); + const strContent = arg.text.replace(/^['"]|['"]$/g, ''); ctx.imports.push({ source: strContent, - names: [lastPathSegment(strContent)], + names: [strContent.split('/').pop() ?? strContent], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -190,7 +190,7 @@ function handleRubyRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { if (content) { ctx.imports.push({ source: content.text, - names: [lastPathSegment(content.text)], + names: [content.text.split('/').pop() ?? content.text], line: node.startPosition.row + 1, rubyRequire: true, }); @@ -221,9 +221,16 @@ function handleRubyModuleInclusion( } } -const RUBY_PARENT_TYPES = ['class', 'module'] as const; function findRubyParentClass(node: TreeSitterNode): string | null { - return findParentNode(node, RUBY_PARENT_TYPES); + let current = node.parent; + while (current) { + if (current.type === 'class' || current.type === 'module') { + const nameNode = current.childForFieldName('name'); + return nameNode ? nameNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index 81657d0a..3f40737e 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -5,14 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { - extractBodyMembers, - findParentNode, - lastPathSegment, - MAX_WALK_DEPTH, - nodeEndLine, - rustVisibility, -} from './helpers.js'; +import { findChild, MAX_WALK_DEPTH, nodeEndLine, rustVisibility } from './helpers.js'; /** * Extract symbols from Rust files. 
@@ -213,9 +206,16 @@ function handleRustMacroInvocation(node: TreeSitterNode, ctx: ExtractorOutput): } } -const RUST_IMPL_TYPES = ['impl_item'] as const; function findCurrentImpl(node: TreeSitterNode): string | null { - return findParentNode(node, RUST_IMPL_TYPES, 'type'); + let current = node.parent; + while (current) { + if (current.type === 'impl_item') { + const typeNode = current.childForFieldName('type'); + return typeNode ? typeNode.text : null; + } + current = current.parent; + } + return null; } // ── Child extraction helpers ──────────────────────────────────────────────── @@ -227,6 +227,8 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara const param = paramListNode.child(i); if (!param) continue; if (param.type === 'self_parameter') { + // Skip self parameters — matches native engine behaviour + continue; } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { @@ -238,16 +240,34 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara } function extractStructFields(structNode: TreeSitterNode): SubDeclaration[] { - return extractBodyMembers( - structNode, - ['body', 'field_declaration_list'], - 'field_declaration', - 'property', - ); + const fields: SubDeclaration[] = []; + const fieldList = + structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + return fields; } function extractEnumVariants(enumNode: TreeSitterNode): SubDeclaration[] { - return extractBodyMembers(enumNode, ['body', 'enum_variant_list'], 'enum_variant', 'constant'); + const variants: 
SubDeclaration[] = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); + if (!body) return variants; + for (let i = 0; i < body.childCount; i++) { + const variant = body.child(i); + if (!variant || variant.type !== 'enum_variant') continue; + const nameNode = variant.childForFieldName('name'); + if (nameNode) { + variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); + } + } + return variants; } function extractRustTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void { @@ -355,7 +375,7 @@ function extractRustUsePath(node: TreeSitterNode | null): { source: string; name if (node.type === 'scoped_identifier' || node.type === 'identifier') { const text = node.text; - const lastName = lastPathSegment(text, '::'); + const lastName = text.split('::').pop() ?? text; return [{ source: text, names: [lastName] }]; } From e9524f2161801ddb32115c063dad2769792d3886 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:22:29 -0600 Subject: [PATCH 09/10] fix(ast): filter native call AST nodes before engine visitor check The engine.ts check `!Array.isArray(symbols.astNodes)` skips the WASM visitor when native provides astNodes. If native only produced call entries (now dead), the array was non-empty but contained only filtered- out kinds, causing the WASM visitor to be skipped and no AST nodes to be stored. Filter call entries early so the WASM visitor runs when no non-call entries remain. --- src/ast-analysis/engine.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index fdee6cae..d4e5a6e7 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -181,6 +181,18 @@ function setupVisitors( }; // AST-store visitor + // Strip dead 'call' kind from native astNodes — call AST nodes are no longer + // extracted by WASM. 
If only calls remain, clear the array so the WASM visitor + // runs and extracts the non-call kinds (new, throw, await, string, regex). + if (Array.isArray(symbols.astNodes)) { + const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); + if (filtered.length === 0) { + symbols.astNodes = undefined; + } else { + symbols.astNodes = filtered; + } + } + let astVisitor: Visitor | null = null; const astTypeMap = AST_TYPE_MAPS.get(langId); if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) { From 81f64d6e4d315aa728c5aaafff227a07baab8ea1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 03:30:21 -0600 Subject: [PATCH 10/10] fix(ast): trigger WASM tree fallback when native only provided call AST nodes Move the call-kind filter from setupVisitors (which only runs for files with _tree) to the top of runAnalyses so it applies to all files including native-parsed ones. Add AST needs to ensureWasmTreesIfNeeded so WASM trees are created when native astNodes were cleared. Also remove unused nativeProvidedAstNodes variable and fix formatting. 
--- src/ast-analysis/engine.ts | 32 +++++++++++++++++--------------- src/features/ast.ts | 1 - 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index d4e5a6e7..b9c4ed25 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -102,11 +102,12 @@ async function ensureWasmTreesIfNeeded( opts: AnalysisOpts, rootDir: string, ): Promise { + const doAst = opts.ast !== false; const doComplexity = opts.complexity !== false; const doCfg = opts.cfg !== false; const doDataflow = opts.dataflow !== false; - if (!doComplexity && !doCfg && !doDataflow) return; + if (!doAst && !doComplexity && !doCfg && !doDataflow) return; let needsWasmTrees = false; for (const [relPath, symbols] of fileSymbols) { @@ -131,6 +132,8 @@ async function ensureWasmTreesIfNeeded( d.endLine > d.line && !d.name.includes('.'); + // AST: need tree when native didn't provide non-call astNodes + const needsAst = doAst && !Array.isArray(symbols.astNodes) && WALK_EXTENSIONS.has(ext); const needsComplexity = doComplexity && COMPLEXITY_EXTENSIONS.has(ext) && @@ -141,7 +144,7 @@ async function ensureWasmTreesIfNeeded( defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); - if (needsComplexity || needsCfg || needsDataflow) { + if (needsAst || needsComplexity || needsCfg || needsDataflow) { needsWasmTrees = true; break; } @@ -180,19 +183,7 @@ function setupVisitors( getFunctionName: (_node: TreeSitterNode) => null, }; - // AST-store visitor - // Strip dead 'call' kind from native astNodes — call AST nodes are no longer - // extracted by WASM. If only calls remain, clear the array so the WASM visitor - // runs and extracts the non-call kinds (new, throw, await, string, regex). 
- if (Array.isArray(symbols.astNodes)) { - const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); - if (filtered.length === 0) { - symbols.astNodes = undefined; - } else { - symbols.astNodes = filtered; - } - } - + // AST-store visitor (call kind already filtered in runAnalyses upfront) let astVisitor: Visitor | null = null; const astTypeMap = AST_TYPE_MAPS.get(langId); if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) { @@ -430,6 +421,17 @@ export async function runAnalyses( if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; + // Strip dead 'call' kind from native astNodes upfront. Call AST nodes are no + // longer extracted by the WASM visitor; native binaries still emit them until + // the Rust extractors are updated (see #701). Clear the array when only calls + // remain so the WASM visitor runs and extracts non-call kinds. + for (const [, symbols] of fileSymbols) { + if (Array.isArray(symbols.astNodes)) { + const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); + symbols.astNodes = filtered.length > 0 ? (filtered as typeof symbols.astNodes) : undefined; + } + } + const extToLang = buildExtToLangMap(); // WASM pre-parse for files that need it diff --git a/src/features/ast.ts b/src/features/ast.ts index ea2be11a..a4ca4c25 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -163,7 +163,6 @@ export async function buildAstNodes( // Call AST nodes were removed — 'call' kind entries in ast_nodes are dead // (never queried by any feature or command). symbols.calls are still used // for call *edges* but no longer written to ast_nodes. - const nativeProvidedAstNodes = Array.isArray(symbols.astNodes); if (Array.isArray(symbols.astNodes)) { // Native engine provided AST nodes (may be empty for files with no AST content).