diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index a8fb2e44..266653a9 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -427,17 +427,6 @@ export async function runAnalyses( if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; - // Strip dead 'call' kind from native astNodes upfront. Call AST nodes are no - // longer extracted by the WASM visitor; native binaries still emit them until - // the Rust extractors are updated (see #701). Clear the array when only calls - // remain so the WASM visitor runs and extracts non-call kinds. - for (const [, symbols] of fileSymbols) { - if (Array.isArray(symbols.astNodes)) { - const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); - symbols.astNodes = filtered.length > 0 ? (filtered as typeof symbols.astNodes) : undefined; - } - } - const extToLang = buildExtToLangMap(); // WASM pre-parse for files that need it diff --git a/src/ast-analysis/rules/javascript.ts b/src/ast-analysis/rules/javascript.ts index 8140abc4..b4cec274 100644 --- a/src/ast-analysis/rules/javascript.ts +++ b/src/ast-analysis/rules/javascript.ts @@ -237,6 +237,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({ // ─── AST Node Types ─────────────────────────────────────────────────────── export const astTypes: Record | null = { + call_expression: 'call', new_expression: 'new', throw_statement: 'throw', await_expression: 'await', diff --git a/src/ast-analysis/visitors/ast-store-visitor.ts b/src/ast-analysis/visitors/ast-store-visitor.ts index 8f173313..82d8748f 100644 --- a/src/ast-analysis/visitors/ast-store-visitor.ts +++ b/src/ast-analysis/visitors/ast-store-visitor.ts @@ -14,7 +14,7 @@ interface AstStoreRow { kind: string; name: string | null | undefined; text: string | null; - receiver: null; + receiver: string | null; parentNodeId: number | null; } @@ -44,6 +44,22 @@ function extractExpressionText(node: TreeSitterNode): string | null { return truncate(node.text); } +function extractCallName(node: TreeSitterNode): string { + for (const field of ['function', 'method', 'name']) { + const fn = node.childForFieldName(field); + if (fn) return fn.text; + } + return node.text?.split('(')[0] || '?'; +} + +/** Extract receiver for call expressions (e.g. "obj" in "obj.method()"). */ +function extractCallReceiver(node: TreeSitterNode): string | null { + const fn = node.childForFieldName('function'); + if (!fn || fn.type !== 'member_expression') return null; + const obj = fn.childForFieldName('object'); + return obj ? obj.text : null; +} + function extractName(kind: string, node: TreeSitterNode): string | null { if (kind === 'throw') { for (let i = 0; i < node.childCount; i++) { @@ -82,6 +98,7 @@ export function createAstStoreVisitor( nodeIdMap: Map, ): Visitor { const rows: AstStoreRow[] = []; + const matched = new Set(); function findParentDef(line: number): Definition | null { let best: Definition | null = null; @@ -101,45 +118,115 @@ export function createAstStoreVisitor( return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } + /** Recursively walk a subtree collecting AST nodes — used for arguments-only traversal. */ + function walkSubtree(node: TreeSitterNode | null): void { + if (!node) return; + if (matched.has(node.id)) return; + + const kind = astTypeMap[node.type]; + if (kind === 'call') { + // Capture this call and recurse only into its arguments + collectNode(node, kind); + walkCallArguments(node); + return; + } + if (kind) { + collectNode(node, kind); + if (kind !== 'string' && kind !== 'regex') return; // skipChildren for non-leaf kinds + } + for (let i = 0; i < node.childCount; i++) { + walkSubtree(node.child(i)); + } + } + + /** + * Recurse into only the arguments of a call node — mirrors the native engine's + * strategy that prevents double-counting nested calls in the function field + * (e.g. chained calls like `a().b()`). + */ + function walkCallArguments(callNode: TreeSitterNode): void { + // Try field-based lookup first, fall back to kind-based matching + const argsNode = + callNode.childForFieldName('arguments') ?? + findChildByKind(callNode, ['arguments', 'argument_list', 'method_arguments']); + if (!argsNode) return; + for (let i = 0; i < argsNode.childCount; i++) { + walkSubtree(argsNode.child(i)); + } + } + + function findChildByKind(node: TreeSitterNode, kinds: string[]): TreeSitterNode | null { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && kinds.includes(child.type)) return child; + } + return null; + } + + function collectNode(node: TreeSitterNode, kind: string): void { + if (matched.has(node.id)) return; + + const line = node.startPosition.row + 1; + let name: string | null | undefined; + let text: string | null = null; + let receiver: string | null = null; + + if (kind === 'call') { + name = extractCallName(node); + text = truncate(node.text); + receiver = extractCallReceiver(node); + } else if (kind === 'new') { + name = extractNewName(node); + text = truncate(node.text); + } else if (kind === 'throw') { + name = extractName('throw', node); + text = extractExpressionText(node); + } else if (kind === 'await') { + name = extractName('await', node); + text = extractExpressionText(node); + } else if (kind === 'string') { + const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; + if (content.length < 2) return; + name = truncate(content, 100); + text = truncate(node.text); + } else if (kind === 'regex') { + name = node.text || '?'; + text = truncate(node.text); + } + + rows.push({ + file: relPath, + line, + kind, + name, + text, + receiver, + parentNodeId: resolveParentNodeId(line), + }); + + matched.add(node.id); + } + return { name: 'ast-store', enterNode(node: TreeSitterNode, _context: VisitorContext): EnterNodeResult | undefined { + // Guard: skip re-collection but do NOT skipChildren — node.id (memory address) + // can be reused by tree-sitter, so a collision would incorrectly suppress an + // unrelated subtree. The parent call's skipChildren handles the intended case. + if (matched.has(node.id)) return; + const kind = astTypeMap[node.type]; if (!kind) return; - const line = node.startPosition.row + 1; - let name: string | null | undefined; - let text: string | null = null; - - if (kind === 'new') { - name = extractNewName(node); - text = truncate(node.text); - } else if (kind === 'throw') { - name = extractName('throw', node); - text = extractExpressionText(node); - } else if (kind === 'await') { - name = extractName('await', node); - text = extractExpressionText(node); - } else if (kind === 'string') { - const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; - if (content.length < 2) return; - name = truncate(content, 100); - text = truncate(node.text); - } else if (kind === 'regex') { - name = node.text || '?'; - text = truncate(node.text); - } + collectNode(node, kind); - rows.push({ - file: relPath, - line, - kind, - name, - text, - receiver: null, - parentNodeId: resolveParentNodeId(line), - }); + if (kind === 'call') { + // Mirror native: skip full subtree, recurse only into arguments. + // Prevents double-counting chained calls like service.getUser().getName(). + walkCallArguments(node); + return { skipChildren: true }; + } if (kind !== 'string' && kind !== 'regex') { return { skipChildren: true }; diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 376460d7..26cd2f86 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -185,6 +185,11 @@ async function runPipelineStages(ctx: PipelineContext): Promise { /* ignore close errors */ } ctx.nativeDb = undefined; + // Also clear stale reference in engineOpts to prevent stages from + // calling methods on the closed NativeDatabase. + if (ctx.engineOpts?.nativeDb) { + ctx.engineOpts.nativeDb = undefined; + } } await collectFiles(ctx); diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index cc44207b..ea6dff0f 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -6,9 +6,9 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; -import { getNodeId } from '#db/index.js'; -import { debug } from '#infrastructure/logger.js'; -import { loadNative } from '#infrastructure/native.js'; +import { getNodeId } from '../../../../db/index.js'; +import { debug } from '../../../../infrastructure/logger.js'; +import { loadNative } from '../../../../infrastructure/native.js'; import type { BetterSqlite3Database, Call, @@ -18,7 +18,7 @@ import type { NativeAddon, NodeRow, TypeMapEntry, -} from '#types'; +} from '../../../../types.js'; import { computeConfidence } from '../../resolve.js'; import type { PipelineContext } from '../context.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index bf634092..15a4976f 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -5,9 +5,9 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; -import { debug } from '#infrastructure/logger.js'; -import { normalizePath } from '#shared/constants.js'; -import type { ExtractorOutput } from '#types'; +import { debug } from '../../../../infrastructure/logger.js'; +import { normalizePath } from '../../../../shared/constants.js'; +import type { ExtractorOutput } from '../../../../types.js'; import type { PipelineContext } from '../context.js'; import { readFileSafe } from '../helpers.js'; diff --git a/src/domain/graph/builder/stages/collect-files.ts b/src/domain/graph/builder/stages/collect-files.ts index 64567ceb..aaa658b5 100644 --- a/src/domain/graph/builder/stages/collect-files.ts +++ b/src/domain/graph/builder/stages/collect-files.ts @@ -7,8 +7,8 @@ */ import fs from 'node:fs'; import path from 'node:path'; -import { debug, info } from '#infrastructure/logger.js'; -import { normalizePath } from '#shared/constants.js'; +import { debug, info } from '../../../../infrastructure/logger.js'; +import { normalizePath } from '../../../../shared/constants.js'; import { readJournal } from '../../journal.js'; import type { PipelineContext } from '../context.js'; import { collectFiles as collectFilesUtil } from '../helpers.js'; diff --git a/src/features/ast.ts b/src/features/ast.ts index 43d1b953..f6c1973c 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -103,16 +103,14 @@ export async function buildAstNodes( for (const [relPath, symbols] of fileSymbols) { if (Array.isArray(symbols.astNodes)) { - // Filter out 'call' kind — dead AST node type, see JS fallback path comment. - const filtered = symbols.astNodes.filter((n) => n.kind !== 'call'); batches.push({ file: relPath, - nodes: filtered.map((n) => ({ + nodes: symbols.astNodes.map((n) => ({ line: n.line, kind: n.kind, name: n.name, text: n.text, - receiver: n.receiver, + receiver: n.receiver ?? '', })), }); } else if (symbols.calls || symbols._tree) { @@ -168,16 +166,9 @@ export async function buildAstNodes( nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } - // Call AST nodes were removed — 'call' kind entries in ast_nodes are dead - // (never queried by any feature or command). symbols.calls are still used - // for call *edges* but no longer written to ast_nodes. - if (Array.isArray(symbols.astNodes)) { - // Native engine provided AST nodes (may be empty for files with no AST content). - // Filter out 'call' kind — call AST nodes are dead (never queried by any feature). - // The WASM visitor no longer extracts them; native binaries still emit them until - // the next Rust release strips them from the extractor. - for (const n of symbols.astNodes.filter((n) => n.kind !== 'call')) { + // Native engine provided AST nodes (may be empty for files with no AST content) + for (const n of symbols.astNodes) { const parentDef = findParentDef(defs, n.line); let parentNodeId: number | null = null; if (parentDef) { diff --git a/tests/integration/build-parity.test.ts b/tests/integration/build-parity.test.ts index e1d95706..e6febfa5 100644 --- a/tests/integration/build-parity.test.ts +++ b/tests/integration/build-parity.test.ts @@ -114,6 +114,18 @@ describeOrSkip('Build parity: native vs WASM', () => { it('produces identical ast_nodes', () => { const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); + // Diagnostic: log counts to help debug CI-only parity failures + if (nativeGraph.astNodes.length !== wasmGraph.astNodes.length) { + console.error( + `[parity-diag] native astNodes: ${nativeGraph.astNodes.length}, wasm astNodes: ${wasmGraph.astNodes.length}`, + ); + console.error( + `[parity-diag] native kinds: ${JSON.stringify([...new Set((nativeGraph.astNodes as any[]).map((n: any) => n.kind))])}`, + ); + console.error( + `[parity-diag] wasm kinds: ${JSON.stringify([...new Set((wasmGraph.astNodes as any[]).map((n: any) => n.kind))])}`, + ); + } expect(nativeGraph.astNodes).toEqual(wasmGraph.astNodes); }); }); diff --git a/tests/parsers/ast-nodes.test.ts b/tests/parsers/ast-nodes.test.ts index 5e26a77d..ca3c27cb 100644 --- a/tests/parsers/ast-nodes.test.ts +++ b/tests/parsers/ast-nodes.test.ts @@ -100,9 +100,15 @@ function queryAllAstNodes() { // ─── Tests ──────────────────────────────────────────────────────────── describe('buildAstNodes — JS extraction', () => { - test('call kind AST nodes are no longer stored (dead code removed)', () => { + test('captures call_expression as kind:call', () => { const calls = queryAstNodes('call'); - expect(calls.length).toBe(0); + // eval(input), result.set('data', data), console.log(result) + // Note: fetch('/api/data') is inside await — captured as kind:await, not kind:call + expect(calls.length).toBe(3); + const names = calls.map((n) => n.name); + expect(names).toContain('eval'); + expect(names).toContain('result.set'); + expect(names).toContain('console.log'); }); test('captures new_expression as kind:new', () => {