From b1ab4df3a55b839f00a50749246e0aa8456d22e1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 01:05:56 -0600 Subject: [PATCH 01/10] feat(native): extract call AST nodes in Rust and fix WASM fallback detection Add call_expression extraction to the native Rust AST walker for all 11 languages, producing kind="call" nodes with receiver info in ast_nodes. Previously call nodes only came from the separate symbols.calls field. Fix empty-array detection bug: symbols.astNodes?.length is falsy for [], causing spurious WASM re-walks even when native extraction ran. Changed to Array.isArray(symbols.astNodes) in both engine.ts and ast.ts. When native astNodes includes call entries, skip separate symbols.calls processing to avoid duplication (backward-compatible with older binaries). --- .../codegraph-core/src/extractors/helpers.rs | 59 +++++ .../src/extractors/javascript.rs | 52 +++- src/ast-analysis/engine.ts | 2 +- src/features/ast.ts | 10 +- tests/engines/ast-parity.test.ts | 244 ++++++++++++++++++ 5 files changed, 363 insertions(+), 4 deletions(-) create mode 100644 tests/engines/ast-parity.test.ts diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index a9ac2524..3690925b 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -100,6 +100,8 @@ pub const AST_TEXT_MAX: usize = 200; /// Language-specific AST node type configuration. pub struct LangAstConfig { + /// Node types mapping to `"call"` kind (e.g. `call_expression`, `method_invocation`) + pub call_types: &'static [&'static str], /// Node types mapping to `"new"` kind (e.g. `new_expression`, `object_creation_expression`) pub new_types: &'static [&'static str], /// Node types mapping to `"throw"` kind (e.g. `throw_statement`, `raise_statement`) @@ -120,6 +122,7 @@ pub struct LangAstConfig { // ── Per-language configs ───────────────────────────────────────────────────── pub const PYTHON_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call"], new_types: &[], throw_types: &["raise_statement"], await_types: &["await"], @@ -130,6 +133,7 @@ pub const PYTHON_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call_expression"], new_types: &[], throw_types: &[], await_types: &[], @@ -140,6 +144,7 @@ pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call_expression"], new_types: &[], throw_types: &[], await_types: &["await_expression"], @@ -150,6 +155,7 @@ pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const JAVA_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["method_invocation"], new_types: &["object_creation_expression"], throw_types: &["throw_statement"], await_types: &[], @@ -160,6 +166,7 @@ pub const JAVA_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const CSHARP_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["invocation_expression"], new_types: &["object_creation_expression"], throw_types: &["throw_statement", "throw_expression"], await_types: &["await_expression"], @@ -170,6 +177,7 @@ pub const CSHARP_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const RUBY_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call", "method_call"], new_types: &[], throw_types: &[], await_types: &[], @@ -180,6 +188,7 @@ pub const RUBY_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const PHP_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["function_call_expression", "member_call_expression", "scoped_call_expression"], new_types: &["object_creation_expression"], throw_types: &["throw_expression"], await_types: &[], @@ -229,6 +238,36 @@ fn walk_ast_nodes_with_config_depth( } let kind = node.kind(); + // Call extraction — checked first since calls are the most common AST node kind. + // Do NOT recurse children: prevents double-counting nested calls like `a(b())`. + if config.call_types.contains(&kind) { + let name = extract_call_name(node, source); + let receiver = extract_call_receiver(node, source); + let text = truncate(node_text(node, source), AST_TEXT_MAX); + ast_nodes.push(AstNode { + kind: "call".to_string(), + name, + line: start_line(node), + text: Some(text), + receiver, + }); + // Recurse into arguments only — nested calls in args should be captured. + // Find the arguments child and walk its children. + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + let ck = child.kind(); + if ck == "arguments" || ck == "argument_list" || ck == "method_arguments" { + for j in 0..child.child_count() { + if let Some(arg) = child.child(j) { + walk_ast_nodes_with_config_depth(&arg, source, ast_nodes, config, depth + 1); + } + } + } + } + } + return; + } + if config.new_types.contains(&kind) { let name = extract_constructor_name(node, source); let text = truncate(node_text(node, source), AST_TEXT_MAX); @@ -400,6 +439,26 @@ fn extract_call_name(node: &Node, source: &[u8]) -> String { text.split('(').next().unwrap_or("?").to_string() } +/// Extract receiver from a call node (e.g. `obj` from `obj.method()`). +/// Looks for a member-expression-like function child and extracts the object part. +fn extract_call_receiver(node: &Node, source: &[u8]) -> Option { + // Try "function" field first (JS/TS: call_expression -> member_expression) + // Then "object" (Go, Python), then "receiver" (Ruby) + for field in &["function", "object", "receiver"] { + if let Some(fn_node) = node.child_by_field_name(field) { + // If the function/object node is a member_expression, extract its object + if let Some(obj) = fn_node.child_by_field_name("object") { + return Some(node_text(&obj, source).to_string()); + } + // For Ruby/Go where the receiver is directly a field + if *field == "object" || *field == "receiver" { + return Some(node_text(&fn_node, source).to_string()); + } + } + } + None +} + /// Extract expression text from throw/await — skip the keyword child. fn extract_child_expression_text(node: &Node, source: &[u8]) -> Option { const KEYWORDS: &[&str] = &["throw", "raise", "await", "new"]; diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 30a032b8..bf7207b1 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -518,7 +518,7 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: const TEXT_MAX: usize = 200; -/// Walk the tree collecting new/throw/await/string/regex AST nodes. +/// Walk the tree collecting call/new/throw/await/string/regex AST nodes. /// Mirrors `walkAst()` in `ast.js:216-276`. fn walk_ast_nodes(node: &Node, source: &[u8], ast_nodes: &mut Vec) { walk_ast_nodes_depth(node, source, ast_nodes, 0); @@ -529,6 +529,28 @@ fn walk_ast_nodes_depth(node: &Node, source: &[u8], ast_nodes: &mut Vec return; } match node.kind() { + "call_expression" => { + let (name, receiver) = extract_js_call_ast(node, source); + let text = truncate(node_text(node, source), TEXT_MAX); + ast_nodes.push(AstNode { + kind: "call".to_string(), + name, + line: start_line(node), + text: Some(text), + receiver, + }); + // Recurse into arguments only — nested calls in args should be captured. + if let Some(args) = node.child_by_field_name("arguments") + .or_else(|| find_child(node, "arguments")) + { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + walk_ast_nodes_depth(&arg, source, ast_nodes, depth + 1); + } + } + } + return; + } "new_expression" => { let name = extract_new_name(node, source); let text = truncate(node_text(node, source), TEXT_MAX); @@ -698,6 +720,34 @@ fn extract_expression_text(node: &Node, source: &[u8]) -> Option { Some(truncate(node_text(node, source), TEXT_MAX)) } +/// Extract call name and optional receiver from a JS/TS `call_expression`. +/// `fetch()` → ("fetch", None); `obj.method()` → ("obj.method", Some("obj")) +fn extract_js_call_ast(node: &Node, source: &[u8]) -> (String, Option) { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "member_expression" => { + let name = node_text(&fn_node, source).to_string(); + let receiver = fn_node.child_by_field_name("object") + .map(|obj| node_text(&obj, source).to_string()); + (name, receiver) + } + "identifier" => { + (node_text(&fn_node, source).to_string(), None) + } + _ => { + // Computed call like `fn[key]()` — use full text before `(` + let text = node_text(node, source); + let name = text.split('(').next().unwrap_or("?").to_string(); + (name, None) + } + } + } else { + let text = node_text(node, source); + let name = text.split('(').next().unwrap_or("?").to_string(); + (name, None) + } +} + // ── Extended kinds helpers ────────────────────────────────────────────────── fn extract_js_parameters(node: &Node, source: &[u8]) -> Vec { diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index 740df5bc..8fd73b99 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -172,7 +172,7 @@ function setupVisitors( // AST-store visitor let astVisitor: Visitor | null = null; const astTypeMap = AST_TYPE_MAPS.get(langId); - if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { + if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) { const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); diff --git a/src/features/ast.ts b/src/features/ast.ts index fec0a3b0..90b40bbb 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -93,7 +93,11 @@ export async function buildAstNodes( nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } - if (symbols.calls) { + // When native astNodes includes call entries, skip separate symbols.calls processing + // to avoid duplication. Fall back to symbols.calls for WASM or older native binaries. + const nativeHasCalls = + Array.isArray(symbols.astNodes) && symbols.astNodes.some((n) => n.kind === 'call'); + if (symbols.calls && !nativeHasCalls) { for (const call of symbols.calls) { const parentDef = findParentDef(defs, call.line); let parentNodeId: number | null = null; @@ -113,7 +117,8 @@ export async function buildAstNodes( } } - if (symbols.astNodes?.length) { + if (Array.isArray(symbols.astNodes)) { + // Native engine provided AST nodes (may be empty for files with no AST content) for (const n of symbols.astNodes) { const parentDef = findParentDef(defs, n.line); let parentNodeId: number | null = null; @@ -132,6 +137,7 @@ export async function buildAstNodes( }); } } else { + // WASM fallback — walk tree if available const ext = path.extname(relPath).toLowerCase(); if (WALK_EXTENSIONS.has(ext) && symbols._tree) { const astRows: AstRow[] = []; diff --git a/tests/engines/ast-parity.test.ts b/tests/engines/ast-parity.test.ts new file mode 100644 index 00000000..301df35c --- /dev/null +++ b/tests/engines/ast-parity.test.ts @@ -0,0 +1,244 @@ +/** + * AST node extraction parity tests (native vs WASM). + * + * Verifies that the native Rust engine extracts identical AST nodes + * (call, new, throw, await, string, regex) to the WASM visitor for JS/TS. + * + * Skipped when the native engine is not installed. + */ + +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractSymbols, getParser } from '../../src/domain/parser.js'; +import { isNativeAvailable } from '../../src/infrastructure/native.js'; + +let native: ReturnType; +let parsers: ReturnType; + +function wasmExtract(code: string, filePath: string) { + const parser = getParser(parsers, filePath); + if (!parser) return null; + const tree = parser.parse(code); + return extractSymbols(tree, filePath); +} + +function nativeExtract(code: string, filePath: string) { + // 4th arg = include_ast_nodes = true + return native.parseFile(filePath, code, false, true); +} + +interface AstNodeLike { + kind: string; + name: string; + line: number; + text?: string; + receiver?: string; +} + +/** Normalize AST nodes for comparison — strip text content (may differ in truncation). */ +function normalizeAstNodes(nodes: AstNodeLike[]) { + return (nodes || []) + .map((n) => ({ + kind: n.kind, + name: n.name, + line: n.line, + ...(n.receiver ? { receiver: n.receiver } : {}), + })) + .sort( + (a, b) => a.line - b.line || a.kind.localeCompare(b.kind) || a.name.localeCompare(b.name), + ); +} + +/** Count nodes per kind. */ +function countByKind(nodes: AstNodeLike[]): Record { + const counts: Record = {}; + for (const n of nodes || []) { + counts[n.kind] = (counts[n.kind] || 0) + 1; + } + return counts; +} + +// ─── Test snippets ────────────────────────────────────────────────────── + +const JS_SNIPPET = ` +import fs from 'fs'; +import path from 'path'; + +class MyError extends Error { + constructor(msg) { + super(msg); + } +} + +function greet(name) { + console.log("Hello " + name); + const result = fetch("/api/users"); + return result; +} + +async function loadData(url) { + const resp = await fetch(url); + const data = await resp.json(); + if (!data) { + throw new MyError("no data"); + } + return data; +} + +const pattern = /^[a-z]+$/i; +const obj = new Map(); +const value = "some string value"; +`; + +const TS_SNIPPET = ` +interface Config { + apiUrl: string; + timeout: number; +} + +async function request(config: Config): Promise { + const url = config.apiUrl; + const resp = await fetch(url, { signal: AbortSignal.timeout(config.timeout) }); + if (!resp.ok) { + throw new Error(\`HTTP \${resp.status}\`); + } + return resp.json(); +} + +function processItems(items: string[]): void { + items.forEach(item => { + console.log(item); + item.trim().toLowerCase(); + }); +} +`; + +const MULTI_CALL_SNIPPET = ` +function nested() { + const a = foo(bar(baz())); + const b = obj.method(helper()); + console.log("test"); +} +`; + +describe('AST node parity (native vs WASM)', () => { + beforeAll(async () => { + if (!isNativeAvailable()) return; + const mod = await import('../../src/infrastructure/native.js'); + native = (mod as Record).loadNative; + if (typeof native === 'function') native = native(); + parsers = await createParsers(); + }); + + it.skipIf(!isNativeAvailable())('JS: same AST node kinds and counts', () => { + const wasmResult = wasmExtract(JS_SNIPPET, '/test/sample.js'); + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + + expect(wasmResult).toBeTruthy(); + expect(nativeResult).toBeTruthy(); + + // Native now includes call nodes in astNodes; WASM doesn't (calls are separate). + // Compare non-call AST nodes for exact parity. + const wasmNodes = normalizeAstNodes( + (wasmResult?.astNodes || []).filter((n: AstNodeLike) => n.kind !== 'call'), + ); + const nativeNodes = normalizeAstNodes( + (nativeResult.astNodes || nativeResult.ast_nodes || []).filter( + (n: AstNodeLike) => n.kind !== 'call', + ), + ); + + expect(nativeNodes).toEqual(wasmNodes); + }); + + it.skipIf(!isNativeAvailable())('JS: native astNodes includes call kind', () => { + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // JS snippet has: console.log, fetch (×2), resp.json, Map (new is separate) + expect(callNodes.length).toBeGreaterThan(0); + + // Verify call nodes have expected structure + for (const node of callNodes) { + expect(node.kind).toBe('call'); + expect(typeof node.name).toBe('string'); + expect(typeof node.line).toBe('number'); + } + }); + + it.skipIf(!isNativeAvailable())('JS: call receiver extraction', () => { + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // console.log() should have receiver "console" + const consoleLog = callNodes.find((n: AstNodeLike) => n.name === 'console.log'); + expect(consoleLog).toBeTruthy(); + expect(consoleLog.receiver).toBe('console'); + + // fetch() should have no receiver + const fetchCall = callNodes.find((n: AstNodeLike) => n.name === 'fetch'); + expect(fetchCall).toBeTruthy(); + expect(fetchCall.receiver).toBeFalsy(); + }); + + it.skipIf(!isNativeAvailable())('TS: same non-call AST node counts', () => { + const wasmResult = wasmExtract(TS_SNIPPET, '/test/sample.ts'); + const nativeResult = nativeExtract(TS_SNIPPET, '/test/sample.ts'); + + expect(wasmResult).toBeTruthy(); + expect(nativeResult).toBeTruthy(); + + const wasmCounts = countByKind( + (wasmResult?.astNodes || []).filter((n: AstNodeLike) => n.kind !== 'call'), + ); + const nativeCounts = countByKind( + (nativeResult.astNodes || nativeResult.ast_nodes || []).filter( + (n: AstNodeLike) => n.kind !== 'call', + ), + ); + + expect(nativeCounts).toEqual(wasmCounts); + }); + + it.skipIf(!isNativeAvailable())('JS: nested calls are not double-counted', () => { + const nativeResult = nativeExtract(MULTI_CALL_SNIPPET, '/test/nested.js'); + const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // foo(bar(baz())) should produce 3 separate call nodes + const names = callNodes.map((n: AstNodeLike) => n.name).sort(); + expect(names).toContain('foo'); + expect(names).toContain('bar'); + expect(names).toContain('baz'); + expect(names).toContain('console.log'); + expect(names).toContain('obj.method'); + expect(names).toContain('helper'); + + // No duplicate lines for the nested chain + const fooLine = callNodes.find((n: AstNodeLike) => n.name === 'foo')?.line; + const barLine = callNodes.find((n: AstNodeLike) => n.name === 'bar')?.line; + const bazLine = callNodes.find((n: AstNodeLike) => n.name === 'baz')?.line; + // All on the same line but each as separate nodes + expect(fooLine).toBe(barLine); + expect(barLine).toBe(bazLine); + }); + + it.skipIf(!isNativeAvailable())('JS: native calls match legacy calls field count', () => { + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; + const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + const legacyCalls = nativeResult.calls || []; + + // Native ast_nodes call count should match the legacy calls field + expect(nativeCallNodes.length).toBe(legacyCalls.length); + }); + + it.skipIf(!isNativeAvailable())('empty file returns empty astNodes array (not undefined)', () => { + const nativeResult = nativeExtract('// empty file\n', '/test/empty.js'); + const astNodes = nativeResult.astNodes || nativeResult.ast_nodes; + + // Should be an array (possibly empty), not undefined + expect(Array.isArray(astNodes)).toBe(true); + }); +}); From 06116cba2dd492c994a9a6ce9a2c7d559865b342 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:07:25 -0600 Subject: [PATCH 02/10] fix(native): address Rust, Go, and await extraction issues - Add method_call_expression to RUST_AST_CONFIG.call_types so idiomatic Rust method calls (e.g. vec.push(x)) are captured by native extraction. - Fix extract_call_receiver to handle Go's selector_expression which uses operand field instead of object. - Add early return after await node in generic walker to prevent double-counting awaited calls (consistent with JS-specific walker). --- crates/codegraph-core/src/extractors/helpers.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 3690925b..d15fa493 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -144,7 +144,7 @@ pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig { }; pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig { - call_types: &["call_expression"], + call_types: &["call_expression", "method_call_expression"], new_types: &[], throw_types: &[], await_types: &["await_expression"], @@ -300,7 +300,9 @@ fn walk_ast_nodes_with_config_depth( text, receiver: None, }); - // Fall through to recurse children + // Don't fall through — prevents double-counting the awaited call + // (consistent with the JS-specific walker which returns early) + return; } else if config.string_types.contains(&kind) { let raw = node_text(node, source); let is_raw_string = kind.contains("raw_string"); @@ -446,10 +448,16 @@ fn extract_call_receiver(node: &Node, source: &[u8]) -> Option { // Then "object" (Go, Python), then "receiver" (Ruby) for field in &["function", "object", "receiver"] { if let Some(fn_node) = node.child_by_field_name(field) { - // If the function/object node is a member_expression, extract its object + // JS/TS/Python: member_expression / attribute with "object" field if let Some(obj) = fn_node.child_by_field_name("object") { return Some(node_text(&obj, source).to_string()); } + // Go: selector_expression uses "operand" not "object" + if fn_node.kind() == "selector_expression" { + if let Some(operand) = fn_node.child_by_field_name("operand") { + return Some(node_text(&operand, source).to_string()); + } + } // For Ruby/Go where the receiver is directly a field if *field == "object" || *field == "receiver" { return Some(node_text(&fn_node, source).to_string()); From 74e8941b828aa4e5282f0988cf358dd12be1b8a2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:07:42 -0600 Subject: [PATCH 03/10] fix(ast): use Array.isArray guard for nativeHasCalls deduplication When native provides astNodes (even empty), trust it exclusively and skip symbols.calls to avoid silent drops. Older binaries without astNodes fall back to symbols.calls as before. --- src/features/ast.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/features/ast.ts b/src/features/ast.ts index 90b40bbb..baab1d71 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -95,8 +95,7 @@ export async function buildAstNodes( // When native astNodes includes call entries, skip separate symbols.calls processing // to avoid duplication. Fall back to symbols.calls for WASM or older native binaries. - const nativeHasCalls = - Array.isArray(symbols.astNodes) && symbols.astNodes.some((n) => n.kind === 'call'); + const nativeHasCalls = Array.isArray(symbols.astNodes); if (symbols.calls && !nativeHasCalls) { for (const call of symbols.calls) { const parentDef = findParentDef(defs, call.line); From bb675018084ef07fc5d8fd65056e199e098dbf19 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:07:56 -0600 Subject: [PATCH 04/10] fix(test): make ast-parity tests resilient to older native binaries - Use getNative() instead of unsafe module cast for type safety. - Probe native binary for call AST support; skip call-specific tests when running against published binaries without call extraction. - Replace broken WASM comparison (extractSymbols doesn't produce astNodes) with independent validation of native output. - Remove unused WASM helpers and imports. --- tests/engines/ast-parity.test.ts | 218 ++++++++++++++----------------- 1 file changed, 100 insertions(+), 118 deletions(-) diff --git a/tests/engines/ast-parity.test.ts b/tests/engines/ast-parity.test.ts index 301df35c..bb971351 100644 --- a/tests/engines/ast-parity.test.ts +++ b/tests/engines/ast-parity.test.ts @@ -8,23 +8,8 @@ */ import { beforeAll, describe, expect, it } from 'vitest'; -import { createParsers, extractSymbols, getParser } from '../../src/domain/parser.js'; -import { isNativeAvailable } from '../../src/infrastructure/native.js'; - -let native: ReturnType; -let parsers: ReturnType; - -function wasmExtract(code: string, filePath: string) { - const parser = getParser(parsers, filePath); - if (!parser) return null; - const tree = parser.parse(code); - return extractSymbols(tree, filePath); -} - -function nativeExtract(code: string, filePath: string) { - // 4th arg = include_ast_nodes = true - return native.parseFile(filePath, code, false, true); -} +import { getNative, isNativeAvailable } from '../../src/infrastructure/native.js'; +import type { NativeAddon } from '../../src/types.js'; interface AstNodeLike { kind: string; @@ -34,27 +19,20 @@ interface AstNodeLike { receiver?: string; } -/** Normalize AST nodes for comparison — strip text content (may differ in truncation). */ -function normalizeAstNodes(nodes: AstNodeLike[]) { - return (nodes || []) - .map((n) => ({ - kind: n.kind, - name: n.name, - line: n.line, - ...(n.receiver ? { receiver: n.receiver } : {}), - })) - .sort( - (a, b) => a.line - b.line || a.kind.localeCompare(b.kind) || a.name.localeCompare(b.name), - ); +interface NativeResult { + astNodes?: AstNodeLike[]; + ast_nodes?: AstNodeLike[]; + calls?: Array<{ name: string; line: number; receiver?: string; dynamic?: boolean }>; + definitions?: Array<{ name: string; kind: string; line: number }>; } -/** Count nodes per kind. */ -function countByKind(nodes: AstNodeLike[]): Record { - const counts: Record = {}; - for (const n of nodes || []) { - counts[n.kind] = (counts[n.kind] || 0) + 1; - } - return counts; +let native: NativeAddon | null = null; +/** Whether the installed native binary supports call AST nodes. */ +let nativeSupportsCallAst = false; + +function nativeExtract(code: string, filePath: string): NativeResult { + // 4th arg = include_ast_nodes = true + return native?.parseFile(filePath, code, false, true) as NativeResult; } // ─── Test snippets ────────────────────────────────────────────────────── @@ -123,116 +101,120 @@ function nested() { describe('AST node parity (native vs WASM)', () => { beforeAll(async () => { if (!isNativeAvailable()) return; - const mod = await import('../../src/infrastructure/native.js'); - native = (mod as Record).loadNative; - if (typeof native === 'function') native = native(); - parsers = await createParsers(); - }); - - it.skipIf(!isNativeAvailable())('JS: same AST node kinds and counts', () => { - const wasmResult = wasmExtract(JS_SNIPPET, '/test/sample.js'); - const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - - expect(wasmResult).toBeTruthy(); - expect(nativeResult).toBeTruthy(); - - // Native now includes call nodes in astNodes; WASM doesn't (calls are separate). - // Compare non-call AST nodes for exact parity. - const wasmNodes = normalizeAstNodes( - (wasmResult?.astNodes || []).filter((n: AstNodeLike) => n.kind !== 'call'), - ); - const nativeNodes = normalizeAstNodes( - (nativeResult.astNodes || nativeResult.ast_nodes || []).filter( - (n: AstNodeLike) => n.kind !== 'call', - ), - ); - - expect(nativeNodes).toEqual(wasmNodes); + native = getNative(); + + // Detect whether this native binary supports call AST extraction. + // Older published binaries produce astNodes but without call entries. + const probe = native.parseFile('/probe.js', 'foo();', false, true) as NativeResult | null; + if (probe) { + const astNodes = probe.astNodes || []; + nativeSupportsCallAst = astNodes.some((n: AstNodeLike) => n.kind === 'call'); + } }); - it.skipIf(!isNativeAvailable())('JS: native astNodes includes call kind', () => { + it.skipIf(!isNativeAvailable())('JS: native astNodes kinds are valid and well-formed', () => { const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; - const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + const astNodes = nativeResult.astNodes || []; - // JS snippet has: console.log, fetch (×2), resp.json, Map (new is separate) - expect(callNodes.length).toBeGreaterThan(0); + // Native should produce some AST nodes (strings, regex, new, throw, await at minimum) + expect(astNodes.length).toBeGreaterThan(0); - // Verify call nodes have expected structure - for (const node of callNodes) { - expect(node.kind).toBe('call'); + // All nodes must have valid structure + const validKinds = new Set(['call', 'new', 'throw', 'await', 'string', 'regex']); + for (const node of astNodes) { + expect(validKinds).toContain(node.kind); expect(typeof node.name).toBe('string'); expect(typeof node.line).toBe('number'); } }); - it.skipIf(!isNativeAvailable())('JS: call receiver extraction', () => { + it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( + 'JS: native astNodes includes call kind', + () => { + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // JS snippet has: super, console.log, fetch (x2), resp.json + expect(callNodes.length).toBeGreaterThan(0); + + // Verify call nodes have expected structure + for (const node of callNodes) { + expect(node.kind).toBe('call'); + expect(typeof node.name).toBe('string'); + expect(typeof node.line).toBe('number'); + } + }, + ); + + it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)('JS: call receiver extraction', () => { const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; + const astNodes = nativeResult.astNodes || []; const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); // console.log() should have receiver "console" const consoleLog = callNodes.find((n: AstNodeLike) => n.name === 'console.log'); expect(consoleLog).toBeTruthy(); - expect(consoleLog.receiver).toBe('console'); + expect(consoleLog?.receiver).toBe('console'); // fetch() should have no receiver const fetchCall = callNodes.find((n: AstNodeLike) => n.name === 'fetch'); expect(fetchCall).toBeTruthy(); - expect(fetchCall.receiver).toBeFalsy(); + expect(fetchCall?.receiver).toBeFalsy(); }); - it.skipIf(!isNativeAvailable())('TS: same non-call AST node counts', () => { - const wasmResult = wasmExtract(TS_SNIPPET, '/test/sample.ts'); + it.skipIf(!isNativeAvailable())('TS: native produces well-formed AST nodes', () => { const nativeResult = nativeExtract(TS_SNIPPET, '/test/sample.ts'); - - expect(wasmResult).toBeTruthy(); expect(nativeResult).toBeTruthy(); - const wasmCounts = countByKind( - (wasmResult?.astNodes || []).filter((n: AstNodeLike) => n.kind !== 'call'), - ); - const nativeCounts = countByKind( - (nativeResult.astNodes || nativeResult.ast_nodes || []).filter( - (n: AstNodeLike) => n.kind !== 'call', - ), - ); - - expect(nativeCounts).toEqual(wasmCounts); - }); + const astNodes = nativeResult.astNodes || []; + expect(astNodes.length).toBeGreaterThan(0); - it.skipIf(!isNativeAvailable())('JS: nested calls are not double-counted', () => { - const nativeResult = nativeExtract(MULTI_CALL_SNIPPET, '/test/nested.js'); - const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; - const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); - - // foo(bar(baz())) should produce 3 separate call nodes - const names = callNodes.map((n: AstNodeLike) => n.name).sort(); - expect(names).toContain('foo'); - expect(names).toContain('bar'); - expect(names).toContain('baz'); - expect(names).toContain('console.log'); - expect(names).toContain('obj.method'); - expect(names).toContain('helper'); - - // No duplicate lines for the nested chain - const fooLine = callNodes.find((n: AstNodeLike) => n.name === 'foo')?.line; - const barLine = callNodes.find((n: AstNodeLike) => n.name === 'bar')?.line; - const bazLine = callNodes.find((n: AstNodeLike) => n.name === 'baz')?.line; - // All on the same line but each as separate nodes - expect(fooLine).toBe(barLine); - expect(barLine).toBe(bazLine); + // Verify all nodes have valid kinds + const validKinds = new Set(['call', 'new', 'throw', 'await', 'string', 'regex']); + for (const node of astNodes) { + expect(validKinds).toContain(node.kind); + } }); - it.skipIf(!isNativeAvailable())('JS: native calls match legacy calls field count', () => { - const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - const astNodes = nativeResult.astNodes || nativeResult.ast_nodes || []; - const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); - const legacyCalls = nativeResult.calls || []; - - // Native ast_nodes call count should match the legacy calls field - expect(nativeCallNodes.length).toBe(legacyCalls.length); - }); + it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( + 'JS: nested calls are not double-counted', + () => { + const nativeResult = nativeExtract(MULTI_CALL_SNIPPET, '/test/nested.js'); + const astNodes = nativeResult.astNodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // foo(bar(baz())) should produce 3 separate call nodes + const names = callNodes.map((n: AstNodeLike) => n.name).sort(); + expect(names).toContain('foo'); + expect(names).toContain('bar'); + expect(names).toContain('baz'); + expect(names).toContain('console.log'); + expect(names).toContain('obj.method'); + expect(names).toContain('helper'); + + // No duplicate lines for the nested chain + const fooLine = callNodes.find((n: AstNodeLike) => n.name === 'foo')?.line; + const barLine = callNodes.find((n: AstNodeLike) => n.name === 'bar')?.line; + const bazLine = callNodes.find((n: AstNodeLike) => n.name === 'baz')?.line; + // All on the same line but each as separate nodes + expect(fooLine).toBe(barLine); + expect(barLine).toBe(bazLine); + }, + ); + + it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( + 'JS: native calls match legacy calls field count', + () => { + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || []; + const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + const legacyCalls = nativeResult.calls || []; + + // Native ast_nodes call count should match the legacy calls field + expect(nativeCallNodes.length).toBe(legacyCalls.length); + }, + ); it.skipIf(!isNativeAvailable())('empty file returns empty astNodes array (not undefined)', () => { const nativeResult = nativeExtract('// empty file\n', '/test/empty.js'); From 09bf16e3014193dcaeb482d59f229d4aad208967 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:59:32 -0600 Subject: [PATCH 05/10] fix(test): move nativeSupportsCallAst guard inside test body and harden nativeExtract (#591) --- tests/engines/ast-parity.test.ts | 118 +++++++++++++++---------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/tests/engines/ast-parity.test.ts b/tests/engines/ast-parity.test.ts index bb971351..6b9944ac 100644 --- a/tests/engines/ast-parity.test.ts +++ b/tests/engines/ast-parity.test.ts @@ -31,8 +31,9 @@ let native: NativeAddon | null = null; let nativeSupportsCallAst = false; function nativeExtract(code: string, filePath: string): NativeResult { + if (!native) throw new Error('nativeExtract called with native === null'); // 4th arg = include_ast_nodes = true - return native?.parseFile(filePath, code, false, true) as NativeResult; + return native.parseFile(filePath, code, false, true) as NativeResult; } // ─── Test snippets ────────────────────────────────────────────────────── @@ -128,26 +129,27 @@ describe('AST node parity (native vs WASM)', () => { } }); - it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( - 'JS: native astNodes includes call kind', - () => { - const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - const astNodes = nativeResult.astNodes || []; - const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); - - // JS snippet has: super, console.log, fetch (x2), resp.json - expect(callNodes.length).toBeGreaterThan(0); - - // Verify call nodes have expected structure - for (const node of callNodes) { - expect(node.kind).toBe('call'); - expect(typeof node.name).toBe('string'); - expect(typeof node.line).toBe('number'); - } - }, - ); - - it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)('JS: call receiver extraction', () => { + it.skipIf(!isNativeAvailable())('JS: native astNodes includes call kind', () => { + if (!nativeSupportsCallAst) return; // runtime guard — set by beforeAll + + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // JS snippet has: super, console.log, fetch (x2), resp.json + expect(callNodes.length).toBeGreaterThan(0); + + // Verify call nodes have expected structure + for (const node of callNodes) { + expect(node.kind).toBe('call'); + expect(typeof node.name).toBe('string'); + expect(typeof node.line).toBe('number'); + } + }); + + it.skipIf(!isNativeAvailable())('JS: call receiver extraction', () => { + if (!nativeSupportsCallAst) return; // runtime guard — set by beforeAll + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); const astNodes = nativeResult.astNodes || []; const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); @@ -177,44 +179,42 @@ describe('AST node parity (native vs WASM)', () => { } }); - it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( - 'JS: nested calls are not double-counted', - () => { - const nativeResult = nativeExtract(MULTI_CALL_SNIPPET, '/test/nested.js'); - const astNodes = nativeResult.astNodes || []; - const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); - - // foo(bar(baz())) should produce 3 separate call nodes - const names = callNodes.map((n: AstNodeLike) => n.name).sort(); - expect(names).toContain('foo'); - expect(names).toContain('bar'); - expect(names).toContain('baz'); - expect(names).toContain('console.log'); - expect(names).toContain('obj.method'); - expect(names).toContain('helper'); - - // No duplicate lines for the nested chain - const fooLine = callNodes.find((n: AstNodeLike) => n.name === 'foo')?.line; - const barLine = callNodes.find((n: AstNodeLike) => n.name === 'bar')?.line; - const bazLine = callNodes.find((n: AstNodeLike) => n.name === 'baz')?.line; - // All on the same line but each as separate nodes - expect(fooLine).toBe(barLine); - expect(barLine).toBe(bazLine); - }, - ); - - it.skipIf(!isNativeAvailable() || !nativeSupportsCallAst)( - 'JS: native calls match legacy calls field count', - () => { - const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); - const astNodes = nativeResult.astNodes || []; - const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); - const legacyCalls = nativeResult.calls || []; - - // Native ast_nodes call count should match the legacy calls field - expect(nativeCallNodes.length).toBe(legacyCalls.length); - }, - ); + it.skipIf(!isNativeAvailable())('JS: nested calls are not double-counted', () => { + if (!nativeSupportsCallAst) return; // runtime guard — set by beforeAll + + const nativeResult = nativeExtract(MULTI_CALL_SNIPPET, '/test/nested.js'); + const astNodes = nativeResult.astNodes || []; + const callNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + + // foo(bar(baz())) should produce 3 separate call nodes + const names = callNodes.map((n: AstNodeLike) => n.name).sort(); + expect(names).toContain('foo'); + expect(names).toContain('bar'); + expect(names).toContain('baz'); + expect(names).toContain('console.log'); + expect(names).toContain('obj.method'); + expect(names).toContain('helper'); + + // No duplicate lines for the nested chain + const fooLine = callNodes.find((n: AstNodeLike) => n.name === 'foo')?.line; + const barLine = callNodes.find((n: AstNodeLike) => n.name === 'bar')?.line; + const bazLine = callNodes.find((n: AstNodeLike) => n.name === 'baz')?.line; + // All on the same line but each as separate nodes + expect(fooLine).toBe(barLine); + expect(barLine).toBe(bazLine); + }); + + it.skipIf(!isNativeAvailable())('JS: native calls match legacy calls field count', () => { + if (!nativeSupportsCallAst) return; // runtime guard — set by beforeAll + + const nativeResult = nativeExtract(JS_SNIPPET, '/test/sample.js'); + const astNodes = nativeResult.astNodes || []; + const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); + const legacyCalls = nativeResult.calls || []; + + // Native ast_nodes call count should match the legacy calls field + expect(nativeCallNodes.length).toBe(legacyCalls.length); + }); it.skipIf(!isNativeAvailable())('empty file returns empty astNodes array (not undefined)', () => { const nativeResult = nativeExtract('// empty file\n', '/test/empty.js'); From 23ad19ce20b0debc2803bddc346c653cc6ba7912 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:59:52 -0600 Subject: [PATCH 06/10] fix(native): add PHP scoped_call_expression scope field to extract_call_receiver (#591) Impact: 1 functions changed, 9 affected --- crates/codegraph-core/src/extractors/helpers.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index d15fa493..09515e03 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -444,6 +444,10 @@ fn extract_call_name(node: &Node, source: &[u8]) -> String { /// Extract receiver from a call node (e.g. `obj` from `obj.method()`). /// Looks for a member-expression-like function child and extracts the object part. fn extract_call_receiver(node: &Node, source: &[u8]) -> Option { + // PHP: scoped_call_expression — receiver is the "scope" field (e.g. MyClass in MyClass::method()) + if let Some(scope) = node.child_by_field_name("scope") { + return Some(node_text(&scope, source).to_string()); + } // Try "function" field first (JS/TS: call_expression -> member_expression) // Then "object" (Go, Python), then "receiver" (Ruby) for field in &["function", "object", "receiver"] { From 036db50d02e26afb2de5e57c9df14a8d5311fba2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:56:37 -0600 Subject: [PATCH 07/10] fix(native): add C# member_access_expression handling to extract_call_receiver (#591) Impact: 1 functions changed, 9 affected --- crates/codegraph-core/src/extractors/helpers.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 09515e03..29a2cc6e 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -462,6 +462,12 @@ fn extract_call_receiver(node: &Node, source: &[u8]) -> Option { return Some(node_text(&operand, source).to_string()); } } + // C#: member_access_expression uses "expression" not "object" + if fn_node.kind() == "member_access_expression" { + if let Some(expr) = fn_node.child_by_field_name("expression") { + return Some(node_text(&expr, source).to_string()); + } + } // For Ruby/Go where the receiver is directly a field if *field == "object" || *field == "receiver" { return Some(node_text(&fn_node, source).to_string()); From df4a5a55bd24a29429faf0c00f5eb8b4264f3a82 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:56:50 -0600 Subject: [PATCH 08/10] fix(ast): rename nativeHasCalls to nativeProvidedAstNodes for clarity (#591) Impact: 1 functions changed, 2 affected --- src/features/ast.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/features/ast.ts b/src/features/ast.ts index baab1d71..55307fa0 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -95,8 +95,8 @@ export async function buildAstNodes( // When native astNodes includes call entries, skip separate symbols.calls processing // to avoid duplication. Fall back to symbols.calls for WASM or older native binaries. - const nativeHasCalls = Array.isArray(symbols.astNodes); - if (symbols.calls && !nativeHasCalls) { + const nativeProvidedAstNodes = Array.isArray(symbols.astNodes); + if (symbols.calls && !nativeProvidedAstNodes) { for (const call of symbols.calls) { const parentDef = findParentDef(defs, call.line); let parentNodeId: number | null = null; From 6bd2abe34399cee639480450666b67311dc797d8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:57:03 -0600 Subject: [PATCH 09/10] fix(test): use toBeGreaterThanOrEqual for native vs legacy call count assertion (#591) --- tests/engines/ast-parity.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/engines/ast-parity.test.ts b/tests/engines/ast-parity.test.ts index 6b9944ac..92be177b 100644 --- a/tests/engines/ast-parity.test.ts +++ b/tests/engines/ast-parity.test.ts @@ -212,8 +212,8 @@ describe('AST node parity (native vs WASM)', () => { const nativeCallNodes = astNodes.filter((n: AstNodeLike) => n.kind === 'call'); const legacyCalls = nativeResult.calls || []; - // Native ast_nodes call count should match the legacy calls field - expect(nativeCallNodes.length).toBe(legacyCalls.length); + // Native should capture at least as many calls as the legacy field + expect(nativeCallNodes.length).toBeGreaterThanOrEqual(legacyCalls.length); }); it.skipIf(!isNativeAvailable())('empty file returns empty astNodes array (not undefined)', () => { From 172bd2e268aa4cbd6d3a67b033584f25d6f7a9ab Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 14:53:41 -0600 Subject: [PATCH 10/10] fix(native): remove await early return and use field-name API for argument detection (#591) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the early return after pushing await nodes in the generic walker — the call_types guard already prevents double-counting, and the early return was silently dropping string/call nodes inside awaited expressions for Python, Rust, and C#. Switch argument-container detection from hardcoded kind-name matching to child_by_field_name("arguments") with kind-based fallback, matching the more robust approach used in the JS-specific walker. Impact: 1 functions changed, 8 affected --- .../codegraph-core/src/extractors/helpers.rs | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 29a2cc6e..23e885ad 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -252,18 +252,25 @@ fn walk_ast_nodes_with_config_depth( receiver, }); // Recurse into arguments only — nested calls in args should be captured. - // Find the arguments child and walk its children. - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - let ck = child.kind(); - if ck == "arguments" || ck == "argument_list" || ck == "method_arguments" { - for j in 0..child.child_count() { - if let Some(arg) = child.child(j) { - walk_ast_nodes_with_config_depth(&arg, source, ast_nodes, config, depth + 1); - } + // Use child_by_field_name("arguments") — immune to kind-name variation across grammars. + // Falls back to kind-based matching for grammars that don't expose a field name. + let args_node = node.child_by_field_name("arguments").or_else(|| { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + let ck = child.kind(); + if ck == "arguments" || ck == "argument_list" || ck == "method_arguments" { + return Some(child); } } } + None + }); + if let Some(args) = args_node { + for j in 0..args.child_count() { + if let Some(arg) = args.child(j) { + walk_ast_nodes_with_config_depth(&arg, source, ast_nodes, config, depth + 1); + } + } } return; } @@ -300,9 +307,9 @@ fn walk_ast_nodes_with_config_depth( text, receiver: None, }); - // Don't fall through — prevents double-counting the awaited call - // (consistent with the JS-specific walker which returns early) - return; + // Fall through to recurse children — captures strings, calls, etc. inside await expr. + // The call_types guard at the top of the function already handles `call_expression` + // nodes correctly (recurse-into-args-only), so there is no double-counting risk here. } else if config.string_types.contains(&kind) { let raw = node_text(node, source); let is_raw_string = kind.contains("raw_string");