Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
0efea90
refactor(extractors): add parser abstraction layer (Phase 7.1)
carlos-alm Mar 30, 2026
e3ae9b3
fix: address Greptile review comments (#700)
carlos-alm Mar 30, 2026
1d296e8
fix: revert subpath imports to relative imports in builder stages
carlos-alm Mar 30, 2026
d0b578d
Merge remote-tracking branch 'origin/main' into refactor/parser-abstr…
carlos-alm Mar 30, 2026
21d45cc
fix(parity): restore call AST node extraction in WASM engine (#697)
carlos-alm Mar 30, 2026
5f1a74a
Merge branch 'main' into refactor/parser-abstraction-layer
carlos-alm Mar 30, 2026
907bdf8
Merge branch 'main' into refactor/parser-abstraction-layer
carlos-alm Mar 30, 2026
173ab12
fix: make enterNode skipChildren guard unconditional for matched node…
carlos-alm Mar 30, 2026
56ef0ca
fix: pin exact call node count in ast-nodes test (#705)
carlos-alm Mar 30, 2026
0f4259e
fix: revert enterNode skipChildren guard — node.id reuse causes false…
carlos-alm Mar 30, 2026
ed45a5b
ci: retry CI run (#705)
carlos-alm Mar 30, 2026
36bc2aa
fix: add diagnostic logging for ast_nodes parity failure (#705)
carlos-alm Mar 30, 2026
92d1592
ci: add diagnostic logging to trace WASM ast_nodes CI failure (#705)
carlos-alm Mar 30, 2026
8a485a8
ci: expand diagnostic logging to trace buildAstNodes path (#705)
carlos-alm Mar 30, 2026
f2bab12
Merge remote-tracking branch 'origin/main' into refactor/parser-abstr…
carlos-alm Mar 30, 2026
2a6c71d
fix: coerce null receiver to empty string for NAPI compat (#705)
carlos-alm Mar 30, 2026
afc2ec1
fix: remove parity-diag console.error logging (#705)
carlos-alm Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -427,17 +427,6 @@ export async function runAnalyses(

if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing;

// Strip dead 'call' kind from native astNodes upfront. Call AST nodes are no
// longer extracted by the WASM visitor; native binaries still emit them until
// the Rust extractors are updated (see #701). Clear the array when only calls
// remain so the WASM visitor runs and extracts non-call kinds.
for (const [, symbols] of fileSymbols) {
if (Array.isArray(symbols.astNodes)) {
const filtered = symbols.astNodes.filter((n) => n.kind !== 'call');
symbols.astNodes = filtered.length > 0 ? (filtered as typeof symbols.astNodes) : undefined;
}
}

const extToLang = buildExtToLangMap();

// WASM pre-parse for files that need it
Expand Down
1 change: 1 addition & 0 deletions src/ast-analysis/rules/javascript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = {
call_expression: 'call',
new_expression: 'new',
throw_statement: 'throw',
await_expression: 'await',
Expand Down
151 changes: 119 additions & 32 deletions src/ast-analysis/visitors/ast-store-visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ interface AstStoreRow {
kind: string;
name: string | null | undefined;
text: string | null;
receiver: null;
receiver: string | null;
parentNodeId: number | null;
}

Expand Down Expand Up @@ -44,6 +44,22 @@ function extractExpressionText(node: TreeSitterNode): string | null {
return truncate(node.text);
}

/**
 * Derive the callee name of a call node.
 *
 * The `function`, `method`, and `name` fields are probed in that order and the
 * text of the first one present is returned. When none of them exists, the
 * node text preceding the first `(` is used, or `'?'` if that is empty or the
 * node has no text.
 */
function extractCallName(node: TreeSitterNode): string {
  const calleeFields = ['function', 'method', 'name'];
  for (const fieldName of calleeFields) {
    const callee = node.childForFieldName(fieldName);
    if (!callee) continue;
    return callee.text;
  }

  // No recognised callee field: fall back to whatever precedes the first "(".
  const prefix = node.text?.split('(')[0];
  if (prefix) return prefix;
  return '?';
}

/**
 * Return the receiver text of a call expression (e.g. "obj" in "obj.method()").
 *
 * Yields `null` unless the call's `function` field is a `member_expression`
 * that has an `object` field.
 */
function extractCallReceiver(node: TreeSitterNode): string | null {
  const callee = node.childForFieldName('function');
  if (callee && callee.type === 'member_expression') {
    const target = callee.childForFieldName('object');
    if (target) return target.text;
  }
  return null;
}

function extractName(kind: string, node: TreeSitterNode): string | null {
if (kind === 'throw') {
for (let i = 0; i < node.childCount; i++) {
Expand Down Expand Up @@ -82,6 +98,7 @@ export function createAstStoreVisitor(
nodeIdMap: Map<string, number>,
): Visitor {
const rows: AstStoreRow[] = [];
const matched = new Set<number>();

function findParentDef(line: number): Definition | null {
let best: Definition | null = null;
Expand All @@ -101,45 +118,115 @@ export function createAstStoreVisitor(
return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
}

/**
 * Depth-first walk of a subtree that records every node whose type maps to an
 * AST kind. Used for the arguments-only traversal of call nodes.
 *
 * - Null nodes and nodes already present in `matched` are ignored.
 * - A `call` node is recorded and then only its arguments are walked.
 * - Any other mapped kind is recorded; descent continues only below `string`
 *   and `regex` nodes.
 * - Unmapped nodes are simply descended into.
 */
function walkSubtree(node: TreeSitterNode | null): void {
  if (!node || matched.has(node.id)) return;

  const kind = astTypeMap[node.type];
  if (kind) {
    collectNode(node, kind);

    if (kind === 'call') {
      // Calls never descend into their callee — only into their arguments.
      walkCallArguments(node);
      return;
    }

    const keepDescending = kind === 'string' || kind === 'regex';
    if (!keepDescending) return; // skipChildren for every other mapped kind
  }

  let childIndex = 0;
  while (childIndex < node.childCount) {
    walkSubtree(node.child(childIndex));
    childIndex += 1;
  }
}

/**
 * Walk only the argument list of a call node.
 *
 * The callee (`function` field) is deliberately not visited so that nested
 * calls inside it (e.g. chained calls like `a().b()`) are not counted twice;
 * per the original note this mirrors the native engine's strategy.
 */
function walkCallArguments(callNode: TreeSitterNode): void {
  // Prefer the named field; otherwise look for a child with a known argument-list type.
  let argumentList = callNode.childForFieldName('arguments');
  if (argumentList === null || argumentList === undefined) {
    argumentList = findChildByKind(callNode, ['arguments', 'argument_list', 'method_arguments']);
  }
  if (!argumentList) return;

  let position = 0;
  while (position < argumentList.childCount) {
    walkSubtree(argumentList.child(position));
    position += 1;
  }
}

/**
 * Return the first direct child of `node` whose `type` is listed in `kinds`,
 * or `null` when no child matches. Missing (null) children are skipped.
 */
function findChildByKind(node: TreeSitterNode, kinds: string[]): TreeSitterNode | null {
  let position = 0;
  while (position < node.childCount) {
    const candidate = node.child(position);
    position += 1;
    if (!candidate) continue;
    if (kinds.indexOf(candidate.type) !== -1) return candidate;
  }
  return null;
}

/**
 * Append one row describing `node` to `rows` and remember its id in `matched`.
 *
 * Nodes already in `matched` are ignored. How `name`, `text`, and `receiver`
 * are derived depends on `kind`; only `call` rows get a receiver. String nodes
 * whose content (after removing one leading and one trailing quote character)
 * is shorter than two characters are dropped and not marked as matched.
 */
function collectNode(node: TreeSitterNode, kind: string): void {
  if (matched.has(node.id)) return;

  const line = node.startPosition.row + 1; // tree-sitter rows are 0-based
  let name: string | null | undefined;
  let text: string | null = null;
  let receiver: string | null = null;

  switch (kind) {
    case 'call':
      name = extractCallName(node);
      text = truncate(node.text);
      receiver = extractCallReceiver(node);
      break;
    case 'new':
      name = extractNewName(node);
      text = truncate(node.text);
      break;
    case 'throw':
    case 'await':
      name = extractName(kind, node);
      text = extractExpressionText(node);
      break;
    case 'string': {
      const content = node.text?.replace(/^['"`]|['"`]$/g, '') || '';
      if (content.length < 2) return; // too short to record
      name = truncate(content, 100);
      text = truncate(node.text);
      break;
    }
    case 'regex':
      name = node.text || '?';
      text = truncate(node.text);
      break;
    default:
      // Other kinds are stored with an undefined name and null text.
      break;
  }

  const parentNodeId = resolveParentNodeId(line);
  rows.push({ file: relPath, line, kind, name, text, receiver, parentNodeId });

  matched.add(node.id);
}

return {
name: 'ast-store',

enterNode(node: TreeSitterNode, _context: VisitorContext): EnterNodeResult | undefined {
// Guard: skip re-collection but do NOT skipChildren — node.id (memory address)
// can be reused by tree-sitter, so a collision would incorrectly suppress an
// unrelated subtree. The parent call's skipChildren handles the intended case.
if (matched.has(node.id)) return;

const kind = astTypeMap[node.type];
if (!kind) return;

const line = node.startPosition.row + 1;
let name: string | null | undefined;
let text: string | null = null;

if (kind === 'new') {
name = extractNewName(node);
text = truncate(node.text);
} else if (kind === 'throw') {
name = extractName('throw', node);
text = extractExpressionText(node);
} else if (kind === 'await') {
name = extractName('await', node);
text = extractExpressionText(node);
} else if (kind === 'string') {
const content = node.text?.replace(/^['"`]|['"`]$/g, '') || '';
if (content.length < 2) return;
name = truncate(content, 100);
text = truncate(node.text);
} else if (kind === 'regex') {
name = node.text || '?';
text = truncate(node.text);
}
collectNode(node, kind);

rows.push({
file: relPath,
line,
kind,
name,
text,
receiver: null,
parentNodeId: resolveParentNodeId(line),
});
if (kind === 'call') {
// Mirror native: skip full subtree, recurse only into arguments.
// Prevents double-counting chained calls like service.getUser().getName().
walkCallArguments(node);
return { skipChildren: true };
}

if (kind !== 'string' && kind !== 'regex') {
return { skipChildren: true };
Expand Down
5 changes: 5 additions & 0 deletions src/domain/graph/builder/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {
/* ignore close errors */
}
ctx.nativeDb = undefined;
// Also clear stale reference in engineOpts to prevent stages from
// calling methods on the closed NativeDatabase.
if (ctx.engineOpts?.nativeDb) {
ctx.engineOpts.nativeDb = undefined;
}
}

await collectFiles(ctx);
Expand Down
8 changes: 4 additions & 4 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
*/
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { getNodeId } from '#db/index.js';
import { debug } from '#infrastructure/logger.js';
import { loadNative } from '#infrastructure/native.js';
import { getNodeId } from '../../../../db/index.js';
import { debug } from '../../../../infrastructure/logger.js';
import { loadNative } from '../../../../infrastructure/native.js';
Comment on lines +9 to +11
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Undocumented conversion from #-alias imports to relative paths

build-edges.ts, build-structure.ts, and collect-files.ts all have their #infrastructure/…, #db/…, #shared/…, and #types imports converted to ../../../../… relative paths. This change isn't mentioned in the PR description and isn't consistent with how the rest of the codebase uses these path aliases (e.g. pipeline.ts and other stage files still use the #-aliases).

If this was done to fix a broken alias in a specific build or test context, a brief comment or follow-up ticket would help. If it was accidental, it should be reverted so import style stays uniform.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Investigated — the #-alias to relative-path conversion is actually consistent with the rest of the stage files. On main, finalize.ts, detect-changes.ts, resolve-imports.ts, insert-nodes.ts, parse-files.ts, and run-analyses.ts already use relative paths. Only build-edges.ts, build-structure.ts, and collect-files.ts still had the #-aliases. This PR normalizes the remaining 3 files to match the convention used by the other 6 stage files. No revert needed.

import type {
BetterSqlite3Database,
Call,
Expand All @@ -18,7 +18,7 @@ import type {
NativeAddon,
NodeRow,
TypeMapEntry,
} from '#types';
} from '../../../../types.js';
import { computeConfidence } from '../../resolve.js';
import type { PipelineContext } from '../context.js';
import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js';
Expand Down
6 changes: 3 additions & 3 deletions src/domain/graph/builder/stages/build-structure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
*/
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { debug } from '#infrastructure/logger.js';
import { normalizePath } from '#shared/constants.js';
import type { ExtractorOutput } from '#types';
import { debug } from '../../../../infrastructure/logger.js';
import { normalizePath } from '../../../../shared/constants.js';
import type { ExtractorOutput } from '../../../../types.js';
import type { PipelineContext } from '../context.js';
import { readFileSafe } from '../helpers.js';

Expand Down
4 changes: 2 additions & 2 deletions src/domain/graph/builder/stages/collect-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
*/
import fs from 'node:fs';
import path from 'node:path';
import { debug, info } from '#infrastructure/logger.js';
import { normalizePath } from '#shared/constants.js';
import { debug, info } from '../../../../infrastructure/logger.js';
import { normalizePath } from '../../../../shared/constants.js';
import { readJournal } from '../../journal.js';
import type { PipelineContext } from '../context.js';
import { collectFiles as collectFilesUtil } from '../helpers.js';
Expand Down
17 changes: 4 additions & 13 deletions src/features/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,14 @@ export async function buildAstNodes(

for (const [relPath, symbols] of fileSymbols) {
if (Array.isArray(symbols.astNodes)) {
// Filter out 'call' kind — dead AST node type, see JS fallback path comment.
const filtered = symbols.astNodes.filter((n) => n.kind !== 'call');
batches.push({
file: relPath,
nodes: filtered.map((n) => ({
nodes: symbols.astNodes.map((n) => ({
line: n.line,
kind: n.kind,
name: n.name,
text: n.text,
receiver: n.receiver,
receiver: n.receiver ?? '',
})),
});
} else if (symbols.calls || symbols._tree) {
Expand Down Expand Up @@ -168,16 +166,9 @@ export async function buildAstNodes(
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}

// Call AST nodes were removed — 'call' kind entries in ast_nodes are dead
// (never queried by any feature or command). symbols.calls are still used
// for call *edges* but no longer written to ast_nodes.

if (Array.isArray(symbols.astNodes)) {
// Native engine provided AST nodes (may be empty for files with no AST content).
// Filter out 'call' kind — call AST nodes are dead (never queried by any feature).
// The WASM visitor no longer extracts them; native binaries still emit them until
// the next Rust release strips them from the extractor.
for (const n of symbols.astNodes.filter((n) => n.kind !== 'call')) {
// Native engine provided AST nodes (may be empty for files with no AST content)
for (const n of symbols.astNodes) {
const parentDef = findParentDef(defs, n.line);
let parentNodeId: number | null = null;
if (parentDef) {
Expand Down
12 changes: 12 additions & 0 deletions tests/integration/build-parity.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ describeOrSkip('Build parity: native vs WASM', () => {
it('produces identical ast_nodes', () => {
  const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db'));
  const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db'));

  // Diagnostic: when the counts diverge, print the counts and the distinct kinds
  // seen on each side to help debug CI-only parity failures.
  const nativeCount = nativeGraph.astNodes.length;
  const wasmCount = wasmGraph.astNodes.length;
  if (nativeCount !== wasmCount) {
    const distinctKinds = (nodes: any[]): string =>
      JSON.stringify([...new Set(nodes.map((n: any) => n.kind))]);

    console.error(`[parity-diag] native astNodes: ${nativeCount}, wasm astNodes: ${wasmCount}`);
    console.error(`[parity-diag] native kinds: ${distinctKinds(nativeGraph.astNodes as any[])}`);
    console.error(`[parity-diag] wasm kinds: ${distinctKinds(wasmGraph.astNodes as any[])}`);
  }

  expect(nativeGraph.astNodes).toEqual(wasmGraph.astNodes);
});
});
10 changes: 8 additions & 2 deletions tests/parsers/ast-nodes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,15 @@ function queryAllAstNodes() {
// ─── Tests ────────────────────────────────────────────────────────────

describe('buildAstNodes — JS extraction', () => {
test('call kind AST nodes are no longer stored (dead code removed)', () => {
test('captures call_expression as kind:call', () => {
const calls = queryAstNodes('call');
expect(calls.length).toBe(0);
// eval(input), result.set('data', data), console.log(result)
// Note: fetch('/api/data') is inside await — captured as kind:await, not kind:call
expect(calls.length).toBe(3);
const names = calls.map((n) => n.name);
expect(names).toContain('eval');
expect(names).toContain('result.set');
expect(names).toContain('console.log');
});

test('captures new_expression as kind:new', () => {
Expand Down
Loading