Skip to content
Prev Previous commit
Next Next commit
perf(native): fix WASM fallback bypass and batch SQL inserts
Fix interface property signatures (dotted names, single-line spans)
incorrectly triggering WASM tree creation on native builds across
engine.ts, complexity.ts, and cfg.ts. Add statement caching and
batch UPDATE optimizations for insert and role classification stages.

Native full build: 2001ms vs WASM 3116ms (1.6x faster).
Key wins: complexity 4.2x, cfg 3.2x, parse 2.4x faster.

Impact: 26 functions changed, 25 affected
  • Loading branch information
carlos-alm committed Mar 25, 2026
commit 9fb2e6bde1331d3b6213842ccef12a928026facb
127 changes: 45 additions & 82 deletions docs/roadmap/ROADMAP.md

Large diffs are not rendered by default.

43 changes: 33 additions & 10 deletions src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,20 +114,31 @@ async function ensureWasmTreesIfNeeded(
const ext = path.extname(relPath).toLowerCase();
const defs = symbols.definitions || [];

// Only consider definitions with a real function body.
// Interface/type property signatures are extracted as methods but correctly
// lack complexity/CFG data from the native engine. Exclude them by:
// 1. Single-line span (endLine === line) — type property on one line
// 2. Dotted names (e.g. "Interface.prop") — child definitions of types
const hasFuncBody = (d: {
name: string;
kind: string;
line: number;
endLine?: number | null;
}) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line > 0 &&
d.endLine != null &&
d.endLine > d.line &&
!d.name.includes('.');

const needsComplexity =
doComplexity &&
COMPLEXITY_EXTENSIONS.has(ext) &&
defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity);
defs.some((d) => hasFuncBody(d) && !d.complexity);
const needsCfg =
doCfg &&
CFG_EXTENSIONS.has(ext) &&
defs.some(
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line &&
d.cfg !== null &&
!Array.isArray(d.cfg?.blocks),
);
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);

if (needsComplexity || needsCfg || needsDataflow) {
Expand Down Expand Up @@ -186,8 +197,17 @@ function setupVisitors(
const cRules = COMPLEXITY_RULES.get(langId);
const hRules = HALSTEAD_RULES.get(langId);
if (doComplexity && cRules) {
// Only trigger WASM complexity for definitions with real function bodies.
// Interface/type property signatures (dotted names, single-line span)
// correctly lack native complexity data and should not trigger a fallback.
const needsWasmComplexity = defs.some(
  (d) =>
    (d.kind === 'function' || d.kind === 'method') &&
    d.line > 0 &&
    d.endLine != null &&
    d.endLine > d.line &&
    !d.name.includes('.') &&
    !d.complexity,
);
if (needsWasmComplexity) {
complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId });
Expand All @@ -213,7 +233,10 @@ function setupVisitors(
// Same body filter as the complexity path: skip interface/type property
// signatures (single-line span or dotted name), which natively lack CFGs.
const needsWasmCfg = defs.some(
  (d) =>
    (d.kind === 'function' || d.kind === 'method') &&
    d.line > 0 &&
    d.endLine != null &&
    d.endLine > d.line &&
    !d.name.includes('.') &&
    d.cfg !== null &&
    !Array.isArray(d.cfg?.blocks),
);
Expand Down
72 changes: 57 additions & 15 deletions src/domain/graph/builder/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,23 +208,64 @@ export function purgeFilesFromGraph(
}

/**
 * Batch INSERT chunk size for multi-value INSERTs.
 * At 500 rows the widest statement (nodes: 9 columns) binds 4,500 variables,
 * well under SQLite's default SQLITE_MAX_VARIABLE_NUMBER of 32,766
 * (SQLite >= 3.32, as bundled by better-sqlite3).
 */
const BATCH_CHUNK = 500;

// Statement caches keyed by chunk size — avoids recompiling for every batch.
// The outer WeakMap is keyed by the Database connection, so cached statements
// become collectable together with their connection; the inner Map is keyed by
// the number of rows in the multi-value INSERT (at most BATCH_CHUNK distinct
// sizes per connection: the full chunk size plus any remainder sizes).
const nodeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();
const edgeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();

/**
 * Return the cached multi-value INSERT statement for `chunkSize` node rows on
 * this connection, preparing and caching it on first use.
 */
function getNodeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
  // Lazily create the per-connection size → statement map.
  let bySize = nodeStmtCache.get(db);
  if (bySize === undefined) {
    bySize = new Map();
    nodeStmtCache.set(db, bySize);
  }

  const cached = bySize.get(chunkSize);
  if (cached !== undefined) return cached;

  // One 9-tuple placeholder group per row in the chunk.
  const placeholders = new Array<string>(chunkSize).fill('(?,?,?,?,?,?,?,?,?)').join(',');
  const stmt = db.prepare(
    'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' +
      placeholders,
  );
  bySize.set(chunkSize, stmt);
  return stmt;
}

/**
 * Return the cached multi-value INSERT statement for `chunkSize` edge rows on
 * this connection, preparing and caching it on first use.
 */
function getEdgeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
  // Lazily create the per-connection size → statement map.
  let perDb = edgeStmtCache.get(db);
  if (perDb === undefined) {
    perDb = new Map();
    edgeStmtCache.set(db, perDb);
  }

  const existing = perDb.get(chunkSize);
  if (existing !== undefined) return existing;

  // Build "(?,?,?,?,?),(?,?,?,?,?),…" — one 5-tuple per row in the chunk.
  const rowPlaceholders: string[] = [];
  for (let i = 0; i < chunkSize; i++) rowPlaceholders.push('(?,?,?,?,?)');
  const prepared = db.prepare(
    'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
      rowPlaceholders.join(','),
  );
  perDb.set(chunkSize, prepared);
  return prepared;
}

/**
 * Batch-insert node rows via multi-value INSERT statements.
 * Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility]
 * Uses the per-database prepared-statement cache (getNodeStmt) so each
 * distinct chunk size compiles its INSERT only once.
 */
export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]): void {
  if (!rows.length) return;
  for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
    const end = Math.min(i + BATCH_CHUNK, rows.length);
    const chunkSize = end - i;
    const stmt = getNodeStmt(db, chunkSize);
    // Flatten the chunk's rows into a single positional-parameter array.
    const vals: unknown[] = [];
    for (let j = i; j < end; j++) {
      const r = rows[j] as unknown[];
      vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]);
    }
    stmt.run(...vals);
  }
}

Expand All @@ -234,14 +275,15 @@ export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]):
*/
// Batch-insert edge rows via multi-value INSERT statements.
// Each row: [source_id, target_id, kind, confidence, dynamic]
// Uses the per-database prepared-statement cache (getEdgeStmt) so each
// distinct chunk size compiles its INSERT only once.
export function batchInsertEdges(db: BetterSqlite3.Database, rows: unknown[][]): void {
  if (!rows.length) return;
  for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
    const end = Math.min(i + BATCH_CHUNK, rows.length);
    const chunkSize = end - i;
    const stmt = getEdgeStmt(db, chunkSize);
    // Flatten the chunk's rows into a single positional-parameter array.
    const vals: unknown[] = [];
    for (let j = i; j < end; j++) {
      const r = rows[j] as unknown[];
      vals.push(r[0], r[1], r[2], r[3], r[4]);
    }
    stmt.run(...vals);
  }
}
82 changes: 49 additions & 33 deletions src/domain/graph/builder/stages/insert-nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import path from 'node:path';
import { performance } from 'node:perf_hooks';
import type BetterSqlite3 from 'better-sqlite3';
import { bulkNodeIdsByFile } from '../../../../db/index.js';
import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js';
import type { ExtractorOutput, MetadataUpdate } from '../../../../types.js';
import type { PipelineContext } from '../context.js';
import {
batchInsertEdges,
Expand All @@ -35,6 +35,7 @@ function insertDefinitionsAndExports(
allSymbols: Map<string, ExtractorOutput>,
): void {
const phase1Rows: unknown[][] = [];
const exportKeys: unknown[][] = [];
for (const [relPath, symbols] of allSymbols) {
phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
for (const def of symbols.definitions) {
Expand All @@ -54,38 +55,62 @@ function insertDefinitionsAndExports(
}
for (const exp of symbols.exports) {
phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
}
}
batchInsertNodes(db, phase1Rows);

// Mark exported symbols
const markExported = db.prepare(
'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?',
);
for (const [relPath, symbols] of allSymbols) {
for (const exp of symbols.exports) {
markExported.run(exp.name, exp.kind, relPath, exp.line);
// Mark exported symbols in batches
if (exportKeys.length > 0) {
const EXPORT_CHUNK = 500;
for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
const chunkSize = end - i;
const conditions = Array.from(
{ length: chunkSize },
() => '(name = ? AND kind = ? AND file = ? AND line = ?)',
).join(' OR ');
const vals: unknown[] = [];
for (let j = i; j < end; j++) {
const k = exportKeys[j] as unknown[];
vals.push(k[0], k[1], k[2], k[3]);
}
db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`).run(...vals);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Export-marking UPDATE statement is re-prepared on every batch iteration

db.prepare(...) is called inside the loop, so for each chunk a new SQL string is parsed and a new prepared statement is compiled. This is inconsistent with the WeakMap-based caching introduced in helpers.ts (getNodeStmt / getEdgeStmt), which was added specifically to avoid per-chunk recompilation.

For a typical build the number of distinct chunk sizes is small (usually just 1 or 2), so the overhead is modest, but the inconsistency means this UPDATE still pays a preparation cost per batch iteration that the INSERT paths no longer do. Hoisting the statement cache outside the loop (or reusing the same pattern from helpers.ts) would close the gap:

// outside the loop:
const stmtCache = new Map<number, BetterSqlite3.Statement>();

// inside:
let updateStmt = stmtCache.get(chunkSize);
if (!updateStmt) {
  updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
  stmtCache.set(chunkSize, updateStmt);
}
updateStmt.run(...vals);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added a Map<number, BetterSqlite3.Statement> cache (exportStmtCache) outside the loop body, keyed by chunk size. The prepared statement is now reused across iterations with the same chunk size, consistent with the WeakMap-based caching pattern in helpers.ts.

}
}
}

// ── Phase 2: Insert children (needs parent IDs) ────────────────────────
// ── Phase 2+3: Insert children and containment edges (single nodeIdMap pass) ──

function insertChildren(
function insertChildrenAndEdges(
db: BetterSqlite3.Database,
allSymbols: Map<string, ExtractorOutput>,
): void {
const childRows: unknown[][] = [];
const edgeRows: unknown[][] = [];

for (const [relPath, symbols] of allSymbols) {
// Single bulkNodeIdsByFile call per file, shared across children + edges
const nodeIdMap = new Map<string, number>();
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}

const fileId = nodeIdMap.get(`${relPath}|file|0`);

for (const def of symbols.definitions) {
if (!def.children?.length) continue;
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);

// Containment edge: file -> definition
if (fileId && defId) {
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
}

if (!def.children?.length) continue;
if (!defId) continue;

for (const child of def.children) {
// Child node
const qualifiedName = `${def.name}.${child.name}`;
childRows.push([
child.name,
Expand All @@ -101,40 +126,32 @@ function insertChildren(
}
}
}
batchInsertNodes(db, childRows);
}

// ── Phase 3: Insert containment + parameter_of edges ────────────────────
// Insert children first (so they exist for edge lookup)
batchInsertNodes(db, childRows);

function insertContainmentEdges(
db: BetterSqlite3.Database,
allSymbols: Map<string, ExtractorOutput>,
): void {
const edgeRows: unknown[][] = [];
// Now re-fetch IDs to include newly-inserted children, then add child edges
for (const [relPath, symbols] of allSymbols) {
const nodeIdMap = new Map<string, number>();
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
const fileId = nodeIdMap.get(`${relPath}|file|0`);
for (const def of symbols.definitions) {
if (!def.children?.length) continue;
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
if (fileId && defId) {
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
}
if (def.children?.length && defId) {
for (const child of def.children) {
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
if (childId) {
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
if (child.kind === 'parameter') {
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
}
if (!defId) continue;
for (const child of def.children) {
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
if (childId) {
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
if (child.kind === 'parameter') {
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
}
}
}
}
}

batchInsertEdges(db, edgeRows);
}

Expand Down Expand Up @@ -217,8 +234,7 @@ export async function insertNodes(ctx: PipelineContext): Promise<void> {

const insertAll = db.transaction(() => {
insertDefinitionsAndExports(db, allSymbols);
insertChildren(db, allSymbols);
insertContainmentEdges(db, allSymbols);
insertChildrenAndEdges(db, allSymbols);
updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash);
});

Expand Down
13 changes: 10 additions & 3 deletions src/features/cfg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,14 @@ async function initCfgParsers(
const ext = path.extname(relPath).toLowerCase();
if (CFG_EXTENSIONS.has(ext)) {
// Fallback is unnecessary only if every body-bearing definition (multi-line
// span, non-dotted name) either opted out of CFG (cfg === null) or already
// carries native CFG blocks.
const hasNativeCfg = symbols.definitions
  .filter(
    (d) =>
      (d.kind === 'function' || d.kind === 'method') &&
      d.line > 0 &&
      d.endLine != null &&
      d.endLine > d.line &&
      !d.name.includes('.'),
  )
  .every((d) => d.cfg === null || (d.cfg?.blocks?.length ?? 0) > 0);
if (!hasNativeCfg) {
needsFallback = true;
Expand Down Expand Up @@ -202,15 +209,15 @@ function buildVisitorCfgMap(
return nameNode ? nameNode.text : null;
},
};
// NOTE(review): tree may be undefined here; walkWithVisitors must tolerate an
// undefined root node — confirm against its signature.
const walkResults = walkWithVisitors(tree?.rootNode, [visitor], langId, walkerOpts);
// biome-ignore lint/complexity/useLiteralKeys: noPropertyAccessFromIndexSignature requires bracket notation
const cfgResults = (walkResults['cfg'] || []) as VisitorCfgResult[];
// Group visitor CFG results by the 1-based start line of their function node.
const visitorCfgByLine = new Map<number, VisitorCfgResult[]>();
for (const r of cfgResults) {
  if (!r.funcNode) continue;
  const line = r.funcNode.startPosition.row + 1;
  // Get-or-create the per-line bucket (avoids the has()/get() double lookup).
  let bucket = visitorCfgByLine.get(line);
  if (bucket === undefined) {
    bucket = [];
    visitorCfgByLine.set(line, bucket);
  }
  bucket.push(r);
}
return visitorCfgByLine;
Expand Down
Loading
Loading