Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,19 @@ Codegraph ships with two parsing engines:

Both engines produce identical output. Use `--engine native|wasm|auto` to control selection (default: `auto`).

On the native path, Rust handles the entire hot pipeline end-to-end:

| Phase | What Rust does |
|-------|---------------|
| **Parse** | Parallel multi-file tree-sitter parsing via rayon (3.5× faster than WASM) |
| **Extract** | Symbols, imports, calls, classes, type maps, AST nodes — all in one pass |
| **Analyze** | Complexity (cognitive, cyclomatic, Halstead), CFG, and dataflow pre-computed per function during parse |
| **Resolve** | Import resolution with 6-level priority system and confidence scoring |
| **Edges** | Call, receiver, extends, and implements edge inference |
| **DB writes** | All inserts (nodes, edges, AST nodes, complexity, CFG, dataflow) via rusqlite — `better-sqlite3` is lazy-loaded only for the WASM fallback path |

The Rust crate (`crates/codegraph-core/`) exposes a `NativeDatabase` napi-rs class that holds a persistent `rusqlite::Connection` for the full build lifecycle, eliminating JS↔SQLite round-trips on every operation.

### Call Resolution

Calls are resolved with **qualified resolution** — method calls (`obj.method()`) are distinguished from standalone function calls, and built-in receivers (`console`, `Math`, `JSON`, `Array`, `Promise`, etc.) are filtered out automatically. Import scope is respected: a call to `foo()` only resolves to functions that are actually imported or defined in the same file, eliminating false positives from name collisions.
Expand Down
236 changes: 236 additions & 0 deletions crates/codegraph-core/src/native_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,74 @@ pub struct BuildMetaEntry {
pub value: String,
}

// ── Bulk-insert input types ────────────────────────────────────────────

/// A single complexity metrics row for bulk insertion.
///
/// Mirrors one row of the `function_complexity` table; `node_id` is the
/// graph node (function/method) the metrics belong to.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct ComplexityRow {
    /// Graph node id of the function/method these metrics describe.
    pub node_id: i64,
    /// Cognitive complexity score.
    pub cognitive: u32,
    /// Cyclomatic complexity score.
    pub cyclomatic: u32,
    /// Deepest block-nesting level observed in the function body.
    pub max_nesting: u32,
    /// Total lines of code (including blanks/comments).
    pub loc: u32,
    /// Source lines of code (excluding blanks).
    pub sloc: u32,
    /// Number of comment lines.
    pub comment_lines: u32,
    // Halstead inputs. By Halstead convention n1/n2 are presumably the
    // DISTINCT operator/operand counts and big_n1/big_n2 the TOTAL counts
    // (N1/N2) — TODO confirm against the Rust extractor that fills these.
    pub halstead_n1: u32,
    pub halstead_n2: u32,
    pub halstead_big_n1: u32,
    pub halstead_big_n2: u32,
    /// Halstead vocabulary (n = n1 + n2).
    pub halstead_vocabulary: u32,
    /// Halstead program length (N = N1 + N2).
    pub halstead_length: u32,
    // Derived Halstead metrics, pre-computed by the producer.
    pub halstead_volume: f64,
    pub halstead_difficulty: f64,
    pub halstead_effort: f64,
    pub halstead_bugs: f64,
    /// Maintainability index derived from the metrics above.
    pub maintainability_index: f64,
}

/// A CFG entry for a single function: blocks + edges.
///
/// `edges` reference blocks by their per-function `index`, not by
/// database rowid; the insert path remaps indices to rowids.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct CfgEntry {
    /// Graph node id of the function/method this CFG belongs to.
    pub node_id: i64,
    /// Basic blocks of the function's control-flow graph.
    pub blocks: Vec<CfgBlockRow>,
    /// Directed edges between blocks, keyed by block index.
    pub edges: Vec<CfgEdgeRow>,
}

/// A single CFG block for bulk insertion.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct CfgBlockRow {
    /// Per-function block index; edges refer to blocks by this value.
    pub index: u32,
    /// Block classification (stored verbatim in `cfg_blocks.block_type`).
    pub block_type: String,
    /// First source line covered by the block, if known.
    pub start_line: Option<u32>,
    /// Last source line covered by the block, if known.
    pub end_line: Option<u32>,
    /// Optional human-readable label for the block.
    pub label: Option<String>,
}

/// A single CFG edge for bulk insertion.
///
/// Endpoints are per-function block indices (see [`CfgBlockRow::index`]);
/// they are translated to database rowids at insert time.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct CfgEdgeRow {
    /// Index of the source block within the same function.
    pub source_index: u32,
    /// Index of the target block within the same function.
    pub target_index: u32,
    /// Edge kind (stored verbatim in `cfg_edges.kind`).
    pub kind: String,
}

/// A single dataflow edge for bulk insertion.
///
/// Per the insert method's docs the kinds include `flows_to`, `returns`,
/// and `mutates`; the value is stored verbatim in `dataflow.kind`.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct DataflowEdge {
    /// Graph node id the value flows from.
    pub source_id: i64,
    /// Graph node id the value flows to.
    pub target_id: i64,
    /// Dataflow kind, stored verbatim.
    pub kind: String,
    /// Parameter position when the flow enters through an argument.
    pub param_index: Option<u32>,
    /// Source expression text for the flow, if captured.
    pub expression: Option<String>,
    /// Source line of the flow, if known.
    pub line: Option<u32>,
    /// Confidence score assigned by the analyzer.
    pub confidence: f64,
}

// ── NativeDatabase class ────────────────────────────────────────────────

/// Persistent rusqlite Connection wrapper exposed to JS via napi-rs.
Expand Down Expand Up @@ -698,6 +766,174 @@ impl NativeDatabase {
Ok(ast_db::do_insert_ast_nodes(conn, &batches).unwrap_or(0))
}

/// Bulk-insert complexity metrics for functions/methods.
///
/// Each row maps a `node_id` to its pre-computed complexity metrics.
/// Rows are written inside a single transaction with one prepared
/// statement; individual row failures are tolerated (best-effort) and
/// simply not counted. Returns the number of rows inserted (0 when the
/// input is empty or the `function_complexity` table does not exist).
#[napi]
pub fn bulk_insert_complexity(&self, rows: Vec<ComplexityRow>) -> napi::Result<u32> {
    if rows.is_empty() {
        return Ok(0);
    }
    let conn = self.conn()?;
    // Older databases may predate this table; treat as a silent no-op.
    if !has_table(conn, "function_complexity") {
        return Ok(0);
    }
    let tx = conn
        .unchecked_transaction()
        .map_err(|e| napi::Error::from_reason(format!("complexity tx failed: {e}")))?;
    // Inner scope: the prepared statement borrows `tx` and must be dropped
    // before `tx.commit()` consumes it.
    let inserted = {
        let mut stmt = tx
            .prepare(
                "INSERT OR REPLACE INTO function_complexity \
                 (node_id, cognitive, cyclomatic, max_nesting, \
                 loc, sloc, comment_lines, \
                 halstead_n1, halstead_n2, halstead_big_n1, halstead_big_n2, \
                 halstead_vocabulary, halstead_length, halstead_volume, \
                 halstead_difficulty, halstead_effort, halstead_bugs, \
                 maintainability_index) \
                 VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18)",
            )
            .map_err(|e| napi::Error::from_reason(format!("complexity prepare failed: {e}")))?;

        // Count successful executes; failures are intentionally ignored.
        rows.iter().fold(0u32, |count, row| {
            let ok = stmt
                .execute(params![
                    row.node_id,
                    row.cognitive,
                    row.cyclomatic,
                    row.max_nesting,
                    row.loc,
                    row.sloc,
                    row.comment_lines,
                    row.halstead_n1,
                    row.halstead_n2,
                    row.halstead_big_n1,
                    row.halstead_big_n2,
                    row.halstead_vocabulary,
                    row.halstead_length,
                    row.halstead_volume,
                    row.halstead_difficulty,
                    row.halstead_effort,
                    row.halstead_bugs,
                    row.maintainability_index,
                ])
                .is_ok();
            count + u32::from(ok)
        })
    };
    tx.commit()
        .map_err(|e| napi::Error::from_reason(format!("complexity commit failed: {e}")))?;
    Ok(inserted)
}

/// Bulk-insert CFG blocks and edges for functions/methods.
///
/// For each entry, blocks are inserted first so their database rowids can
/// be captured; edges are then rewritten from per-function block indices
/// to those rowids. An edge whose source or target block failed to insert
/// (and therefore has no rowid mapping) is silently skipped — inserts are
/// best-effort throughout, matching the other bulk-insert methods.
///
/// Returns the number of blocks inserted (0 when the input is empty or
/// the `cfg_blocks` table does not exist).
#[napi]
pub fn bulk_insert_cfg(&self, entries: Vec<CfgEntry>) -> napi::Result<u32> {
    if entries.is_empty() {
        return Ok(0);
    }
    let conn = self.conn()?;
    // Older databases may predate this table; treat as a silent no-op.
    if !has_table(conn, "cfg_blocks") {
        return Ok(0);
    }
    let tx = conn
        .unchecked_transaction()
        .map_err(|e| napi::Error::from_reason(format!("cfg tx failed: {e}")))?;
    let mut total = 0u32;
    // Inner scope: prepared statements borrow `tx` and must be dropped
    // before `tx.commit()` consumes it.
    {
        let mut block_stmt = tx
            .prepare(
                "INSERT INTO cfg_blocks \
                 (function_node_id, block_index, block_type, start_line, end_line, label) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
            )
            .map_err(|e| napi::Error::from_reason(format!("cfg_blocks prepare failed: {e}")))?;

        let mut edge_stmt = tx
            .prepare(
                "INSERT INTO cfg_edges \
                 (function_node_id, source_block_id, target_block_id, kind) \
                 VALUES (?1, ?2, ?3, ?4)",
            )
            .map_err(|e| napi::Error::from_reason(format!("cfg_edges prepare failed: {e}")))?;

        // Block-index → rowid map, allocated once and cleared per entry
        // instead of re-allocated for every function.
        let mut block_db_ids: std::collections::HashMap<u32, i64> =
            std::collections::HashMap::new();

        for entry in &entries {
            block_db_ids.clear();
            for block in &entry.blocks {
                // `.is_ok()` instead of `if let Ok(_)` (clippy:
                // redundant_pattern_matching); failures leave the index
                // unmapped so dependent edges are dropped below.
                if block_stmt
                    .execute(params![
                        entry.node_id,
                        block.index,
                        &block.block_type,
                        block.start_line,
                        block.end_line,
                        &block.label,
                    ])
                    .is_ok()
                {
                    block_db_ids.insert(block.index, tx.last_insert_rowid());
                    total += 1;
                }
            }
            for edge in &entry.edges {
                // Only emit edges whose endpoints both made it into the DB.
                if let (Some(&src), Some(&tgt)) = (
                    block_db_ids.get(&edge.source_index),
                    block_db_ids.get(&edge.target_index),
                ) {
                    let _ = edge_stmt.execute(params![entry.node_id, src, tgt, &edge.kind]);
                }
            }
        }
    }
    tx.commit()
        .map_err(|e| napi::Error::from_reason(format!("cfg commit failed: {e}")))?;
    Ok(total)
}

/// Bulk-insert dataflow edges (flows_to, returns, mutates).
///
/// All edges are written inside one transaction with a single prepared
/// statement; per-row failures are tolerated (best-effort) and not
/// counted. Returns the number of edges inserted (0 when the input is
/// empty or the `dataflow` table does not exist).
#[napi]
pub fn bulk_insert_dataflow(&self, edges: Vec<DataflowEdge>) -> napi::Result<u32> {
    if edges.is_empty() {
        return Ok(0);
    }
    let conn = self.conn()?;
    // Older databases may predate this table; treat as a silent no-op.
    if !has_table(conn, "dataflow") {
        return Ok(0);
    }
    let tx = conn
        .unchecked_transaction()
        .map_err(|e| napi::Error::from_reason(format!("dataflow tx failed: {e}")))?;
    let mut inserted = 0u32;
    // Inner scope: the prepared statement borrows `tx` and must be dropped
    // before `tx.commit()` consumes it.
    {
        let mut stmt = tx
            .prepare(
                "INSERT INTO dataflow \
                 (source_id, target_id, kind, param_index, expression, line, confidence) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
            )
            .map_err(|e| napi::Error::from_reason(format!("dataflow prepare failed: {e}")))?;

        for edge in &edges {
            // Count successes; a failed row is intentionally skipped.
            inserted += u32::from(
                stmt.execute(params![
                    edge.source_id,
                    edge.target_id,
                    &edge.kind,
                    edge.param_index,
                    &edge.expression,
                    edge.line,
                    edge.confidence,
                ])
                .is_ok(),
            );
        }
    }
    tx.commit()
        .map_err(|e| napi::Error::from_reason(format!("dataflow commit failed: {e}")))?;
    Ok(inserted)
}

/// Full role classification: queries all nodes, computes fan-in/fan-out,
/// classifies roles, and batch-updates the `role` column.
#[napi]
Expand Down
20 changes: 17 additions & 3 deletions src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,12 @@ async function ensureWasmTreesIfNeeded(
opts: AnalysisOpts,
rootDir: string,
): Promise<void> {
const doAst = opts.ast !== false;
const doComplexity = opts.complexity !== false;
const doCfg = opts.cfg !== false;
const doDataflow = opts.dataflow !== false;

if (!doComplexity && !doCfg && !doDataflow) return;
if (!doAst && !doComplexity && !doCfg && !doDataflow) return;

let needsWasmTrees = false;
for (const [relPath, symbols] of fileSymbols) {
Expand All @@ -131,6 +132,8 @@ async function ensureWasmTreesIfNeeded(
d.endLine > d.line &&
!d.name.includes('.');

// AST: need tree when native didn't provide non-call astNodes
const needsAst = doAst && !Array.isArray(symbols.astNodes) && WALK_EXTENSIONS.has(ext);
const needsComplexity =
doComplexity &&
COMPLEXITY_EXTENSIONS.has(ext) &&
Expand All @@ -141,7 +144,7 @@ async function ensureWasmTreesIfNeeded(
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);

if (needsComplexity || needsCfg || needsDataflow) {
if (needsAst || needsComplexity || needsCfg || needsDataflow) {
needsWasmTrees = true;
break;
}
Expand Down Expand Up @@ -180,7 +183,7 @@ function setupVisitors(
getFunctionName: (_node: TreeSitterNode) => null,
};

// AST-store visitor
// AST-store visitor (call kind already filtered in runAnalyses upfront)
let astVisitor: Visitor | null = null;
const astTypeMap = AST_TYPE_MAPS.get(langId);
if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) {
Expand Down Expand Up @@ -418,6 +421,17 @@ export async function runAnalyses(

if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing;

// Strip dead 'call' kind from native astNodes upfront. Call AST nodes are no
// longer extracted by the WASM visitor; native binaries still emit them until
// the Rust extractors are updated (see #701). Clear the array when only calls
// remain so the WASM visitor runs and extracts non-call kinds.
for (const [, symbols] of fileSymbols) {
if (Array.isArray(symbols.astNodes)) {
const filtered = symbols.astNodes.filter((n) => n.kind !== 'call');
symbols.astNodes = filtered.length > 0 ? (filtered as typeof symbols.astNodes) : undefined;
}
}

const extToLang = buildExtToLangMap();

// WASM pre-parse for files that need it
Expand Down
1 change: 0 additions & 1 deletion src/ast-analysis/rules/javascript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({
// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = {
call_expression: 'call',
new_expression: 'new',
throw_statement: 'throw',
await_expression: 'await',
Expand Down
Loading
Loading