diff --git a/scripts/lib/bench-config.js b/scripts/lib/bench-config.js index bd354334..55306e70 100644 --- a/scripts/lib/bench-config.js +++ b/scripts/lib/bench-config.js @@ -134,6 +134,27 @@ export async function resolveBenchmarkSource() { console.error(`Warning: failed to install native package: ${err.message}`); } + // @huggingface/transformers is a devDependency (lazy-loaded for embeddings). + // It is not installed as a transitive dep in npm mode, so install it + // explicitly so the embedding benchmark workers can import it. + try { + const localPkg = JSON.parse( + fs.readFileSync(path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..', 'package.json'), 'utf8'), + ); + const hfVersion = localPkg.devDependencies?.['@huggingface/transformers']; + if (hfVersion) { + console.error(`Installing @huggingface/transformers@${hfVersion} for embedding benchmarks...`); + execFileSync('npm', ['install', `@huggingface/transformers@${hfVersion}`, '--no-audit', '--no-fund', '--no-save'], { + cwd: tmpDir, + stdio: 'pipe', + timeout: 120_000, + }); + console.error('Installed @huggingface/transformers'); + } + } catch (err) { + console.error(`Warning: failed to install @huggingface/transformers: ${err.message}`); + } + const srcDir = path.join(pkgDir, 'src'); if (!fs.existsSync(srcDir)) { diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js index d08915f4..042c956d 100644 --- a/scripts/query-benchmark.js +++ b/scripts/query-benchmark.js @@ -111,8 +111,32 @@ function round1(n) { return Math.round(n * 10) / 10; } +// Pinned hub targets — stable function names that exist across versions. +// Auto-selecting the most-connected node makes version-to-version comparison +// meaningless when barrel/type files get added or removed. +const PINNED_HUB_CANDIDATES = ['buildGraph', 'openDb', 'loadConfig']; + function selectTargets() { const db = new Database(dbPath, { readonly: true }); + try { + + // Try pinned candidates first for a stable hub across versions + let hub = null; + for (const candidate of PINNED_HUB_CANDIDATES) { + const row = db + .prepare( + `SELECT n.name FROM nodes n + JOIN edges e ON e.source_id = n.id OR e.target_id = n.id + WHERE n.name = ? AND n.file NOT LIKE '%test%' AND n.file NOT LIKE '%spec%' + LIMIT 1`, + ) + .get(candidate); + if (row) { + hub = row.name; + break; + } + } + const rows = db .prepare( `SELECT n.name, COUNT(e.id) AS cnt @@ -123,14 +147,19 @@ function selectTargets() { ORDER BY cnt DESC`, ) .all(); - db.close(); if (rows.length === 0) throw new Error('No nodes with edges found in graph'); - const hub = rows[0].name; + // Fall back to most-connected if no pinned candidate found + if (!hub) hub = rows[0].name; + const mid = rows[Math.floor(rows.length / 2)].name; const leaf = rows[rows.length - 1].name; return { hub, mid, leaf }; + + } finally { + db.close(); + } } function benchDepths(fn, name, depths) { diff --git a/scripts/update-embedding-report.js b/scripts/update-embedding-report.js index 47e31d15..645c1844 100644 --- a/scripts/update-embedding-report.js +++ b/scripts/update-embedding-report.js @@ -26,6 +26,15 @@ if (arg) { } const entry = JSON.parse(jsonText); +// Guard: reject empty benchmark results (all workers crashed or no symbols indexed) +if (!entry.symbols || !entry.models || Object.keys(entry.models).length === 0) { + console.error( + `Embedding benchmark produced empty results (symbols=${entry.symbols}, models=${Object.keys(entry.models || {}).length}). ` + + 'Skipping report update to avoid overwriting valid data. Check benchmark worker logs.', + ); + process.exit(1); +} + // ── Paths ──────────────────────────────────────────────────────────────── const reportPath = path.join(root, 'generated', 'benchmarks', 'EMBEDDING-BENCHMARKS.md'); diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index c2ea3540..2ce1dbbf 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -24,6 +24,19 @@ import { findMatchingNodes } from './symbol-lookup.js'; const INTERFACE_LIKE_KINDS = new Set(['interface', 'trait']); +/** + * Check whether the graph contains any 'implements' edges. + * Cached per db handle so the query runs at most once per connection. + */ +const _hasImplementsCache = new WeakMap(); +function hasImplementsEdges(db) { + if (_hasImplementsCache.has(db)) return _hasImplementsCache.get(db); + const row = db.prepare("SELECT 1 FROM edges WHERE kind = 'implements' LIMIT 1").get(); + const result = !!row; + _hasImplementsCache.set(db, result); + return result; +} + /** * BFS traversal to find transitive callers of a node. * When an interface/trait node is encountered (either as the start node or @@ -40,6 +53,9 @@ export function bfsTransitiveCallers( startId, { noTests = false, maxDepth = 3, includeImplementors = true, onVisit } = {}, ) { + // Skip all implementor lookups when the graph has no implements edges + const resolveImplementors = includeImplementors && hasImplementsEdges(db); + const visited = new Set([startId]); const levels = {}; let frontier = [startId]; @@ -47,7 +63,7 @@ export function bfsTransitiveCallers( // Seed: if start node is an interface/trait, include its implementors at depth 1. // Implementors go into a separate list so their callers appear at depth 2, not depth 1. const implNextFrontier = []; - if (includeImplementors) { + if (resolveImplementors) { const startNode = findNodeById(db, startId); if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) { const impls = findImplementors(db, startId); @@ -88,7 +104,7 @@ export function bfsTransitiveCallers( // If a caller is an interface/trait, also pull in its implementors // Implementors are one extra hop away, so record at d+1 - if (includeImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { + if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { const impls = findImplementors(db, c.id); for (const impl of impls) { if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index e8439ab0..7f62083b 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -130,12 +130,19 @@ export const DEFAULTS = { }, }; +// Per-cwd config cache — avoids re-reading the config file on every query call. +// The config file rarely changes within a single process lifetime. +const _configCache = new Map(); + /** * Load project configuration from a .codegraphrc.json or similar file. - * Returns merged config with defaults. + * Returns merged config with defaults. Results are cached per cwd. */ export function loadConfig(cwd) { cwd = cwd || process.cwd(); + const cached = _configCache.get(cwd); + if (cached) return structuredClone(cached); + for (const name of CONFIG_FILES) { const filePath = path.join(cwd, name); if (fs.existsSync(filePath)) { @@ -148,13 +155,26 @@ export function loadConfig(cwd) { merged.query.excludeTests = Boolean(config.excludeTests); } delete merged.excludeTests; - return resolveSecrets(applyEnvOverrides(merged)); + const result = resolveSecrets(applyEnvOverrides(merged)); + _configCache.set(cwd, structuredClone(result)); + return result; } catch (err) { debug(`Failed to parse config ${filePath}: ${err.message}`); } } } - return resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + _configCache.set(cwd, structuredClone(defaults)); + return defaults; +} + +/** + * Clear the config cache. Intended for long-running processes that need to + * pick up on-disk config changes, and for test isolation when tests share + * the same cwd. + */ +export function clearConfigCache() { + _configCache.clear(); } const ENV_LLM_MAP = {