Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,12 @@ enabled. The test structure:

```
tests/
integration/ # buildGraph + full query commands
graph/ # Cycle detection, DOT/Mermaid export
parsers/ # Language parser extraction (one file per language)
search/ # Semantic search + embeddings
fixtures/ # Sample projects used by tests
integration/ # buildGraph + full query commands
graph/ # Cycle detection, DOT/Mermaid export
parsers/ # Language parser extraction (one file per language)
search/ # Semantic search + embeddings
benchmarks/resolution/ # Call resolution precision/recall (per-language fixtures)
fixtures/ # Sample projects used by tests
```

- Integration tests create temporary copies of fixture projects for isolation
Expand All @@ -166,18 +167,45 @@ tests/

## Regression Benchmarks

Two regression benchmark scripts live in `scripts/`. These are **not** unit
tests — they measure performance metrics that reviewers use to judge whether a
change is acceptable. If your PR touches code covered by a benchmark, you
**must** run it before and after your changes and include the results in the PR
description.
Several regression benchmarks track codegraph's accuracy and performance across
versions. Some live in `scripts/` (run manually), while the resolution benchmark
runs automatically as part of `npm test`. If your PR touches code covered by a
benchmark, you **must** run it before and after your changes and include the
results in the PR description.

| Benchmark | What it measures | When to run |
|-----------|-----------------|-------------|
| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |
| `node scripts/query-benchmark.js` | Query depth scaling, diff-impact latency | Changes to `queries.js`, `resolve.js`, or `db.js` |
| `node scripts/incremental-benchmark.js` | Incremental build, import resolution throughput | Changes to `builder.js`, `resolve.js`, `parser.js`, or `journal.js` |
| `npx vitest run tests/benchmarks/resolution/` | Call resolution precision/recall per language | Changes to `build-edges.js`, `resolve.js`, `parser.js`, or any extractor |

### Resolution precision/recall benchmark

The resolution benchmark (`tests/benchmarks/resolution/`) measures how
accurately codegraph resolves call edges. It uses hand-annotated fixture projects
with an `expected-edges.json` manifest per language that declares every call edge
that should be detected.

The benchmark runner builds the graph for each fixture, compares resolved edges
against the manifest, and reports:

- **Precision** — the fraction of resolved edges that are correct (a perfect score means no false positives)
- **Recall** — the fraction of expected edges that were found (a perfect score means no false negatives)
- **Per-mode breakdown** — separate recall for `static`, `receiver-typed`, and
`interface-dispatched` resolution modes

**CI gate:** The benchmark runs as part of `npm test`. If precision or recall
drops below the configured thresholds for any language, the test fails.

**Adding a new language fixture:**

1. Create `tests/benchmarks/resolution/fixtures/<language>/` with source files
2. Add an `expected-edges.json` manifest (see the JSON schema at
`tests/benchmarks/resolution/expected-edges.schema.json`)
3. Add thresholds in `resolution-benchmark.test.js` → `THRESHOLDS`
4. No registration step is needed — the benchmark runner auto-discovers any fixture that has an `expected-edges.json`

### How to report results

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma

## 📊 Performance

Self-measured on every release via CI ([build benchmarks](generated/benchmarks/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/benchmarks/EMBEDDING-BENCHMARKS.md)):
Self-measured on every release via CI ([build benchmarks](generated/benchmarks/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/benchmarks/EMBEDDING-BENCHMARKS.md) | [query benchmarks](generated/benchmarks/QUERY-BENCHMARKS.md) | [incremental benchmarks](generated/benchmarks/INCREMENTAL-BENCHMARKS.md) | [resolution precision/recall](tests/benchmarks/resolution/)):

| Metric | Latest |
|---|---|
Expand Down
55 changes: 55 additions & 0 deletions tests/benchmarks/resolution/expected-edges.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Expected Call Edges Manifest",
"description": "Hand-annotated call edges for resolution precision/recall benchmarks",
"type": "object",
"required": ["language", "edges"],
"properties": {
"language": {
"type": "string",
"description": "Language identifier matching the fixture directory name"
},
"description": {
"type": "string"
},
"edges": {
"type": "array",
"items": {
"type": "object",
"required": ["source", "target", "kind", "mode"],
"properties": {
"source": {
"type": "object",
"required": ["name", "file"],
"properties": {
"name": { "type": "string", "description": "Function name, or Class.method for methods (e.g. 'UserService.createUser')" },
"file": { "type": "string", "description": "Filename (basename only)" }
}
},
"target": {
"type": "object",
"required": ["name", "file"],
"properties": {
"name": { "type": "string", "description": "Function name, or Class.method for methods (e.g. 'Logger.info')" },
"file": { "type": "string", "description": "Filename (basename only)" }
}
},
"kind": {
"type": "string",
"enum": ["calls"],
"description": "Edge kind — currently only 'calls'"
},
"mode": {
"type": "string",
"enum": ["static", "receiver-typed", "interface-dispatched"],
"description": "Resolution mode that should produce this edge"
},
"notes": {
"type": "string",
"description": "Human-readable explanation of why this edge is expected"
}
}
}
}
}
}
112 changes: 112 additions & 0 deletions tests/benchmarks/resolution/fixtures/javascript/expected-edges.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"$schema": "../../expected-edges.schema.json",
"language": "javascript",
"description": "Hand-annotated call edges for JavaScript resolution benchmark",
"edges": [
{
"source": { "name": "UserService.createUser", "file": "service.js" },
"target": { "name": "normalize", "file": "validators.js" },
"kind": "calls",
"mode": "static",
"notes": "Direct imported function call from method"
},
{
"source": { "name": "UserService.createUser", "file": "service.js" },
"target": { "name": "validate", "file": "validators.js" },
"kind": "calls",
"mode": "static",
"notes": "Direct imported function call from method"
},
{
"source": { "name": "UserService.createUser", "file": "service.js" },
"target": { "name": "Logger.error", "file": "logger.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "this.logger.error() — receiver-typed via constructor assignment"
},
{
"source": { "name": "UserService.createUser", "file": "service.js" },
"target": { "name": "Logger.info", "file": "logger.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "this.logger.info() — receiver-typed via constructor assignment"
},
{
"source": { "name": "UserService.deleteUser", "file": "service.js" },
"target": { "name": "Logger.warn", "file": "logger.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "this.logger.warn() — receiver-typed via constructor assignment"
},
{
"source": { "name": "Logger.info", "file": "logger.js" },
"target": { "name": "Logger._write", "file": "logger.js" },
"kind": "calls",
"mode": "static",
"notes": "this._write() — same-class method call"
},
{
"source": { "name": "Logger.warn", "file": "logger.js" },
"target": { "name": "Logger._write", "file": "logger.js" },
"kind": "calls",
"mode": "static",
"notes": "this._write() — same-class method call"
},
{
"source": { "name": "Logger.error", "file": "logger.js" },
"target": { "name": "Logger._write", "file": "logger.js" },
"kind": "calls",
"mode": "static",
"notes": "this._write() — same-class method call"
},
{
"source": { "name": "validate", "file": "validators.js" },
"target": { "name": "checkLength", "file": "validators.js" },
"kind": "calls",
"mode": "static",
"notes": "Same-file function call"
},
{
"source": { "name": "normalize", "file": "validators.js" },
"target": { "name": "trimWhitespace", "file": "validators.js" },
"kind": "calls",
"mode": "static",
"notes": "Same-file function call"
},
{
"source": { "name": "main", "file": "index.js" },
"target": { "name": "buildService", "file": "service.js" },
"kind": "calls",
"mode": "static",
"notes": "Direct imported function call"
},
{
"source": { "name": "main", "file": "index.js" },
"target": { "name": "UserService.createUser", "file": "service.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "svc.createUser() — receiver typed via buildService() return"
},
{
"source": { "name": "main", "file": "index.js" },
"target": { "name": "validate", "file": "validators.js" },
"kind": "calls",
"mode": "static",
"notes": "Direct imported function call"
},
{
"source": { "name": "main", "file": "index.js" },
"target": { "name": "UserService.deleteUser", "file": "service.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "svc.deleteUser() — receiver typed via buildService() return"
},
{
"source": { "name": "directInstantiation", "file": "index.js" },
"target": { "name": "UserService.createUser", "file": "service.js" },
"kind": "calls",
"mode": "receiver-typed",
"notes": "svc.createUser() — receiver typed via new UserService()"
}
]
}
15 changes: 15 additions & 0 deletions tests/benchmarks/resolution/fixtures/javascript/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { buildService, UserService } from './service.js';
import { validate } from './validators.js';

/**
 * Fixture entry point: obtains a service from the buildService() factory and
 * calls createUser() and deleteUser() on it, plus a direct validate() call.
 *
 * NOTE: this file is a resolution-benchmark fixture. Every call made here is
 * listed in expected-edges.json, so adding, removing, or renaming a call
 * requires updating that manifest as well.
 */
export function main() {
// `svc` is not constructed here; it comes from buildService()'s return value.
const svc = buildService();
const result = svc.createUser({ name: 'Alice' });
// createUser() returns null when validation fails, hence the truthiness guard.
if (result && validate(result)) {
svc.deleteUser(1);
}
}

/**
 * Fixture case: constructs UserService directly with `new` (instead of going
 * through the buildService() factory) and calls createUser() on it.
 *
 * @returns {Object|null} Whatever createUser() returns: the normalized user
 *   data, or null when validation fails.
 */
export function directInstantiation() {
const svc = new UserService();
return svc.createUser({ name: 'Bob' });
}
21 changes: 21 additions & 0 deletions tests/benchmarks/resolution/fixtures/javascript/logger.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
 * Minimal console logger used by the resolution-benchmark fixture.
 *
 * info(), warn() and error() each delegate to _write() with a fixed level tag.
 * Those three same-class calls are listed in expected-edges.json under the
 * names "Logger.info", "Logger.warn", "Logger.error" and "Logger._write", so
 * the method names must stay in sync with that manifest.
 */
export class Logger {
/**
 * @param {string} prefix - Label interpolated into every emitted line (the
 *   only caller visible in this fixture passes a string).
 */
constructor(prefix) {
this.prefix = prefix;
}

/**
 * Emits `msg` at level INFO.
 * @param {*} msg - Message interpolated into the output line.
 */
info(msg) {
this._write('INFO', msg);
}

/**
 * Emits `msg` at level WARN.
 * @param {*} msg - Message interpolated into the output line.
 */
warn(msg) {
this._write('WARN', msg);
}

/**
 * Emits `msg` at level ERROR.
 * @param {*} msg - Message interpolated into the output line.
 */
error(msg) {
this._write('ERROR', msg);
}

/**
 * Shared sink for all levels: prints `[prefix] LEVEL: msg` via console.log.
 * @param {string} level - Level tag supplied by info()/warn()/error().
 * @param {*} msg - Message interpolated into the output line.
 */
_write(level, msg) {
console.log(`[${this.prefix}] ${level}: ${msg}`);
}
}
27 changes: 27 additions & 0 deletions tests/benchmarks/resolution/fixtures/javascript/service.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { Logger } from './logger.js';
import { normalize, validate } from './validators.js';

export class UserService {
constructor() {
this.logger = new Logger('UserService');
}

createUser(data) {
const clean = normalize(data);
if (!validate(clean)) {
this.logger.error('Validation failed');
return null;
}
this.logger.info('User created');
return clean;
}

deleteUser(id) {
this.logger.warn(`Deleting user ${id}`);
return true;
}
}

/**
 * Factory returning a fresh UserService.
 *
 * Per expected-edges.json, callers such as main() have their `svc` receiver
 * typed via this function's return, so the `new UserService()` return
 * expression is part of what the benchmark exercises.
 *
 * @returns {UserService} A newly constructed service.
 */
export function buildService() {
return new UserService();
}
15 changes: 15 additions & 0 deletions tests/benchmarks/resolution/fixtures/javascript/validators.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Checks that `data` is non-null and carries a string `name` of acceptable
 * length.
 *
 * @param {*} data - Candidate record; null and undefined are tolerated.
 * @returns {boolean} True only when `data.name` is a string whose length is
 *   between 1 and 255 inclusive.
 */
export function validate(data) {
  return data != null && typeof data.name === 'string' && checkLength(data.name);
}

/**
 * Returns a shallow copy of `data` with surrounding whitespace removed from
 * `name`.
 *
 * Inputs that validate() rejects for shape reasons (null/undefined, or a
 * `name` that is not a string) are returned unchanged rather than throwing.
 * Callers that normalize first and validate second therefore get a clean
 * `false` from validate() instead of a TypeError from this function.
 *
 * @param {*} data - Record to normalize.
 * @returns {*} A new object with a trimmed `name`, or `data` itself when there
 *   is no string `name` to trim.
 */
export function normalize(data) {
  if (data == null || typeof data.name !== 'string') {
    return data;
  }
  return { ...data, name: trimWhitespace(data.name) };
}

/**
 * Length gate used by validate().
 *
 * @param {string} str - Value to measure.
 * @returns {boolean} True when `str.length` is greater than 0 and less than 256.
 */
function checkLength(str) {
  return str.length > 0 && str.length < 256;
}

/**
 * Thin wrapper over String.prototype.trim, kept as a named function so that
 * normalize() has a same-file call for the benchmark to resolve.
 *
 * @param {string} str - Value to trim.
 * @returns {string} `str` without leading/trailing whitespace.
 */
function trimWhitespace(str) {
  return str.trim();
}
Loading
Loading