From 3481ef08bd36d1ff59d2285603601c97bf1e0ef6 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 08:53:54 -0600 Subject: [PATCH 1/9] chore: release v3.6.0 --- CHANGELOG.md | 27 +++++++++++++++++++++++++ README.md | 34 ++++++++++++++++++++------------ crates/codegraph-core/Cargo.toml | 2 +- docs/roadmap/BACKLOG.md | 2 +- docs/roadmap/ROADMAP.md | 10 ++++++---- package-lock.json | 13 ++++++++++-- package.json | 2 +- 7 files changed, 68 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e67f74ac..5bd47f12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,33 @@ All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines. +## [3.6.0](https://github.com/optave/ops-codegraph-tool/compare/v3.5.0...v3.6.0) (2026-03-30) + +**Six new languages and a parser abstraction layer.** This release adds first-class support for C, C++, Kotlin, Swift, Scala, and Bash — bringing the total supported languages to 14. A new parser abstraction layer decouples language extractors from tree-sitter internals, making it straightforward to add more languages. The native Rust engine gains batched query methods for the read path, WAL corruption is fixed when native and JS connections overlap, and WASM call-AST extraction is restored for full engine parity. 
+ +### Features + +* add C, C++, Kotlin, Swift, Scala, Bash language support ([#708](https://github.com/optave/ops-codegraph-tool/pull/708)) + +### Bug Fixes + +* **parity:** restore call AST node extraction in WASM engine ([#705](https://github.com/optave/ops-codegraph-tool/pull/705)) +* **native:** suspend JS connection around native writes to prevent WAL corruption ([#704](https://github.com/optave/ops-codegraph-tool/pull/704)) +* native visibility crash and dual-SQLite WAL corruption in benchmarks ([#689](https://github.com/optave/ops-codegraph-tool/pull/689)) +* **ci:** resolve visibility null crash and sequence dataflow annotation ([#693](https://github.com/optave/ops-codegraph-tool/pull/693)) +* **publish:** update repository URLs for npm provenance ([#682](https://github.com/optave/ops-codegraph-tool/pull/682)) + +### Performance + +* **queries:** batched native Rust query methods for read path ([#698](https://github.com/optave/ops-codegraph-tool/pull/698)) + +### Refactors + +* **extractors:** parser abstraction layer (Phase 7.1) ([#700](https://github.com/optave/ops-codegraph-tool/pull/700)) +* **native:** extract generic walk_tree to eliminate walk_node_depth duplication ([#703](https://github.com/optave/ops-codegraph-tool/pull/703)) +* remove dead WASM call-AST extraction and pre-3.2 edge shim ([#686](https://github.com/optave/ops-codegraph-tool/pull/686)) +* Titan audit — decompose, reduce complexity, remove dead code ([#699](https://github.com/optave/ops-codegraph-tool/pull/699)) + ## [3.5.0](https://github.com/optave/ops-codegraph-tool/compare/v3.4.1...v3.5.0) (2026-03-29) **Full rusqlite database migration and sub-100ms incremental rebuilds.** This release completes the migration of all SQLite operations from better-sqlite3 to native Rust/rusqlite via napi-rs, delivering major performance gains across the entire build pipeline. 
Incremental rebuilds drop from 466ms to 67–80ms, and bulk inserts for nodes, edges, roles, AST nodes, CFG, and dataflow all run through the native engine. better-sqlite3 is now lazy-loaded only as a fallback. Path aliases are restored with TS 6.x-compatible subpath imports, and several WASM/native parity bugs are fixed. diff --git a/README.md b/README.md index aec62699..ba1a1f16 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | |---|:---:|:---:|:---:|:---:|:---:|:---:| -| Languages | **11** | ~12 | **32** | ~10 | 3 | 13 | +| Languages | **17** | ~12 | **32** | ~10 | 3 | 13 | | MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | | Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | — | | Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | **Yes** | @@ -106,7 +106,7 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. 
Ships with a GitHub Actions workflow | | **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — agents don't need per-language tools | | **🧠** | **Hybrid search** | BM25 keyword + semantic embeddings fused via RRF — `hybrid` (default), `semantic`, or `keyword` mode; multi-query via `"auth; token; JWT"` | -| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 11 languages | +| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 17 languages | | **🔓** | **Fully local, zero cost** | No API keys, no accounts, no network calls. Optionally bring your own LLM provider — your code only goes where you choose | --- @@ -186,7 +186,7 @@ cd codegraph && npm install && npm link | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking | | 👀 | **Watch mode** | Incrementally update the graph as files change | | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | -| 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 11 languages | +| 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 17 languages | | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function | | 🏘️ | **Community detection** | Leiden clustering to discover natural module boundaries and architectural drift | | 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) | @@ -199,8 +199,8 @@ cd codegraph && npm install && npm link | ✅ | **CI validation predicates** | 
`check` command with configurable gates: complexity, blast radius, cycles, boundary violations — exit code 0/1 for CI | | 📋 | **Composite audit** | Single `audit` command combining explain + impact + health metrics per function — one call instead of 3-4 | | 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue | -| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 11 languages, included by default, skip with `--no-dataflow` | -| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | +| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 17 languages, included by default, skip with `--no-dataflow` | +| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 17 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | | 🔎 | **AST node querying** | Stored queryable AST nodes (calls, `new`, string, regex, throw, await) — `ast` command with SQL GLOB pattern matching | | 🧬 | **Expanded node/edge types** | `parameter`, `property`, `constant` node kinds with `parent_id` for sub-declaration queries; `contains`, `parameter_of`, `receiver` edge kinds | | 📊 | **Exports analysis** | `exports ` shows all exported symbols with per-symbol consumers, re-export detection, and counts | @@ -320,7 +320,7 @@ codegraph ast -k call # Filter by kind: call, new, string, regex codegraph ast -k throw --file src/ # Combine kind and file filters ``` -> **Note:** Dataflow and CFG are included by default for all 11 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. +> **Note:** Dataflow and CFG are included by default for all 17 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. 
### Audit, Triage & Batch @@ -482,11 +482,18 @@ codegraph registry remove # Unregister | ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php`, `.phtml` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb`, `.rake`, `.gemspec` | ✓ | ✓ | ✓ | ✓ | —³ | ✓ | +| ![C](https://img.shields.io/badge/-C-A8B9CC?style=flat-square&logo=c&logoColor=black) | `.c`, `.h` | ✓ | ✓ | ✓ | —⁴ | —⁴ | ✓ | +| ![C++](https://img.shields.io/badge/-C++-00599C?style=flat-square&logo=cplusplus&logoColor=white) | `.cpp`, `.hpp`, `.cc`, `.cxx` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Kotlin](https://img.shields.io/badge/-Kotlin-7F52FF?style=flat-square&logo=kotlin&logoColor=white) | `.kt`, `.kts` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Swift](https://img.shields.io/badge/-Swift-F05138?style=flat-square&logo=swift&logoColor=white) | `.swift` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Scala](https://img.shields.io/badge/-Scala-DC322F?style=flat-square&logo=scala&logoColor=white) | `.scala`, `.sc` | ✓ | ✓ | ✓ | ✓ | — | ✓ | +| ![Bash](https://img.shields.io/badge/-Bash-4EAA25?style=flat-square&logo=gnubash&logoColor=white) | `.sh`, `.bash` | ✓ | ✓ | ✓ | —⁴ | —⁴ | ✓ | | ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | ✓ | —³ | —³ | —³ | —³ | —³ | > ¹ **Heritage** = `extends`, `implements`, `include`/`extend` (Ruby), trait `impl` (Rust), receiver methods (Go). > ² **Type Inference** extracts a per-file type map from annotations (`const x: Router`, `MyType x`, `x: MyType`) and `new` expressions, enabling the edge resolver to connect `x.method()` → `Type.method()`. > ³ Not applicable — Ruby is dynamically typed; Terraform/HCL is declarative (no functions, classes, or type system). 
+> ⁴ Not applicable — C and Bash have no class/inheritance system. > All languages have full **parity** between the native Rust engine and the WASM fallback. ## ⚙️ How It Works @@ -786,13 +793,14 @@ See **[ROADMAP.md](docs/roadmap/ROADMAP.md)** for the full development roadmap a 6. ~~**Resolution Accuracy**~~ — **Complete** (v3.3.1) — type inference, receiver type tracking, dead role sub-categories, resolution benchmarks, `package.json` exports, monorepo workspace resolution 7. ~~**TypeScript Migration**~~ — **Complete** (v3.4.0) — all 271 source files migrated from JS to TS, zero `.js` remaining 8. ~~**Native Analysis Acceleration**~~ — **Complete** (v3.5.0) — all build phases in Rust/rusqlite, sub-100ms incremental rebuilds, better-sqlite3 lazy-loaded as fallback only -9. **Expanded Language Support** — 23 new languages in 4 batches (11 → 34) -10. **Runtime & Extensibility** — event-driven pipeline, plugin system, query caching, pagination -11. **Quality, Security & Technical Debt** — supply-chain security (SBOM, SLSA), CI coverage gates, timer cleanup, tech debt kill list -12. **Intelligent Embeddings** — LLM-generated descriptions, enhanced embeddings, module summaries -13. **Natural Language Queries** — `codegraph ask` command, conversational sessions -14. **GitHub Integration & CI** — reusable GitHub Action, LLM-enhanced PR review, SARIF output -15. **Advanced Features** — dead code detection, monorepo support, agentic search +9. **Expanded Language Support** — **In Progress** (v3.6.0) — Batch 1 shipped (C, C++, Kotlin, Swift, Scala, Bash); 17 remaining in 3 batches (17 → 34) +10. **Analysis Depth** — TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis +11. **Runtime & Extensibility** — event-driven pipeline, plugin system, query caching, pagination +12. **Quality, Security & Technical Debt** — supply-chain security (SBOM, SLSA), CI coverage gates, timer cleanup, tech debt kill list +13. 
**Intelligent Embeddings** — LLM-generated descriptions, enhanced embeddings, module summaries +14. **Natural Language Queries** — `codegraph ask` command, conversational sessions +15. **GitHub Integration & CI** — reusable GitHub Action, LLM-enhanced PR review, SARIF output +16. **Advanced Features** — dead code detection, monorepo support, agentic search ## 🤝 Contributing diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index 457f1e2a..91a672e6 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codegraph-core" -version = "3.5.0" +version = "3.6.0" edition = "2021" license = "Apache-2.0" diff --git a/docs/roadmap/BACKLOG.md b/docs/roadmap/BACKLOG.md index 03f9c160..b1aa3a5d 100644 --- a/docs/roadmap/BACKLOG.md +++ b/docs/roadmap/BACKLOG.md @@ -1,6 +1,6 @@ # Codegraph Feature Backlog -**Last updated:** 2026-03-29 +**Last updated:** 2026-03-30 **Source:** Features derived from [COMPETITIVE_ANALYSIS.md](../../generated/competitive/COMPETITIVE_ANALYSIS.md) and internal roadmap discussions. --- diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index a4f35cdb..9a1b0327 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1,6 +1,6 @@ # Codegraph Roadmap -> **Current version:** 3.5.0 | **Status:** Active development | **Updated:** 2026-03-30 +> **Current version:** 3.6.0 | **Status:** Active development | **Updated:** 2026-03-30 Codegraph is a strong local-first code graph CLI. This roadmap describes planned improvements across fourteen phases -- closing gaps with commercial code intelligence platforms while preserving codegraph's core strengths: fully local, open source, zero cloud dependency by default. @@ -20,8 +20,8 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | | [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **Complete** (v3.4.0) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Rust extraction for AST/CFG/dataflow/complexity; batch SQLite inserts; incremental rebuilds; native DB write pipeline; full rusqlite migration so native engine never touches better-sqlite3 | **Complete** (v3.5.0) | -| [**7**](#phase-7--analysis-depth) | Analysis Depth | TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis, enhanced dynamic dispatch, barrel file resolution, precision/recall CI gates | Planned | -| [**8**](#phase-8--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support | Planned | +| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support | **In Progress** (v3.6.0) | +| [**8**](#phase-8--analysis-depth) | Analysis Depth | TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis, enhanced dynamic dispatch, barrel file resolution, precision/recall CI gates | Planned | | [**9**](#phase-9--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system | Planned | | [**10**](#phase-10--quality-security--technical-debt) | Quality, Security & Technical Debt | 
Supply-chain security, test quality gates, architectural debt cleanup | Planned | | [**11**](#phase-11--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | @@ -1315,10 +1315,12 @@ Extract shared patterns from existing extractors into reusable helpers to reduce - `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum) - `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` patterns across 7 extractors -### 7.2 -- Batch 1: High Demand +### 7.2 -- Batch 1: High Demand ✅ Major languages with official or widely-adopted tree-sitter grammars (millions of crate downloads). +- ✅ All 6 languages shipped in v3.6.0 ([#708](https://github.com/optave/ops-codegraph-tool/pull/708)) + | Language | Extensions | Grammar | Org | Notes | |----------|-----------|---------|-----|-------| | C | `.c`, `.h` | `tree-sitter-c` | Official | 3.9M crate downloads | diff --git a/package-lock.json b/package-lock.json index 14670082..9f4fdead 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@optave/codegraph", - "version": "3.5.0", + "version": "3.6.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@optave/codegraph", - "version": "3.5.0", + "version": "3.6.0", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", @@ -1282,6 +1282,9 @@ "cpu": [ "arm64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1295,6 +1298,9 @@ "cpu": [ "x64" ], + "libc": [ + "glibc" + ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1308,6 +1314,9 @@ "cpu": [ "x64" ], + "libc": [ + "musl" + ], "license": "Apache-2.0", "optional": true, "os": [ diff --git a/package.json b/package.json index fe177e6e..5866b683 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@optave/codegraph", - "version": 
"3.5.0", + "version": "3.6.0", "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them", "type": "module", "main": "dist/index.js", From 78840a1e7bc76cd218cefb5997e5cff2cd03ea47 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 09:28:38 -0600 Subject: [PATCH 2/9] fix(docs): correct language count and update stale descriptions - CHANGELOG: fix total language count from 14 to 17 - README: add 6 new languages to multi-language differentiator row - ROADMAP: update Phase 7 overview to reflect Batch 1 completion --- CHANGELOG.md | 2 +- README.md | 2 +- docs/roadmap/ROADMAP.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bd47f12..6a1fca19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. See [commit ## [3.6.0](https://github.com/optave/ops-codegraph-tool/compare/v3.5.0...v3.6.0) (2026-03-30) -**Six new languages and a parser abstraction layer.** This release adds first-class support for C, C++, Kotlin, Swift, Scala, and Bash — bringing the total supported languages to 14. A new parser abstraction layer decouples language extractors from tree-sitter internals, making it straightforward to add more languages. The native Rust engine gains batched query methods for the read path, WAL corruption is fixed when native and JS connections overlap, and WASM call-AST extraction is restored for full engine parity. +**Six new languages and a parser abstraction layer.** This release adds first-class support for C, C++, Kotlin, Swift, Scala, and Bash — bringing the total supported languages to 17. A new parser abstraction layer decouples language extractors from tree-sitter internals, making it straightforward to add more languages. 
The native Rust engine gains batched query methods for the read path, WAL corruption is fixed when native and JS connections overlap, and WASM call-AST extraction is restored for full engine parity. ### Features diff --git a/README.md b/README.md index ba1a1f16..880600ef 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes | | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds — agents work with current data | | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. Ships with a GitHub Actions workflow | -| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — agents don't need per-language tools | +| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + C + C++ + Kotlin + Swift + Scala + Bash + HCL in a single graph — agents don't need per-language tools | | **🧠** | **Hybrid search** | BM25 keyword + semantic embeddings fused via RRF — `hybrid` (default), `semantic`, or `keyword` mode; multi-query via `"auth; token; JWT"` | | **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 17 languages | | **🔓** | **Fully local, zero cost** | No API keys, no accounts, no network calls. 
Optionally bring your own LLM provider — your code only goes where you choose | diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 9a1b0327..61188102 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -20,7 +20,7 @@ Codegraph is a strong local-first code graph CLI. This roadmap describes planned | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | | [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **Complete** (v3.4.0) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Rust extraction for AST/CFG/dataflow/complexity; batch SQLite inserts; incremental rebuilds; native DB write pipeline; full rusqlite migration so native engine never touches better-sqlite3 | **Complete** (v3.5.0) | -| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support | **In Progress** (v3.6.0) | +| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support — Batch 1 (6 languages) shipped in v3.6.0; 17 remaining in 3 batches (17 → 34) | **In Progress** (v3.6.0) | | [**8**](#phase-8--analysis-depth) | Analysis Depth | TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis, enhanced dynamic dispatch, barrel file resolution, precision/recall CI gates | Planned | | [**9**](#phase-9--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, 
configuration profiles, pagination, plugin system | Planned | | [**10**](#phase-10--quality-security--technical-debt) | Quality, Security & Technical Debt | Supply-chain security, test quality gates, architectural debt cleanup | Planned | From 50f193f2a047f8b94c3d27161bb12058c5cb9e5b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:40:55 -0600 Subject: [PATCH 3/9] feat: add Elixir, Lua, Dart, Zig, Haskell, OCaml language support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Batch 2 languages from the ROADMAP Phase 7 plan. Each language includes dual-engine support (WASM + native Rust extractors), AST configs, and parser tests. Language details: - Elixir (.ex, .exs): modules, functions, protocols, imports/use/require - Lua (.lua): functions, methods, require() imports, table patterns - Dart (.dart): classes, enums, mixins, extensions, imports, inheritance - Zig (.zig): functions, structs, enums, unions, @import, test decls - Haskell (.hs): functions, data/newtype/type, typeclasses, imports - OCaml (.ml, .mli): let bindings, modules, types, open, applications Notable grammar quirks handled: - Elixir: all constructs are generic `call` nodes, distinguished by identifier text (defmodule, def, defp, use, import, etc.) 
- Dart: no call_expression node — calls detected via selector/argument_part - Zig: structs/enums are anonymous, named by parent variable_declaration - Haskell: grammar misspells type_synonym as type_synomym ('m' in place of the second 'n') - OCaml: sub-grammar under grammars/ocaml, Rust export LANGUAGE_OCAML - Dart Rust crate uses old language() function instead of LANGUAGE constant --- crates/codegraph-core/Cargo.toml | 6 + crates/codegraph-core/src/extractors/dart.rs | 268 +++++++++++++++ .../codegraph-core/src/extractors/elixir.rs | 226 ++++++++++++ .../codegraph-core/src/extractors/haskell.rs | 231 +++++++++++++ .../codegraph-core/src/extractors/helpers.rs | 66 ++++ crates/codegraph-core/src/extractors/lua.rs | 147 ++++++++ crates/codegraph-core/src/extractors/mod.rs | 24 ++ crates/codegraph-core/src/extractors/ocaml.rs | 248 ++++++++++++++ crates/codegraph-core/src/extractors/zig.rs | 322 ++++++++++++++++++ crates/codegraph-core/src/parser_registry.rs | 24 ++ package-lock.json | 135 +++++++- package.json | 6 + scripts/build-wasm.ts | 6 + src/domain/parser.ts | 54 +++ src/extractors/dart.ts | 310 +++++++++++++++++ src/extractors/elixir.ts | 243 +++++++++++++ src/extractors/haskell.ts | 241 +++++++++++++ src/extractors/index.ts | 6 + src/extractors/lua.ts | 169 +++++++++ src/extractors/ocaml.ts | 259 ++++++++++++++ src/extractors/zig.ts | 283 +++++++++++++++ src/types.ts | 8 +- tests/parsers/dart.test.ts | 53 +++ tests/parsers/elixir.test.ts | 59 ++++ tests/parsers/haskell.test.ts | 56 +++ tests/parsers/lua.test.ts | 55 +++ tests/parsers/ocaml.test.ts | 55 +++ tests/parsers/zig.test.ts | 70 ++++ 28 files changed, 3620 insertions(+), 10 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/dart.rs create mode 100644 crates/codegraph-core/src/extractors/elixir.rs create mode 100644 crates/codegraph-core/src/extractors/haskell.rs create mode 100644 crates/codegraph-core/src/extractors/lua.rs create mode 100644 crates/codegraph-core/src/extractors/ocaml.rs create mode 100644
crates/codegraph-core/src/extractors/zig.rs create mode 100644 src/extractors/dart.ts create mode 100644 src/extractors/elixir.ts create mode 100644 src/extractors/haskell.ts create mode 100644 src/extractors/lua.ts create mode 100644 src/extractors/ocaml.ts create mode 100644 src/extractors/zig.ts create mode 100644 tests/parsers/dart.test.ts create mode 100644 tests/parsers/elixir.test.ts create mode 100644 tests/parsers/haskell.test.ts create mode 100644 tests/parsers/lua.test.ts create mode 100644 tests/parsers/ocaml.test.ts create mode 100644 tests/parsers/zig.test.ts diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index 91a672e6..87ca5844 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -29,6 +29,12 @@ tree-sitter-swift = "0.7" tree-sitter-scala = "0.25" tree-sitter-bash = "0.23" tree-sitter-hcl = "1" +tree-sitter-elixir = "0.3" +tree-sitter-lua = "0.2" +tree-sitter-dart = "0.0.4" +tree-sitter-zig = "1" +tree-sitter-haskell = "0.23" +tree-sitter-ocaml = "0.24" rayon = "1" # `bundled` embeds a second SQLite copy (better-sqlite3 already bundles one). 
# This is intentional: Windows CI lacks a system SQLite, and WAL coordination diff --git a/crates/codegraph-core/src/extractors/dart.rs b/crates/codegraph-core/src/extractors/dart.rs new file mode 100644 index 00000000..d689f307 --- /dev/null +++ b/crates/codegraph-core/src/extractors/dart.rs @@ -0,0 +1,268 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct DartExtractor; + +impl SymbolExtractor for DartExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_dart_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &DART_AST_CONFIG); + symbols + } +} + +fn match_dart_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "class_definition" => handle_dart_class(node, source, symbols), + "enum_declaration" => handle_dart_enum(node, source, symbols), + "mixin_declaration" => handle_dart_mixin(node, source, symbols), + "extension_declaration" => handle_dart_extension(node, source, symbols), + "function_signature" => { + if !is_inside_class(node) { + handle_dart_function_sig(node, source, symbols); + } + } + "library_import" => handle_dart_import(node, source, symbols), + "constructor_invocation" | "new_expression" => handle_dart_constructor_call(node, source, symbols), + "type_alias" => handle_dart_type_alias(node, source, symbols), + _ => {} + } +} + +fn handle_dart_class(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + let class_name = node_text(&name_node, source).to_string(); + + // Extract methods + if let Some(body) = node.child_by_field_name("body").or_else(|| 
find_child(node, "class_body")) { + extract_dart_class_methods(&body, &class_name, source, symbols); + } + + // Extract inheritance + if let Some(superclass) = node.child_by_field_name("superclass") { + if let Some(type_name) = find_child(&superclass, "type_identifier") + .or_else(|| find_child(&superclass, "identifier")) + { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&type_name, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + if let Some(interfaces) = node.child_by_field_name("interfaces") { + for i in 0..interfaces.child_count() { + if let Some(child) = interfaces.child(i) { + let type_name = if child.kind() == "type_identifier" { + Some(child) + } else { + find_child(&child, "type_identifier").or_else(|| find_child(&child, "identifier")) + }; + if let Some(tn) = type_name { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(node_text(&tn, source).to_string()), + line: start_line(node), + }); + } + } + } + } + + symbols.definitions.push(Definition { + name: class_name, + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn extract_dart_class_methods(body: &Node, class_name: &str, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..body.child_count() { + if let Some(member) = body.child(i) { + match member.kind() { + "method_signature" | "function_signature" => { + if let Some(fn_name) = extract_dart_fn_name(&member, source) { + symbols.definitions.push(Definition { + name: format!("{}.{}", class_name, fn_name), + kind: "method".to_string(), + line: start_line(&member), + end_line: Some(end_line(&member)), + decorators: None, + complexity: compute_all_metrics(&member, source, "dart"), + cfg: build_function_cfg(&member, "dart", source), + children: None, + }); + } + } + _ => {} + } + } + } +} + +fn 
extract_dart_fn_name(node: &Node, source: &[u8]) -> Option { + if let Some(name) = node.child_by_field_name("name") { + return Some(node_text(&name, source).to_string()); + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "function_signature" | "getter_signature" | "setter_signature" | "constructor_signature" => { + if let Some(name) = child.child_by_field_name("name") { + return Some(node_text(&name, source).to_string()); + } + } + _ => {} + } + } + } + None +} + +fn handle_dart_enum(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_dart_mixin(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_child(node, "identifier") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_dart_extension(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_dart_function_sig(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) 
=> n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "dart"), + cfg: build_function_cfg(node, "dart", source), + children: None, + }); +} + +fn handle_dart_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let spec = match find_child(node, "import_specification") { + Some(s) => s, + None => return, + }; + + let uri = find_child(&spec, "configurable_uri") + .or_else(|| find_child(&spec, "uri")); + if let Some(uri) = uri { + let raw = node_text(&uri, source); + let source_path = raw.trim_matches(|c| c == '\'' || c == '"').to_string(); + symbols.imports.push(Import::new( + source_path, + vec![], + start_line(node), + )); + } +} + +fn handle_dart_constructor_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = find_child(node, "type_identifier") + .or_else(|| find_child(node, "identifier")); + if let Some(name) = name_node { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + +fn handle_dart_type_alias(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = find_child(node, "type_identifier") + .or_else(|| find_child(node, "identifier")); + if let Some(name) = name_node { + symbols.definitions.push(Definition { + name: node_text(&name, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn is_inside_class(node: &Node) -> bool { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_body" | "class_definition" | "enum_body" | "mixin_declaration" => return true, + _ => {} + } + current = 
parent.parent(); + } + false +} diff --git a/crates/codegraph-core/src/extractors/elixir.rs b/crates/codegraph-core/src/extractors/elixir.rs new file mode 100644 index 00000000..85432143 --- /dev/null +++ b/crates/codegraph-core/src/extractors/elixir.rs @@ -0,0 +1,226 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct ElixirExtractor; + +impl SymbolExtractor for ElixirExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_elixir_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &ELIXIR_AST_CONFIG); + symbols + } +} + +fn match_elixir_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + if node.kind() != "call" { + return; + } + + let target = match node.child_by_field_name("target").or_else(|| node.child(0)) { + Some(t) => t, + None => return, + }; + + if target.kind() == "identifier" { + let keyword = node_text(&target, source); + match keyword { + "defmodule" => handle_defmodule(node, source, symbols), + "def" | "defp" => handle_def_function(node, source, symbols, keyword), + "defprotocol" => handle_defprotocol(node, source, symbols), + "defimpl" => handle_defimpl(node, source, symbols), + "import" | "use" | "require" | "alias" => handle_elixir_import(node, source, symbols, keyword), + _ => { + symbols.calls.push(Call { + name: keyword.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } + } else if target.kind() == "dot" { + handle_dot_call(node, &target, source, symbols); + } +} + +fn handle_defmodule(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let args = match find_child(node, "arguments") { + Some(a) => a, + None => return, + }; + let 
alias_node = match find_child(&args, "alias") { + Some(a) => a, + None => return, + }; + let name = node_text(&alias_node, source).to_string(); + + symbols.definitions.push(Definition { + name, + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_def_function(node: &Node, source: &[u8], symbols: &mut FileSymbols, keyword: &str) { + let args = match find_child(node, "arguments") { + Some(a) => a, + None => return, + }; + + // Function name is either in a nested call or a direct identifier + let fn_name = extract_elixir_fn_name(&args, source); + let fn_name = match fn_name { + Some(n) => n, + None => return, + }; + + // Find parent module + let parent_module = find_elixir_parent_module(node, source); + let full_name = match &parent_module { + Some(m) => format!("{}.{}", m, fn_name), + None => fn_name, + }; + + let visibility = if keyword == "defp" { Some("private".to_string()) } else { Some("public".to_string()) }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "elixir"), + cfg: build_function_cfg(node, "elixir", source), + children: None, + }); +} + +fn extract_elixir_fn_name<'a>(args: &Node<'a>, source: &'a [u8]) -> Option { + for i in 0..args.child_count() { + if let Some(child) = args.child(i) { + if child.kind() == "call" { + if let Some(target) = child.child_by_field_name("target").or_else(|| child.child(0)) { + if target.kind() == "identifier" { + return Some(node_text(&target, source).to_string()); + } + } + } + if child.kind() == "identifier" { + return Some(node_text(&child, source).to_string()); + } + } + } + None +} + +fn find_elixir_parent_module<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = 
current { + if parent.kind() == "do_block" { + if let Some(gp) = parent.parent() { + if gp.kind() == "call" { + if let Some(target) = gp.child_by_field_name("target").or_else(|| gp.child(0)) { + if target.kind() == "identifier" && node_text(&target, source) == "defmodule" { + if let Some(args) = find_child(&gp, "arguments") { + if let Some(alias) = find_child(&args, "alias") { + return Some(node_text(&alias, source).to_string()); + } + } + } + } + } + } + } + current = parent.parent(); + } + None +} + +fn handle_defprotocol(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let args = match find_child(node, "arguments") { + Some(a) => a, + None => return, + }; + let alias_node = match find_child(&args, "alias") { + Some(a) => a, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&alias_node, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_defimpl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let args = match find_child(node, "arguments") { + Some(a) => a, + None => return, + }; + let alias_node = match find_child(&args, "alias") { + Some(a) => a, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&alias_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_elixir_import(node: &Node, source: &[u8], symbols: &mut FileSymbols, keyword: &str) { + let args = match find_child(node, "arguments") { + Some(a) => a, + None => return, + }; + let alias_node = match find_child(&args, "alias") { + Some(a) => a, + None => return, + }; + + symbols.imports.push(Import::new( + node_text(&alias_node, source).to_string(), + vec![keyword.to_string()], + start_line(node), + )); +} + +fn 
handle_dot_call(node: &Node, dot_node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let right = find_child(dot_node, "identifier"); + let left = find_child(dot_node, "alias"); + + let name = match right { + Some(r) => node_text(&r, source).to_string(), + None => return, + }; + let receiver = left.map(|l| node_text(&l, source).to_string()); + + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver, + }); +} diff --git a/crates/codegraph-core/src/extractors/haskell.rs b/crates/codegraph-core/src/extractors/haskell.rs new file mode 100644 index 00000000..7d223712 --- /dev/null +++ b/crates/codegraph-core/src/extractors/haskell.rs @@ -0,0 +1,231 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct HaskellExtractor; + +impl SymbolExtractor for HaskellExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_haskell_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &HASKELL_AST_CONFIG); + symbols + } +} + +fn match_haskell_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function" => handle_haskell_function(node, source, symbols), + "bind" => handle_haskell_bind(node, source, symbols), + "data_type" => handle_haskell_data_type(node, source, symbols), + "newtype" => handle_haskell_newtype(node, source, symbols), + "type_synomym" => handle_haskell_type_synonym(node, source, symbols), + "class" => handle_haskell_class(node, source, symbols), + "instance" => handle_haskell_instance(node, source, symbols), + "import" => handle_haskell_import(node, source, symbols), + "apply" => handle_haskell_apply(node, source, symbols), + _ => {} + } +} + 
+fn handle_haskell_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "haskell"), + cfg: build_function_cfg(node, "haskell", source), + children: None, + }); +} + +fn handle_haskell_bind(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "variable".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_haskell_data_type(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + let name = node_text(&name_node, source).to_string(); + + let mut children = Vec::new(); + if let Some(constructors) = node.child_by_field_name("constructors") { + for i in 0..constructors.child_count() { + if let Some(ctor) = constructors.child(i) { + if ctor.kind() == "data_constructor" || ctor.kind() == "gadt_constructor" { + let ctor_name = find_child(&ctor, "constructor") + .or_else(|| find_child(&ctor, "constructor_operator")); + if let Some(cn) = ctor_name { + children.push(child_def( + node_text(&cn, source).to_string(), + "property", + start_line(&ctor), + )); + } + } + } + } + } + + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); +} + +fn 
handle_haskell_newtype(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_haskell_type_synonym(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_haskell_class(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_haskell_instance(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_haskell_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let module_node = match node.child_by_field_name("module") { + Some(n) => n, + None => return, + }; + + let source_name = 
node_text(&module_node, source).to_string(); + let mut names = Vec::new(); + + if let Some(alias) = node.child_by_field_name("alias") { + names.push(node_text(&alias, source).to_string()); + } + + if let Some(import_list) = node.child_by_field_name("names") { + for i in 0..import_list.child_count() { + if let Some(item) = import_list.child(i) { + match item.kind() { + "variable" | "constructor" | "type" => { + names.push(node_text(&item, source).to_string()); + } + _ => {} + } + } + } + } + + if names.is_empty() { + let last = source_name.split('.').last().unwrap_or(&source_name).to_string(); + names.push(last); + } + + symbols.imports.push(Import::new(source_name, names, start_line(node))); +} + +fn handle_haskell_apply(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let func_node = match node.child_by_field_name("function") { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "variable" | "constructor" | "identifier" | "qualified_variable" | "qualified_constructor" => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + _ => {} + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 441e1b50..ded12687 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -316,6 +316,72 @@ pub const BASH_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const ELIXIR_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call"], + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string"], + regex_types: &["sigil"], + quote_chars: &['"'], + string_prefixes: &[], +}; + +pub const LUA_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["function_call"], + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string"], + regex_types: &[], + 
quote_chars: &['\'', '"'], + string_prefixes: &[], +}; + +pub const DART_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["selector"], + new_types: &["new_expression", "constructor_invocation"], + throw_types: &["throw_expression"], + await_types: &["await_expression"], + string_types: &["string_literal"], + regex_types: &[], + quote_chars: &['\'', '"'], + string_prefixes: &[], +}; + +pub const ZIG_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["call_expression", "builtin_function"], + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string_literal"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + +pub const HASKELL_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["apply"], + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string", "char"], + regex_types: &[], + quote_chars: &['"', '\''], + string_prefixes: &[], +}; + +pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { + call_types: &["application_expression"], + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. 
diff --git a/crates/codegraph-core/src/extractors/lua.rs b/crates/codegraph-core/src/extractors/lua.rs new file mode 100644 index 00000000..2dccded3 --- /dev/null +++ b/crates/codegraph-core/src/extractors/lua.rs @@ -0,0 +1,147 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct LuaExtractor; + +impl SymbolExtractor for LuaExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_lua_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &LUA_AST_CONFIG); + symbols + } +} + +fn match_lua_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function_declaration" => handle_lua_function_decl(node, source, symbols), + "function_call" => handle_lua_function_call(node, source, symbols), + _ => {} + } +} + +fn handle_lua_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + let (name, kind) = match name_node.kind() { + "method_index_expression" => { + let table = name_node.child_by_field_name("table"); + let method = name_node.child_by_field_name("method"); + match (table, method) { + (Some(t), Some(m)) => ( + format!("{}.{}", node_text(&t, source), node_text(&m, source)), + "method", + ), + _ => (node_text(&name_node, source).to_string(), "function"), + } + } + "dot_index_expression" => { + let table = name_node.child_by_field_name("table"); + let field = name_node.child_by_field_name("field"); + match (table, field) { + (Some(t), Some(f)) => ( + format!("{}.{}", node_text(&t, source), node_text(&f, source)), + "method", + ), + _ => (node_text(&name_node, 
source).to_string(), "function"), + } + } + _ => (node_text(&name_node, source).to_string(), "function"), + }; + + let params = extract_lua_params(node, source); + + symbols.definitions.push(Definition { + name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "lua"), + cfg: build_function_cfg(node, "lua", source), + children: opt_children(params), + }); +} + +fn extract_lua_params(func_node: &Node, source: &[u8]) -> Vec { + let mut params = Vec::new(); + if let Some(param_list) = func_node.child_by_field_name("parameters") { + for i in 0..param_list.child_count() { + if let Some(child) = param_list.child(i) { + if child.kind() == "identifier" { + params.push(child_def( + node_text(&child, source).to_string(), + "parameter", + start_line(&child), + )); + } + } + } + } + params +} + +fn handle_lua_function_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + // Check for require() as import + if name_node.kind() == "identifier" && node_text(&name_node, source) == "require" { + if let Some(args) = node.child_by_field_name("arguments") { + if let Some(str_arg) = find_child(&args, "string") { + let raw = node_text(&str_arg, source); + let source_path = raw.trim_matches(|c| c == '\'' || c == '"').to_string(); + symbols.imports.push(Import::new( + source_path, + vec!["require".to_string()], + start_line(node), + )); + return; + } + } + } + + match name_node.kind() { + "method_index_expression" => { + let method = name_node.child_by_field_name("method"); + let table = name_node.child_by_field_name("table"); + if let Some(m) = method { + symbols.calls.push(Call { + name: node_text(&m, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: table.map(|t| node_text(&t, source).to_string()), + }); + } + } + "dot_index_expression" => { + let 
field = name_node.child_by_field_name("field"); + let table = name_node.child_by_field_name("table"); + if let Some(f) = field { + symbols.calls.push(Call { + name: node_text(&f, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: table.map(|t| node_text(&t, source).to_string()), + }); + } + } + _ => { + symbols.calls.push(Call { + name: node_text(&name_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 0a9984db..64700059 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -2,18 +2,24 @@ pub mod bash; pub mod c; pub mod cpp; pub mod csharp; +pub mod dart; +pub mod elixir; pub mod go; +pub mod haskell; pub mod hcl; pub mod helpers; pub mod java; pub mod javascript; pub mod kotlin; +pub mod lua; +pub mod ocaml; pub mod php; pub mod python; pub mod ruby; pub mod rust_lang; pub mod scala; pub mod swift; +pub mod zig; use crate::parser_registry::LanguageKind; use crate::types::FileSymbols; @@ -102,5 +108,23 @@ pub fn extract_symbols_with_opts( LanguageKind::Bash => { bash::BashExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Elixir => { + elixir::ElixirExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } + LanguageKind::Lua => { + lua::LuaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } + LanguageKind::Dart => { + dart::DartExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } + LanguageKind::Zig => { + zig::ZigExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } + LanguageKind::Haskell => { + haskell::HaskellExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } + LanguageKind::Ocaml => { + ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + 
} } } diff --git a/crates/codegraph-core/src/extractors/ocaml.rs b/crates/codegraph-core/src/extractors/ocaml.rs new file mode 100644 index 00000000..d13e77be --- /dev/null +++ b/crates/codegraph-core/src/extractors/ocaml.rs @@ -0,0 +1,248 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct OcamlExtractor; + +impl SymbolExtractor for OcamlExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_ocaml_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &OCAML_AST_CONFIG); + symbols + } +} + +fn match_ocaml_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "value_definition" => handle_ocaml_value_def(node, source, symbols), + "module_definition" => handle_ocaml_module_def(node, source, symbols), + "type_definition" => handle_ocaml_type_def(node, source, symbols), + "class_definition" => handle_ocaml_class_def(node, source, symbols), + "open_module" => handle_ocaml_open(node, source, symbols), + "application_expression" => handle_ocaml_application(node, source, symbols), + _ => {} + } +} + +fn handle_ocaml_value_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "let_binding" { + handle_ocaml_let_binding(&child, source, symbols); + } + } + } +} + +fn handle_ocaml_let_binding(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let pattern = match node.child_by_field_name("pattern").or_else(|| node.child(0)) { + Some(p) => p, + None => return, + }; + + let name = extract_ocaml_pattern_name(&pattern, source); + let name = match name { + Some(n) => n, + None => return, + 
}; + + let has_params = has_ocaml_params(node); + + if has_params { + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "ocaml"), + cfg: build_function_cfg(node, "ocaml", source), + children: None, + }); + } else { + symbols.definitions.push(Definition { + name, + kind: "variable".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn extract_ocaml_pattern_name(pattern: &Node, source: &[u8]) -> Option { + match pattern.kind() { + "value_name" | "identifier" => Some(node_text(pattern, source).to_string()), + "parenthesized_operator" => Some(node_text(pattern, source).to_string()), + _ => { + find_child(pattern, "value_name") + .or_else(|| find_child(pattern, "identifier")) + .map(|n| node_text(&n, source).to_string()) + } + } +} + +fn has_ocaml_params(let_binding: &Node) -> bool { + for i in 0..let_binding.child_count() { + if let Some(child) = let_binding.child(i) { + if child.kind() == "parameter" || child.kind() == "value_pattern" { + return true; + } + } + } + false +} + +fn handle_ocaml_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let binding = match find_child(node, "module_binding") { + Some(b) => b, + None => return, + }; + + let name_node = binding.child_by_field_name("name") + .or_else(|| find_child(&binding, "module_name")) + .or_else(|| find_child(&binding, "identifier")); + if let Some(name) = name_node { + symbols.definitions.push(Definition { + name: node_text(&name, source).to_string(), + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_ocaml_type_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + for i in 
0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() != "type_binding" { + continue; + } + + let name_node = child.child_by_field_name("name") + .or_else(|| find_child(&child, "type_constructor")) + .or_else(|| find_child(&child, "identifier")); + if let Some(name) = name_node { + let mut children = Vec::new(); + extract_ocaml_type_constructors(&child, source, &mut children); + + symbols.definitions.push(Definition { + name: node_text(&name, source).to_string(), + kind: "type".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } + } + } +} + +fn extract_ocaml_type_constructors(type_binding: &Node, source: &[u8], children: &mut Vec) { + for i in 0..type_binding.child_count() { + if let Some(child) = type_binding.child(i) { + if child.kind() == "constructor_declaration" { + let name = find_child(&child, "constructor_name") + .or_else(|| find_child(&child, "identifier")); + if let Some(n) = name { + children.push(child_def( + node_text(&n, source).to_string(), + "property", + start_line(&child), + )); + } + } + } + } +} + +fn handle_ocaml_class_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let binding = match find_child(node, "class_binding") { + Some(b) => b, + None => return, + }; + + let name_node = binding.child_by_field_name("name") + .or_else(|| find_child(&binding, "identifier")); + if let Some(name) = name_node { + symbols.definitions.push(Definition { + name: node_text(&name, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_ocaml_open(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut module_name: Option = None; + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "module_path" 
| "module_name" | "extended_module_path" | "constructor_name" => { + module_name = Some(node_text(&child, source).to_string()); + break; + } + _ => {} + } + } + } + + if let Some(name) = module_name { + let last = name.split('.').last().unwrap_or(&name).to_string(); + symbols.imports.push(Import::new(name, vec![last], start_line(node))); + } +} + +fn handle_ocaml_application(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let func_node = match node.child(0) { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "value_path" | "value_name" | "identifier" => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "field_get_expression" => { + let field = func_node.child_by_field_name("field") + .or_else(|| find_child(&func_node, "value_name")) + .or_else(|| find_child(&func_node, "identifier")); + let record = func_node.child(0); + if let Some(f) = field { + symbols.calls.push(Call { + name: node_text(&f, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: record.and_then(|r| { + if r.id() != f.id() { Some(node_text(&r, source).to_string()) } else { None } + }), + }); + } + } + _ => {} + } +} diff --git a/crates/codegraph-core/src/extractors/zig.rs b/crates/codegraph-core/src/extractors/zig.rs new file mode 100644 index 00000000..d165179b --- /dev/null +++ b/crates/codegraph-core/src/extractors/zig.rs @@ -0,0 +1,322 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct ZigExtractor; + +impl SymbolExtractor for ZigExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_zig_node); + 
walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &ZIG_AST_CONFIG); + symbols + } +} + +fn match_zig_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function_declaration" => handle_zig_function(node, source, symbols), + "variable_declaration" => handle_zig_variable(node, source, symbols), + "call_expression" => handle_zig_call(node, source, symbols), + "builtin_function" => handle_zig_builtin(node, source, symbols), + "test_declaration" => handle_zig_test(node, source, symbols), + _ => {} + } +} + +fn handle_zig_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node.child_by_field_name("name") { + Some(n) => n, + None => return, + }; + + let parent_struct = find_zig_parent_struct(node, source); + let name_text = node_text(&name_node, source); + let (full_name, kind) = match &parent_struct { + Some(s) => (format!("{}.{}", s, name_text), "method"), + None => (name_text.to_string(), "function"), + }; + + let params = extract_zig_params(node, source); + let is_pub = is_zig_pub(node, source); + + symbols.definitions.push(Definition { + name: full_name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "zig"), + cfg: build_function_cfg(node, "zig", source), + children: opt_children(params), + }); +} + +fn extract_zig_params(func_node: &Node, source: &[u8]) -> Vec { + let mut params = Vec::new(); + if let Some(param_list) = func_node.child_by_field_name("parameters") { + for i in 0..param_list.child_count() { + if let Some(child) = param_list.child(i) { + if child.kind() == "parameter" { + if let Some(name_node) = find_child(&child, "identifier") { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(&child), + )); + } + } + } + } + } + params +} + +fn handle_zig_variable(node: &Node, source: &[u8], 
symbols: &mut FileSymbols) { + let name_node = match find_child(node, "identifier") { + Some(n) => n, + None => return, + }; + let name = node_text(&name_node, source).to_string(); + + // Check for struct/enum/union + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "struct_declaration" => { + let members = extract_zig_container_fields(&child, source); + symbols.definitions.push(Definition { + name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(members), + }); + return; + } + "enum_declaration" => { + symbols.definitions.push(Definition { + name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + return; + } + "union_declaration" => { + symbols.definitions.push(Definition { + name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + return; + } + _ => {} + } + } + } + + // Check for @import + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "builtin_function" { + if let Some(builtin_id) = find_child(&child, "builtin_identifier") { + if node_text(&builtin_id, source) == "@import" { + if let Some(args) = find_child(&child, "arguments") { + for j in 0..args.child_count() { + if let Some(arg) = args.child(j) { + if arg.kind() == "string_literal" || arg.kind() == "string" { + let raw = node_text(&arg, source); + let source_path = raw.trim_matches('"').to_string(); + symbols.imports.push(Import::new( + source_path, + vec![name], + start_line(node), + )); + return; + } + } + } + } + } + } + } + } + } + + // Regular const/var + let is_const = node_has_child_text(node, source, "const"); + symbols.definitions.push(Definition { + name, + 
kind: if is_const { "constant" } else { "variable" }.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn extract_zig_container_fields(container: &Node, source: &[u8]) -> Vec { + let mut fields = Vec::new(); + for i in 0..container.child_count() { + if let Some(child) = container.child(i) { + if child.kind() == "container_field" { + let name_node = child.child_by_field_name("name") + .or_else(|| find_child(&child, "identifier")); + if let Some(n) = name_node { + fields.push(child_def( + node_text(&n, source).to_string(), + "property", + start_line(&child), + )); + } + } + } + } + fields +} + +fn handle_zig_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let func_node = match node.child_by_field_name("function").or_else(|| node.child(0)) { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "field_expression" | "field_access" => { + let field = func_node.child_by_field_name("field") + .or_else(|| func_node.child_by_field_name("member")); + let value = func_node.child(0); + if let Some(f) = field { + symbols.calls.push(Call { + name: node_text(&f, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: value.map(|v| node_text(&v, source).to_string()), + }); + } + } + _ => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } +} + +fn handle_zig_builtin(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let builtin_id = match find_child(node, "builtin_identifier") { + Some(n) => n, + None => return, + }; + + let name = node_text(&builtin_id, source); + if name == "@import" && node.parent().map(|p| p.kind()) != Some("variable_declaration") { + if let Some(args) = find_child(node, "arguments") { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + if arg.kind() == "string_literal" || 
arg.kind() == "string" { + let raw = node_text(&arg, source); + let source_path = raw.trim_matches('"').to_string(); + symbols.imports.push(Import::new( + source_path, + vec!["@import".to_string()], + start_line(node), + )); + return; + } + } + } + } + } + + symbols.calls.push(Call { + name: name.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); +} + +fn handle_zig_test(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut name = "test".to_string(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "string_literal" || child.kind() == "identifier" { + name = node_text(&child, source).trim_matches('"').to_string(); + break; + } + } + } + + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn find_zig_parent_struct<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "struct_declaration" || parent.kind() == "union_declaration" { + // The name is in the grandparent variable_declaration + if let Some(gp) = parent.parent() { + if gp.kind() == "variable_declaration" { + if let Some(name_node) = find_child(&gp, "identifier") { + return Some(node_text(&name_node, source).to_string()); + } + } + } + } + current = parent.parent(); + } + None +} + +fn is_zig_pub(node: &Node, source: &[u8]) -> bool { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if node_text(&child, source) == "pub" { + return true; + } + } + } + false +} + +fn node_has_child_text(node: &Node, source: &[u8], text: &str) -> bool { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if node_text(&child, source) == text { + return true; + } + } + } + false +} diff --git a/crates/codegraph-core/src/parser_registry.rs 
b/crates/codegraph-core/src/parser_registry.rs index ea2a64dc..bf8994a3 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -20,6 +20,12 @@ pub enum LanguageKind { Swift, Scala, Bash, + Elixir, + Lua, + Dart, + Zig, + Haskell, + Ocaml, } impl LanguageKind { @@ -44,6 +50,12 @@ impl LanguageKind { Self::Swift => "swift", Self::Scala => "scala", Self::Bash => "bash", + Self::Elixir => "elixir", + Self::Lua => "lua", + Self::Dart => "dart", + Self::Zig => "zig", + Self::Haskell => "haskell", + Self::Ocaml => "ocaml", } } @@ -76,6 +88,12 @@ impl LanguageKind { "swift" => Some(Self::Swift), "scala" => Some(Self::Scala), "sh" | "bash" => Some(Self::Bash), + "ex" | "exs" => Some(Self::Elixir), + "lua" => Some(Self::Lua), + "dart" => Some(Self::Dart), + "zig" => Some(Self::Zig), + "hs" => Some(Self::Haskell), + "ml" | "mli" => Some(Self::Ocaml), _ => None, } } @@ -100,6 +118,12 @@ impl LanguageKind { Self::Swift => tree_sitter_swift::LANGUAGE.into(), Self::Scala => tree_sitter_scala::LANGUAGE.into(), Self::Bash => tree_sitter_bash::LANGUAGE.into(), + Self::Elixir => tree_sitter_elixir::LANGUAGE.into(), + Self::Lua => tree_sitter_lua::LANGUAGE.into(), + Self::Dart => tree_sitter_dart::language().into(), + Self::Zig => tree_sitter_zig::LANGUAGE.into(), + Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), + Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), } } } diff --git a/package-lock.json b/package-lock.json index 9f4fdead..ae0b0ed2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,8 @@ "@commitlint/config-conventional": "^20.0", "@huggingface/transformers": "^3.8.1", "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", "@types/better-sqlite3": "^7.6.13", "@vitest/coverage-v8": "^4.0.18", "commit-and-tag-version": "^12.5", @@ -31,10 +33,14 @@ "tree-sitter-c-sharp": "^0.23.1", 
"tree-sitter-cli": "^0.26.5", "tree-sitter-cpp": "^0.23.4", + "tree-sitter-dart": "^1.0.0", + "tree-sitter-elixir": "^0.3.5", "tree-sitter-go": "^0.25.0", + "tree-sitter-haskell": "^0.23.1", "tree-sitter-java": "^0.23.5", "tree-sitter-javascript": "^0.25.0", "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-ocaml": "^0.24.2", "tree-sitter-php": "^0.24.2", "tree-sitter-python": "^0.25.0", "tree-sitter-ruby": "^0.23.1", @@ -1282,9 +1288,6 @@ "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1298,9 +1301,6 @@ "cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1314,9 +1314,6 @@ "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1738,6 +1735,46 @@ } } }, + "node_modules/@tree-sitter-grammars/tree-sitter-lua": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-lua/-/tree-sitter-lua-0.4.1.tgz", + "integrity": "sha512-EwagFaU6ZveVk18/Y8qUhZkkiBKnQ7dSCHbm//TUroLVKy3i1rOYGy/cNHtSkAb1eDvS1HhCLybH2S541Cya/g==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/@tree-sitter-grammars/tree-sitter-zig": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-zig/-/tree-sitter-zig-1.1.2.tgz", + "integrity": "sha512-J0L31HZ2isy3F5zb2g5QWQOv2r/pbruQNL9ADhuQv2pn5BQOzxt80WcEJaYXBeuJ8GHxVT42slpCna8k1c8LOw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, 
"node_modules/@tybys/wasm-util": { "version": "0.10.1", "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", @@ -5482,6 +5519,13 @@ "license": "MIT", "optional": true }, + "node_modules/nan": { + "version": "2.26.2", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.26.2.tgz", + "integrity": "sha512-0tTvBTYkt3tdGw22nrAy50x7gpbGCCFH3AFcyS5WiUu7Eu4vWlri1woE6qHBSfy11vksDqkiwjOnlR7WV8G1Hw==", + "dev": true, + "license": "MIT" + }, "node_modules/nanoid": { "version": "3.3.11", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", @@ -7110,6 +7154,39 @@ } } }, + "node_modules/tree-sitter-dart": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/tree-sitter-dart/-/tree-sitter-dart-1.0.0.tgz", + "integrity": "sha512-Ve5YMPJjjGW9LEsO+MngAOibQsw5obFp+bUT41pvwdcXWRwJImOWs3eaPi6AubEiBmc09qvhdvxeIXvxlhMnug==", + "dev": true, + "hasInstallScript": true, + "license": "ISC", + "dependencies": { + "nan": "^2.15.0" + } + }, + "node_modules/tree-sitter-elixir": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/tree-sitter-elixir/-/tree-sitter-elixir-0.3.5.tgz", + "integrity": "sha512-xozQMvYK0aSolcQZAx2d84Xe/YMWFuRPYFlLVxO01bM2GITh5jyiIp0TqPCQa8754UzRAI7A83hZmfiYub5TZQ==", + "dev": true, + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + } + }, + "node_modules/tree-sitter-elixir/node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "dev": true, + "license": "MIT" + }, "node_modules/tree-sitter-go": { "version": "0.25.0", "resolved": "https://registry.npmjs.org/tree-sitter-go/-/tree-sitter-go-0.25.0.tgz", @@ -7130,6 +7207,26 @@ } } }, + 
"node_modules/tree-sitter-haskell": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-haskell/-/tree-sitter-haskell-0.23.1.tgz", + "integrity": "sha512-qG4CYhejveu9DLMLEGBz/n9/TTeGSFLC6wniwOgG6m8/v7Dng8qR0ob0EVG7+XH+9WiOxohpGA23EhceWuxY4w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-java": { "version": "0.23.5", "resolved": "https://registry.npmjs.org/tree-sitter-java/-/tree-sitter-java-0.23.5.tgz", @@ -7197,6 +7294,26 @@ "dev": true, "license": "MIT" }, + "node_modules/tree-sitter-ocaml": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/tree-sitter-ocaml/-/tree-sitter-ocaml-0.24.2.tgz", + "integrity": "sha512-H0RAeCepIyXyTPCQra6yMd7Bn5ZBYkIaddzdLNwVZpM9mCe2e8av+3O6Ojl7Z8YHrV/kYsfHvI2y+Hh7qzcYQQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-php": { "version": "0.24.2", "resolved": "https://registry.npmjs.org/tree-sitter-php/-/tree-sitter-php-0.24.2.tgz", diff --git a/package.json b/package.json index 5866b683..f2d26e18 100644 --- a/package.json +++ b/package.json @@ -144,19 +144,25 @@ "@commitlint/config-conventional": "^20.0", "@huggingface/transformers": "^3.8.1", "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", "@types/better-sqlite3": "^7.6.13", "@vitest/coverage-v8": "^4.0.18", "commit-and-tag-version": "^12.5", "husky": "^9.1", "tree-sitter-bash": "^0.25.1", + "tree-sitter-dart": 
"^1.0.0", + "tree-sitter-elixir": "^0.3.5", "tree-sitter-c": "^0.24.1", "tree-sitter-c-sharp": "^0.23.1", "tree-sitter-cpp": "^0.23.4", "tree-sitter-cli": "^0.26.5", "tree-sitter-go": "^0.25.0", + "tree-sitter-haskell": "^0.23.1", "tree-sitter-java": "^0.23.5", "tree-sitter-javascript": "^0.25.0", "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-ocaml": "^0.24.2", "tree-sitter-php": "^0.24.2", "tree-sitter-python": "^0.25.0", "tree-sitter-ruby": "^0.23.1", diff --git a/scripts/build-wasm.ts b/scripts/build-wasm.ts index 692a6e56..da10d30b 100644 --- a/scripts/build-wasm.ts +++ b/scripts/build-wasm.ts @@ -40,6 +40,12 @@ const grammars = [ { name: 'tree-sitter-swift', pkg: 'tree-sitter-swift', sub: null }, { name: 'tree-sitter-scala', pkg: 'tree-sitter-scala', sub: null }, { name: 'tree-sitter-bash', pkg: 'tree-sitter-bash', sub: null }, + { name: 'tree-sitter-elixir', pkg: 'tree-sitter-elixir', sub: null }, + { name: 'tree-sitter-lua', pkg: '@tree-sitter-grammars/tree-sitter-lua', sub: null }, + { name: 'tree-sitter-dart', pkg: 'tree-sitter-dart', sub: null }, + { name: 'tree-sitter-zig', pkg: '@tree-sitter-grammars/tree-sitter-zig', sub: null }, + { name: 'tree-sitter-haskell', pkg: 'tree-sitter-haskell', sub: null }, + { name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' }, ]; let failed = 0; diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 5ec638b3..bc7c4543 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -20,10 +20,15 @@ export { extractCppSymbols, extractCSharpSymbols, extractCSymbols, + extractDartSymbols, + extractElixirSymbols, extractGoSymbols, + extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, extractKotlinSymbols, + extractLuaSymbols, + extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, extractRubySymbols, @@ -31,6 +36,7 @@ export { extractScalaSymbols, extractSwiftSymbols, extractSymbols, + extractZigSymbols, } from '../extractors/index.js'; import { @@ -38,10 +44,15 @@ import { 
extractCppSymbols, extractCSharpSymbols, extractCSymbols, + extractDartSymbols, + extractElixirSymbols, extractGoSymbols, + extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, extractKotlinSymbols, + extractLuaSymbols, + extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, extractRubySymbols, @@ -49,6 +60,7 @@ import { extractScalaSymbols, extractSwiftSymbols, extractSymbols, + extractZigSymbols, } from '../extractors/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -480,6 +492,48 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ extractor: extractBashSymbols, required: false, }, + { + id: 'elixir', + extensions: ['.ex', '.exs'], + grammarFile: 'tree-sitter-elixir.wasm', + extractor: extractElixirSymbols, + required: false, + }, + { + id: 'lua', + extensions: ['.lua'], + grammarFile: 'tree-sitter-lua.wasm', + extractor: extractLuaSymbols, + required: false, + }, + { + id: 'dart', + extensions: ['.dart'], + grammarFile: 'tree-sitter-dart.wasm', + extractor: extractDartSymbols, + required: false, + }, + { + id: 'zig', + extensions: ['.zig'], + grammarFile: 'tree-sitter-zig.wasm', + extractor: extractZigSymbols, + required: false, + }, + { + id: 'haskell', + extensions: ['.hs'], + grammarFile: 'tree-sitter-haskell.wasm', + extractor: extractHaskellSymbols, + required: false, + }, + { + id: 'ocaml', + extensions: ['.ml', '.mli'], + grammarFile: 'tree-sitter-ocaml.wasm', + extractor: extractOCamlSymbols, + required: false, + }, ]; const _extToLang: Map = new Map(); diff --git a/src/extractors/dart.ts b/src/extractors/dart.ts new file mode 100644 index 00000000..4e2b76ed --- /dev/null +++ b/src/extractors/dart.ts @@ -0,0 +1,310 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Dart files. 
+ */ +export function extractDartSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkDartNode(tree.rootNode, ctx); + return ctx; +} + +function walkDartNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'class_definition': + handleDartClass(node, ctx); + break; + case 'enum_declaration': + handleDartEnum(node, ctx); + break; + case 'mixin_declaration': + handleDartMixin(node, ctx); + break; + case 'extension_declaration': + handleDartExtension(node, ctx); + break; + case 'function_signature': + handleDartFunction(node, ctx); + break; + case 'method_signature': + handleDartMethodSig(node, ctx); + break; + case 'library_import': + handleDartImport(node, ctx); + break; + case 'constructor_invocation': + case 'new_expression': + handleDartConstructorCall(node, ctx); + break; + case 'type_alias': + handleDartTypeAlias(node, ctx); + break; + case 'selector': + handleDartSelector(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkDartNode(child, ctx); + } +} + +function handleDartClass(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const name = nameNode.text; + const children: SubDeclaration[] = []; + + const body = node.childForFieldName('body') || findChild(node, 'class_body'); + if (body) { + extractDartClassMembers(body, name, ctx, children); + } + + ctx.definitions.push({ + name, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: children.length > 0 ? 
children : undefined, + }); + + extractDartInheritance(node, name, ctx); +} + +function extractDartClassMembers( + body: TreeSitterNode, + className: string, + ctx: ExtractorOutput, + children: SubDeclaration[], +): void { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + + if (member.type === 'method_signature' || member.type === 'function_signature') { + const fnName = extractDartFunctionName(member); + if (fnName) { + ctx.definitions.push({ + name: `${className}.${fnName}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: nodeEndLine(member), + }); + } + } else if (member.type === 'declaration') { + // Field declarations + for (let j = 0; j < member.childCount; j++) { + const decl = member.child(j); + if (decl?.type === 'identifier') { + children.push({ + name: decl.text, + kind: 'property', + line: member.startPosition.row + 1, + }); + break; + } + } + } + } +} + +function extractDartFunctionName(node: TreeSitterNode): string | null { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode.text; + + // Walk children for function_signature inside method_signature + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if ( + child.type === 'function_signature' || + child.type === 'getter_signature' || + child.type === 'setter_signature' || + child.type === 'constructor_signature' + ) { + const name = child.childForFieldName('name'); + if (name) return name.text; + } + } + return null; +} + +function handleDartEnum(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleDartMixin(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findChild(node, 'identifier'); + if 
(!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleDartExtension(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleDartFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + // Skip methods already emitted by class handler + if (isInsideDartClass(node)) return; + + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleDartMethodSig(node: TreeSitterNode, ctx: ExtractorOutput): void { + if (isInsideDartClass(node)) return; + const fnName = extractDartFunctionName(node); + if (!fnName) return; + + ctx.definitions.push({ + name: fnName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function isInsideDartClass(node: TreeSitterNode): boolean { + let current = node.parent; + while (current) { + if ( + current.type === 'class_body' || + current.type === 'class_definition' || + current.type === 'enum_body' || + current.type === 'mixin_declaration' + ) { + return true; + } + current = current.parent; + } + return false; +} + +function handleDartImport(node: TreeSitterNode, ctx: ExtractorOutput): void { + const spec = findChild(node, 'import_specification'); + if (!spec) return; + + const uri = findChild(spec, 'configurable_uri') || findChild(spec, 'uri'); + if (!uri) return; + + const source = uri.text.replace(/^['"]|['"]$/g, ''); + const names: string[] = []; + + // Check for `as` alias + const alias = findChild(spec, 'identifier'); + if (alias) names.push(alias.text); + + 
ctx.imports.push({ + source, + names: names.length > 0 ? names : [source.split('/').pop() || source], + line: node.startPosition.row + 1, + }); +} + +function handleDartConstructorCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findChild(node, 'type_identifier') || findChild(node, 'identifier'); + if (!nameNode) return; + + ctx.calls.push({ + name: nameNode.text, + line: node.startPosition.row + 1, + }); +} + +function handleDartSelector(node: TreeSitterNode, ctx: ExtractorOutput): void { + // selector with argument_part represents a function call + const argPart = findChild(node, 'argument_part'); + if (!argPart) return; + + // Look for the identifier this selector belongs to + const unconditional = findChild(node, 'unconditional_assignable_selector'); + if (unconditional) { + const id = findChild(unconditional, 'identifier'); + if (id) { + ctx.calls.push({ name: id.text, line: node.startPosition.row + 1 }); + } + } +} + +function handleDartTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findChild(node, 'type_identifier') || findChild(node, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function extractDartInheritance(node: TreeSitterNode, name: string, ctx: ExtractorOutput): void { + const superclass = node.childForFieldName('superclass'); + if (superclass) { + const typeName = + findChild(superclass, 'type_identifier') || findChild(superclass, 'identifier'); + if (typeName) { + ctx.classes.push({ name, extends: typeName.text, line: node.startPosition.row + 1 }); + } + } + + const interfaces = node.childForFieldName('interfaces'); + if (interfaces) { + for (let i = 0; i < interfaces.childCount; i++) { + const iface = interfaces.child(i); + if (!iface) continue; + const typeName = + iface.type === 'type_identifier' + ? 
iface + : findChild(iface, 'type_identifier') || findChild(iface, 'identifier'); + if (typeName) { + ctx.classes.push({ name, implements: typeName.text, line: node.startPosition.row + 1 }); + } + } + } +} diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts new file mode 100644 index 00000000..3a7de9b6 --- /dev/null +++ b/src/extractors/elixir.ts @@ -0,0 +1,243 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Elixir files. + * + * Elixir's tree-sitter grammar represents most constructs as generic `call` nodes. + * We distinguish modules, functions, imports etc. by the call target's identifier text. + */ +export function extractElixirSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkElixirNode(tree.rootNode, ctx, null); + return ctx; +} + +function walkElixirNode( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + if (node.type === 'call') { + handleElixirCall(node, ctx, currentModule); + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkElixirNode(child, ctx, currentModule); + } +} + +function handleElixirCall( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + const target = node.childForFieldName('target'); + if (!target) return; + + if (target.type === 'identifier') { + const keyword = target.text; + switch (keyword) { + case 'defmodule': + handleDefmodule(node, ctx); + return; + case 'def': + case 'defp': + handleDefFunction(node, ctx, currentModule, keyword === 'defp' ? 
'private' : 'public'); + return; + case 'defprotocol': + handleDefprotocol(node, ctx); + return; + case 'defimpl': + handleDefimpl(node, ctx); + return; + case 'import': + case 'use': + case 'require': + case 'alias': + handleElixirImport(node, ctx, keyword); + return; + default: + // Regular function call + ctx.calls.push({ name: keyword, line: node.startPosition.row + 1 }); + return; + } + } + + if (target.type === 'dot') { + handleDotCall(node, target, ctx); + } +} + +function handleDefmodule(node: TreeSitterNode, ctx: ExtractorOutput): void { + const args = findChild(node, 'arguments'); + if (!args) return; + const aliasNode = findChild(args, 'alias'); + if (!aliasNode) return; + const name = aliasNode.text; + + const children: SubDeclaration[] = []; + const doBlock = findChild(node, 'do_block'); + if (doBlock) { + collectModuleMembers(doBlock, ctx, name, children); + } + + ctx.definitions.push({ + name, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: children.length > 0 ? children : undefined, + }); +} + +function collectModuleMembers( + doBlock: TreeSitterNode, + ctx: ExtractorOutput, + moduleName: string, + children: SubDeclaration[], +): void { + for (let i = 0; i < doBlock.childCount; i++) { + const child = doBlock.child(i); + if (!child || child.type !== 'call') continue; + const target = child.childForFieldName('target'); + if (!target || target.type !== 'identifier') continue; + + if (target.text === 'def' || target.text === 'defp') { + const fnName = extractFunctionName(child); + if (fnName) { + children.push({ + name: fnName, + kind: 'property', + line: child.startPosition.row + 1, + }); + } + } + } +} + +function handleDefFunction( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, + visibility: 'public' | 'private', +): void { + const fnName = extractFunctionName(node); + if (!fnName) return; + + const fullName = currentModule ? 
`${currentModule}.${fnName}` : fnName; + const params = extractElixirParams(node); + + ctx.definitions.push({ + name: fullName, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility, + children: params.length > 0 ? params : undefined, + }); +} + +function extractFunctionName(defCallNode: TreeSitterNode): string | null { + const args = findChild(defCallNode, 'arguments'); + if (!args) return null; + + for (let i = 0; i < args.childCount; i++) { + const child = args.child(i); + if (!child) continue; + if (child.type === 'call') { + const target = child.childForFieldName('target'); + if (target?.type === 'identifier') return target.text; + } + if (child.type === 'identifier') return child.text; + } + return null; +} + +function extractElixirParams(defCallNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const args = findChild(defCallNode, 'arguments'); + if (!args) return params; + + for (let i = 0; i < args.childCount; i++) { + const child = args.child(i); + if (!child || child.type !== 'call') continue; + const innerArgs = findChild(child, 'arguments'); + if (!innerArgs) continue; + for (let j = 0; j < innerArgs.childCount; j++) { + const param = innerArgs.child(j); + if (!param) continue; + if (param.type === 'identifier') { + params.push({ name: param.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function handleDefprotocol(node: TreeSitterNode, ctx: ExtractorOutput): void { + const args = findChild(node, 'arguments'); + if (!args) return; + const aliasNode = findChild(args, 'alias'); + if (!aliasNode) return; + + ctx.definitions.push({ + name: aliasNode.text, + kind: 'interface', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleDefimpl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const args = findChild(node, 'arguments'); + if (!args) return; + const aliasNode = findChild(args, 
'alias'); + if (!aliasNode) return; + + ctx.definitions.push({ + name: aliasNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleElixirImport(node: TreeSitterNode, ctx: ExtractorOutput, keyword: string): void { + const args = findChild(node, 'arguments'); + if (!args) return; + const aliasNode = findChild(args, 'alias'); + if (!aliasNode) return; + + ctx.imports.push({ + source: aliasNode.text, + names: [keyword], + line: node.startPosition.row + 1, + }); +} + +function handleDotCall(node: TreeSitterNode, dotNode: TreeSitterNode, ctx: ExtractorOutput): void { + const call: Call = { name: '', line: node.startPosition.row + 1 }; + const right = findChild(dotNode, 'identifier'); + const left = findChild(dotNode, 'alias'); + + if (right) call.name = right.text; + if (left) call.receiver = left.text; + + if (call.name) ctx.calls.push(call); +} diff --git a/src/extractors/haskell.ts b/src/extractors/haskell.ts new file mode 100644 index 00000000..e6f427d3 --- /dev/null +++ b/src/extractors/haskell.ts @@ -0,0 +1,241 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Haskell files. + * + * Note: tree-sitter-haskell uses `type_synomym` (misspelled) for type aliases. 
+ */ +export function extractHaskellSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkHaskellNode(tree.rootNode, ctx); + return ctx; +} + +function walkHaskellNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'function': + handleHaskellFunction(node, ctx); + break; + case 'bind': + handleHaskellBind(node, ctx); + break; + case 'data_type': + handleHaskellDataType(node, ctx); + break; + case 'newtype': + handleHaskellNewtype(node, ctx); + break; + case 'type_synomym': + handleHaskellTypeSynonym(node, ctx); + break; + case 'class': + handleHaskellClass(node, ctx); + break; + case 'instance': + handleHaskellInstance(node, ctx); + break; + case 'import': + handleHaskellImport(node, ctx); + break; + case 'apply': + handleHaskellApply(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkHaskellNode(child, ctx); + } +} + +function handleHaskellFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + const params = extractHaskellParams(node); + + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, + }); +} + +function extractHaskellParams(funcNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + // Haskell function patterns are positional children + for (let i = 0; i < funcNode.childCount; i++) { + const child = funcNode.child(i); + if (!child) continue; + if (child.type === 'patterns' || child.type === 'parameter') { + for (let j = 0; j < child.childCount; j++) { + const pat = child.child(j); + if (pat && (pat.type === 'variable' || pat.type === 'identifier')) { + params.push({ name: pat.text, kind: 'parameter', line: pat.startPosition.row + 1 }); + } + } + } + if (child.type === 'variable' && i > 0) { + // Pattern parameters after the function name + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + return params; +} + +function handleHaskellBind(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleHaskellDataType(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + const name = nameNode.text; + + const children: SubDeclaration[] = []; + // Extract constructors + const constructors = node.childForFieldName('constructors'); + if (constructors) { + for (let i = 0; i < constructors.childCount; i++) { + const ctor = constructors.child(i); + if (!ctor) continue; + if (ctor.type === 'data_constructor' || ctor.type === 'gadt_constructor') { + const ctorName = findChild(ctor, 'constructor') || findChild(ctor, 'constructor_operator'); + if (ctorName) { + children.push({ + name: ctorName.text, + kind: 'property', + line: ctor.startPosition.row + 1, + }); + } + } + } + } + + ctx.definitions.push({ + name, + kind: 'type', + line: node.startPosition.row + 1, + 
endLine: nodeEndLine(node), + children: children.length > 0 ? children : undefined, + }); +} + +function handleHaskellNewtype(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleHaskellTypeSynonym(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleHaskellClass(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleHaskellInstance(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleHaskellImport(node: TreeSitterNode, ctx: ExtractorOutput): void { + const moduleNode = node.childForFieldName('module'); + if (!moduleNode) return; + + const source = moduleNode.text; + const names: string[] = []; + + const alias = node.childForFieldName('alias'); + if (alias) names.push(alias.text); + + const importList = node.childForFieldName('names'); + if (importList) { + for (let i = 0; i < importList.childCount; i++) { + const item = importList.child(i); + if ( + item && + (item.type === 'variable' || item.type === 'constructor' || item.type === 'type') + ) { + names.push(item.text); + } + } + } + + ctx.imports.push({ + source, + names: names.length > 0 ? 
names : [source.split('.').pop() || source], + line: node.startPosition.row + 1, + }); +} + +function handleHaskellApply(node: TreeSitterNode, ctx: ExtractorOutput): void { + const funcNode = node.childForFieldName('function'); + if (!funcNode) return; + + // Only record named function applications, not complex expressions + if ( + funcNode.type === 'variable' || + funcNode.type === 'constructor' || + funcNode.type === 'identifier' + ) { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } else if (funcNode.type === 'qualified_variable' || funcNode.type === 'qualified_constructor') { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } +} diff --git a/src/extractors/index.ts b/src/extractors/index.ts index 65b7e1d9..65fd3087 100644 --- a/src/extractors/index.ts +++ b/src/extractors/index.ts @@ -2,14 +2,20 @@ export { extractBashSymbols } from './bash.js'; export { extractCSymbols } from './c.js'; export { extractCppSymbols } from './cpp.js'; export { extractCSharpSymbols } from './csharp.js'; +export { extractDartSymbols } from './dart.js'; +export { extractElixirSymbols } from './elixir.js'; export { extractGoSymbols } from './go.js'; +export { extractHaskellSymbols } from './haskell.js'; export { extractHCLSymbols } from './hcl.js'; export { extractJavaSymbols } from './java.js'; export { extractSymbols } from './javascript.js'; export { extractKotlinSymbols } from './kotlin.js'; +export { extractLuaSymbols } from './lua.js'; +export { extractOCamlSymbols } from './ocaml.js'; export { extractPHPSymbols } from './php.js'; export { extractPythonSymbols } from './python.js'; export { extractRubySymbols } from './ruby.js'; export { extractRustSymbols } from './rust.js'; export { extractScalaSymbols } from './scala.js'; export { extractSwiftSymbols } from './swift.js'; +export { extractZigSymbols } from './zig.js'; diff --git a/src/extractors/lua.ts b/src/extractors/lua.ts new file mode 100644 index 
00000000..c2d0dddc --- /dev/null +++ b/src/extractors/lua.ts @@ -0,0 +1,169 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Lua files. + */ +export function extractLuaSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkLuaNode(tree.rootNode, ctx); + return ctx; +} + +function walkLuaNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'function_declaration': + handleLuaFunctionDecl(node, ctx); + break; + case 'variable_declaration': + handleLuaVariableDecl(node, ctx); + break; + case 'function_call': + handleLuaFunctionCall(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkLuaNode(child, ctx); + } +} + +function handleLuaFunctionDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + let name: string; + let kind: 'function' | 'method' = 'function'; + + if (nameNode.type === 'method_index_expression') { + const table = nameNode.childForFieldName('table'); + const method = nameNode.childForFieldName('method'); + if (table && method) { + name = `${table.text}.${method.text}`; + kind = 'method'; + } else { + name = nameNode.text; + } + } else if (nameNode.type === 'dot_index_expression') { + const table = nameNode.childForFieldName('table'); + const field = nameNode.childForFieldName('field'); + if (table && field) { + name = `${table.text}.${field.text}`; + kind = 'method'; + } else { + name = nameNode.text; + } + } else { + name = nameNode.text; + } + + const params = extractLuaParams(node); + + ctx.definitions.push({ + name, + kind, + line: node.startPosition.row + 
1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); +} + +function extractLuaParams(funcNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const paramList = funcNode.childForFieldName('parameters'); + if (!paramList) return params; + + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || param.type !== 'identifier') continue; + params.push({ name: param.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + return params; +} + +function handleLuaVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + // Check for require calls in the assignment + const assignment = findChild(node, 'assignment_statement'); + if (assignment) { + checkForRequire(assignment, ctx); + } +} + +function checkForRequire(node: TreeSitterNode, ctx: ExtractorOutput): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'function_call') { + const nameNode = child.childForFieldName('name'); + if (nameNode && nameNode.type === 'identifier' && nameNode.text === 'require') { + const args = child.childForFieldName('arguments'); + if (args) { + const strArg = findChild(args, 'string'); + if (strArg) { + const source = strArg.text.replace(/^['"]|['"]$/g, ''); + ctx.imports.push({ + source, + names: ['require'], + line: child.startPosition.row + 1, + }); + } + } + } + } + } +} + +function handleLuaFunctionCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + // Check for require() as import + if (nameNode.type === 'identifier' && nameNode.text === 'require') { + const args = node.childForFieldName('arguments'); + if (args) { + const strArg = findChild(args, 'string'); + if (strArg) { + const source = strArg.text.replace(/^['"]|['"]$/g, ''); + ctx.imports.push({ + source, + names: ['require'], + line: 
node.startPosition.row + 1, + }); + return; + } + } + } + + const call: Call = { name: '', line: node.startPosition.row + 1 }; + + if (nameNode.type === 'method_index_expression') { + const table = nameNode.childForFieldName('table'); + const method = nameNode.childForFieldName('method'); + if (method) call.name = method.text; + if (table) call.receiver = table.text; + } else if (nameNode.type === 'dot_index_expression') { + const table = nameNode.childForFieldName('table'); + const field = nameNode.childForFieldName('field'); + if (field) call.name = field.text; + if (table) call.receiver = table.text; + } else { + call.name = nameNode.text; + } + + if (call.name) ctx.calls.push(call); +} diff --git a/src/extractors/ocaml.ts b/src/extractors/ocaml.ts new file mode 100644 index 00000000..33d8d294 --- /dev/null +++ b/src/extractors/ocaml.ts @@ -0,0 +1,259 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from OCaml files. 
+ */ +export function extractOCamlSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkOCamlNode(tree.rootNode, ctx); + return ctx; +} + +function walkOCamlNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'value_definition': + handleOCamlValueDef(node, ctx); + break; + case 'let_binding': + // Only handle top-level let bindings not inside value_definition + if (node.parent?.type !== 'value_definition') { + handleOCamlLetBinding(node, ctx); + } + break; + case 'module_definition': + handleOCamlModuleDef(node, ctx); + break; + case 'type_definition': + handleOCamlTypeDef(node, ctx); + break; + case 'class_definition': + handleOCamlClassDef(node, ctx); + break; + case 'open_module': + handleOCamlOpen(node, ctx); + break; + case 'application_expression': + handleOCamlApplication(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkOCamlNode(child, ctx); + } +} + +function handleOCamlValueDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + // value_definition contains one or more let_bindings + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'let_binding') { + handleOCamlLetBinding(child, ctx); + } + } +} + +function handleOCamlLetBinding(node: TreeSitterNode, ctx: ExtractorOutput): void { + // let_binding has a pattern (the name) and optionally a body + const pattern = node.childForFieldName('pattern'); + if (!pattern) return; + + // Check if this is a function (has parameter children) + const hasParams = hasOCamlParams(node); + const name = extractOCamlPatternName(pattern); + if (!name) return; + + if (hasParams) { + const params = extractOCamlParams(node); + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, 
+ endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); + } else { + ctx.definitions.push({ + name, + kind: 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } +} + +function extractOCamlPatternName(pattern: TreeSitterNode): string | null { + if (pattern.type === 'value_name' || pattern.type === 'identifier') { + return pattern.text; + } + // Operator definitions like `let (+) a b = ...` + if (pattern.type === 'parenthesized_operator') { + return pattern.text; + } + const nameNode = findChild(pattern, 'value_name') || findChild(pattern, 'identifier'); + return nameNode ? nameNode.text : null; +} + +function hasOCamlParams(letBinding: TreeSitterNode): boolean { + for (let i = 0; i < letBinding.childCount; i++) { + const child = letBinding.child(i); + if (!child) continue; + if (child.type === 'parameter' || child.type === 'value_pattern') return true; + } + return false; +} + +function extractOCamlParams(letBinding: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + for (let i = 0; i < letBinding.childCount; i++) { + const child = letBinding.child(i); + if (!child) continue; + if (child.type === 'parameter' || child.type === 'value_pattern') { + const name = extractOCamlPatternName(child); + if (name) { + params.push({ name, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function handleOCamlModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const binding = findChild(node, 'module_binding'); + if (!binding) return; + + const nameNode = + binding.childForFieldName('name') || + findChild(binding, 'module_name') || + findChild(binding, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleOCamlTypeDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + // type_definition 
contains one or more type_bindings + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child || child.type !== 'type_binding') continue; + + const nameNode = + child.childForFieldName('name') || + findChild(child, 'type_constructor') || + findChild(child, 'identifier'); + if (!nameNode) continue; + + const children: SubDeclaration[] = []; + extractOCamlTypeConstructors(child, children); + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: child.startPosition.row + 1, + endLine: nodeEndLine(child), + children: children.length > 0 ? children : undefined, + }); + } +} + +function extractOCamlTypeConstructors( + typeBinding: TreeSitterNode, + children: SubDeclaration[], +): void { + for (let i = 0; i < typeBinding.childCount; i++) { + const child = typeBinding.child(i); + if (!child) continue; + if (child.type === 'constructor_declaration') { + const nameNode = findChild(child, 'constructor_name') || findChild(child, 'identifier'); + if (nameNode) { + children.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } +} + +function handleOCamlClassDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const binding = findChild(node, 'class_binding'); + if (!binding) return; + + const nameNode = binding.childForFieldName('name') || findChild(binding, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleOCamlOpen(node: TreeSitterNode, ctx: ExtractorOutput): void { + // open_module contains a module_path + let moduleName: string | null = null; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if ( + child.type === 'module_path' || + child.type === 'module_name' || + child.type === 'extended_module_path' || + child.type === 'constructor_name' + ) { + moduleName = child.text; + break; 
+ } + } + if (!moduleName) return; + + ctx.imports.push({ + source: moduleName, + names: [moduleName.split('.').pop() || moduleName], + line: node.startPosition.row + 1, + }); +} + +function handleOCamlApplication(node: TreeSitterNode, ctx: ExtractorOutput): void { + // application_expression: first child is the function, rest are arguments + const funcNode = node.child(0); + if (!funcNode) return; + + if ( + funcNode.type === 'value_path' || + funcNode.type === 'value_name' || + funcNode.type === 'identifier' + ) { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } else if (funcNode.type === 'field_get_expression') { + // Module.function calls + const field = + funcNode.childForFieldName('field') || + findChild(funcNode, 'value_name') || + findChild(funcNode, 'identifier'); + const record = funcNode.child(0); + if (field) { + const call: Call = { name: field.text, line: node.startPosition.row + 1 }; + if (record && record !== field) call.receiver = record.text; + ctx.calls.push(call); + } + } +} diff --git a/src/extractors/zig.ts b/src/extractors/zig.ts new file mode 100644 index 00000000..3eb10f5b --- /dev/null +++ b/src/extractors/zig.ts @@ -0,0 +1,283 @@ +import type { + Call, + ExtractorOutput, + SubDeclaration, + TreeSitterNode, + TreeSitterTree, +} from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Zig files. + * + * Zig's structs/enums/unions are anonymous — their names come from the + * enclosing `variable_declaration` (e.g. `const Foo = struct { ... };`). 
+ */ +export function extractZigSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkZigNode(tree.rootNode, ctx); + return ctx; +} + +function walkZigNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'function_declaration': + handleZigFunction(node, ctx); + break; + case 'variable_declaration': + handleZigVariable(node, ctx); + break; + case 'call_expression': + handleZigCallExpression(node, ctx); + break; + case 'builtin_function': + handleZigBuiltin(node, ctx); + break; + case 'test_declaration': + handleZigTest(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkZigNode(child, ctx); + } +} + +function handleZigFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return; + + const params = extractZigParams(node); + + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + visibility: isZigPub(node) ? 
'public' : 'private', + }); +} + +function extractZigParams(funcNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const paramList = funcNode.childForFieldName('parameters'); + if (!paramList) return params; + + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || param.type !== 'parameter') continue; + const nameNode = findChild(param, 'identifier'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function handleZigVariable(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = findChild(node, 'identifier'); + if (!nameNode) return; + const name = nameNode.text; + + // Check if this is a struct/enum/union definition + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + + if (child.type === 'struct_declaration') { + const members = extractZigContainerFields(child); + ctx.definitions.push({ + name, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: members.length > 0 ? members : undefined, + visibility: isZigPub(node) ? 'public' : undefined, + }); + extractZigContainerMethods(child, name, ctx); + return; + } + if (child.type === 'enum_declaration') { + ctx.definitions.push({ + name, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isZigPub(node) ? 'public' : undefined, + }); + return; + } + if (child.type === 'union_declaration') { + ctx.definitions.push({ + name, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isZigPub(node) ? 
'public' : undefined, + }); + return; + } + } + + // Check for @import + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'builtin_function') { + const builtinId = findChild(child, 'builtin_identifier'); + if (builtinId?.text === '@import') { + const args = findChild(child, 'arguments'); + if (args) { + const strArg = findChild(args, 'string_literal') || findChild(args, 'string'); + if (strArg) { + const source = strArg.text.replace(/^"|"$/g, ''); + ctx.imports.push({ + source, + names: [name], + line: node.startPosition.row + 1, + }); + return; + } + } + } + } + } + + // Regular constant/variable + const isConst = hasChildText(node, 'const'); + ctx.definitions.push({ + name, + kind: isConst ? 'constant' : 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function extractZigContainerFields(container: TreeSitterNode): SubDeclaration[] { + const fields: SubDeclaration[] = []; + for (let i = 0; i < container.childCount; i++) { + const child = container.child(i); + if (!child || child.type !== 'container_field') continue; + const nameNode = child.childForFieldName('name') || findChild(child, 'identifier'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + return fields; +} + +function extractZigContainerMethods( + container: TreeSitterNode, + parentName: string, + ctx: ExtractorOutput, +): void { + for (let i = 0; i < container.childCount; i++) { + const child = container.child(i); + if (!child || child.type !== 'function_declaration') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + ctx.definitions.push({ + name: `${parentName}.${nameNode.text}`, + kind: 'method', + line: child.startPosition.row + 1, + endLine: nodeEndLine(child), + visibility: isZigPub(child) ? 
'public' : 'private', + }); + } + } +} + +function handleZigCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { + const funcNode = node.childForFieldName('function'); + if (!funcNode) return; + + const call: Call = { name: '', line: node.startPosition.row + 1 }; + + if (funcNode.type === 'field_expression' || funcNode.type === 'field_access') { + const field = funcNode.childForFieldName('field') || funcNode.childForFieldName('member'); + const value = funcNode.childForFieldName('value') || funcNode.child(0); + if (field) call.name = field.text; + if (value) call.receiver = value.text; + } else { + call.name = funcNode.text; + } + + if (call.name) ctx.calls.push(call); +} + +function handleZigBuiltin(node: TreeSitterNode, ctx: ExtractorOutput): void { + const builtinId = findChild(node, 'builtin_identifier'); + if (!builtinId) return; + + // Treat @import as import (when standalone, not in variable_declaration) + if (builtinId.text === '@import' && node.parent?.type !== 'variable_declaration') { + const args = findChild(node, 'arguments'); + if (args) { + const strArg = findChild(args, 'string_literal') || findChild(args, 'string'); + if (strArg) { + const source = strArg.text.replace(/^"|"$/g, ''); + ctx.imports.push({ + source, + names: ['@import'], + line: node.startPosition.row + 1, + }); + } + } + return; + } + + // Other builtins are calls + ctx.calls.push({ name: builtinId.text, line: node.startPosition.row + 1 }); +} + +function handleZigTest(node: TreeSitterNode, ctx: ExtractorOutput): void { + let name = 'test'; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'string_literal' || child.type === 'string') { + // Extract the string content child if available, otherwise strip quotes + const content = findChild(child, 'string_content'); + name = content ? 
content.text : child.text.replace(/^"|"$/g, ''); + break; + } + if (child.type === 'identifier') { + name = child.text; + break; + } + } + + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function isZigPub(node: TreeSitterNode): boolean { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === 'pub') return true; + if (child && child.text === 'pub') return true; + } + return false; +} + +function hasChildText(node: TreeSitterNode, text: string): boolean { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.text === text) return true; + } + return false; +} diff --git a/src/types.ts b/src/types.ts index 2a0751f5..200685b1 100644 --- a/src/types.ts +++ b/src/types.ts @@ -90,7 +90,13 @@ export type LanguageId = | 'kotlin' | 'swift' | 'scala' - | 'bash'; + | 'bash' + | 'elixir' + | 'lua' + | 'dart' + | 'zig' + | 'haskell' + | 'ocaml'; /** Engine mode selector. 
*/ export type EngineMode = 'native' | 'wasm' | 'auto'; diff --git a/tests/parsers/dart.test.ts b/tests/parsers/dart.test.ts new file mode 100644 index 00000000..26fc8556 --- /dev/null +++ b/tests/parsers/dart.test.ts @@ -0,0 +1,53 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractDartSymbols } from '../../src/domain/parser.js'; + +describe('Dart parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseDart(code) { + const parser = parsers.get('dart'); + if (!parser) throw new Error('Dart parser not available'); + const tree = parser.parse(code); + return extractDartSymbols(tree, 'test.dart'); + } + + it('extracts class definitions', () => { + const symbols = parseDart(`class User { +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'User', kind: 'class' }), + ); + }); + + it('extracts enum definitions', () => { + const symbols = parseDart(`enum Color { red, green, blue }`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Color', kind: 'enum' }), + ); + }); + + it('extracts class inheritance', () => { + const symbols = parseDart(`class Admin extends User { +}`); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'Admin', extends: 'User' }), + ); + }); + + it('extracts import statements', () => { + const symbols = parseDart(`import 'dart:io'; +import 'package:flutter/material.dart';`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts constructor calls', () => { + const symbols = parseDart(`var user = User("Alice");`); + // Constructor calls may or may not be detected depending on the grammar + // This test verifies the parser doesn't crash on constructor syntax + expect(symbols).toBeDefined(); + }); +}); diff --git a/tests/parsers/elixir.test.ts b/tests/parsers/elixir.test.ts new file mode 100644 index 00000000..be38474a --- 
/dev/null +++ b/tests/parsers/elixir.test.ts @@ -0,0 +1,59 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractElixirSymbols } from '../../src/domain/parser.js'; + +describe('Elixir parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseElixir(code) { + const parser = parsers.get('elixir'); + if (!parser) throw new Error('Elixir parser not available'); + const tree = parser.parse(code); + return extractElixirSymbols(tree, 'test.ex'); + } + + it('extracts module definitions', () => { + const symbols = parseElixir(`defmodule MyApp.User do +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyApp.User', kind: 'module' }), + ); + }); + + it('extracts function definitions', () => { + const symbols = parseElixir(`defmodule Greeter do + def greet(name) do + "Hello" + end +end`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts protocol definitions', () => { + const symbols = parseElixir(`defprotocol Printable do + def print(data) +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Printable', kind: 'interface' }), + ); + }); + + it('extracts imports (use/import/require)', () => { + const symbols = parseElixir(`use GenServer +import Enum +require Logger`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts function calls', () => { + const symbols = parseElixir(`defmodule Foo do + def bar do + IO.puts("hello") + end +end`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'puts', receiver: 'IO' })); + }); +}); diff --git a/tests/parsers/haskell.test.ts b/tests/parsers/haskell.test.ts new file mode 100644 index 00000000..91872162 --- /dev/null +++ b/tests/parsers/haskell.test.ts @@ -0,0 +1,56 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { 
createParsers, extractHaskellSymbols } from '../../src/domain/parser.js'; + +describe('Haskell parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseHaskell(code) { + const parser = parsers.get('haskell'); + if (!parser) throw new Error('Haskell parser not available'); + const tree = parser.parse(code); + return extractHaskellSymbols(tree, 'Test.hs'); + } + + it('extracts function declarations', () => { + const symbols = parseHaskell(`greet name = "Hello " ++ name`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'greet', kind: 'function' }), + ); + }); + + it('extracts data type declarations', () => { + const symbols = parseHaskell(`data Color = Red | Green | Blue`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'type' })); + }); + + it('extracts newtype declarations', () => { + const symbols = parseHaskell(`newtype Name = Name String`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'type' })); + }); + + it('extracts type aliases', () => { + const symbols = parseHaskell(`type Point = (Double, Double)`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'type' })); + }); + + it('extracts class declarations', () => { + const symbols = parseHaskell(`class Printable a where + prettyPrint :: a -> String`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'class' })); + }); + + it('extracts import statements', () => { + const symbols = parseHaskell(`import Data.List +import qualified Data.Map as Map`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts function applications as calls', () => { + const symbols = parseHaskell(`main = putStrLn "Hello"`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'putStrLn' })); + }); +}); diff --git a/tests/parsers/lua.test.ts b/tests/parsers/lua.test.ts new file 
mode 100644 index 00000000..7872c9ff --- /dev/null +++ b/tests/parsers/lua.test.ts @@ -0,0 +1,55 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractLuaSymbols } from '../../src/domain/parser.js'; + +describe('Lua parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseLua(code) { + const parser = parsers.get('lua'); + if (!parser) throw new Error('Lua parser not available'); + const tree = parser.parse(code); + return extractLuaSymbols(tree, 'test.lua'); + } + + it('extracts function declarations', () => { + const symbols = parseLua(`function greet(name) + return "Hello " .. name +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'greet', kind: 'function' }), + ); + }); + + it('extracts local function declarations', () => { + const symbols = parseLua(`local function helper(x) + return x + 1 +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'helper', kind: 'function' }), + ); + }); + + it('extracts method declarations (colon syntax)', () => { + const symbols = parseLua(`function MyClass:init(name) + self.name = name +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyClass.init', kind: 'method' }), + ); + }); + + it('extracts require calls as imports', () => { + const symbols = parseLua(`local json = require("cjson")`); + expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'cjson' })); + }); + + it('extracts function calls', () => { + const symbols = parseLua(`print("hello") +string.format("%s", name)`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'print' })); + }); +}); diff --git a/tests/parsers/ocaml.test.ts b/tests/parsers/ocaml.test.ts new file mode 100644 index 00000000..70e67d9a --- /dev/null +++ b/tests/parsers/ocaml.test.ts @@ -0,0 +1,55 @@ +import { beforeAll, describe, expect, it } from 
'vitest'; +import { createParsers, extractOCamlSymbols } from '../../src/domain/parser.js'; + +describe('OCaml parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseOCaml(code) { + const parser = parsers.get('ocaml'); + if (!parser) throw new Error('OCaml parser not available'); + const tree = parser.parse(code); + return extractOCamlSymbols(tree, 'test.ml'); + } + + it('extracts let function definitions', () => { + const symbols = parseOCaml(`let greet name = "Hello " ^ name`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'greet', kind: 'function' }), + ); + }); + + it('extracts let value definitions', () => { + const symbols = parseOCaml(`let pi = 3.14159`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'pi', kind: 'variable' }), + ); + }); + + it('extracts module definitions', () => { + const symbols = parseOCaml(`module MyModule = struct + let x = 1 +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyModule', kind: 'module' }), + ); + }); + + it('extracts type definitions', () => { + const symbols = parseOCaml(`type color = Red | Green | Blue`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'type' })); + }); + + it('extracts open statements as imports', () => { + const symbols = parseOCaml(`open Printf`); + expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'Printf' })); + }); + + it('extracts function applications as calls', () => { + const symbols = parseOCaml(`let () = print_endline "Hello"`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'print_endline' })); + }); +}); diff --git a/tests/parsers/zig.test.ts b/tests/parsers/zig.test.ts new file mode 100644 index 00000000..6985d65a --- /dev/null +++ b/tests/parsers/zig.test.ts @@ -0,0 +1,70 @@ +import { beforeAll, describe, expect, it } from 'vitest'; 
+import { createParsers, extractZigSymbols } from '../../src/domain/parser.js'; + +describe('Zig parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseZig(code) { + const parser = parsers.get('zig'); + if (!parser) throw new Error('Zig parser not available'); + const tree = parser.parse(code); + return extractZigSymbols(tree, 'test.zig'); + } + + it('extracts function declarations', () => { + const symbols = parseZig(`pub fn add(a: i32, b: i32) i32 { + return a + b; +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + }); + + it('extracts struct definitions', () => { + const symbols = parseZig(`const Point = struct { + x: f64, + y: f64, +};`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Point', kind: 'struct' }), + ); + }); + + it('extracts enum definitions', () => { + const symbols = parseZig(`const Color = enum { + red, + green, + blue, +};`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Color', kind: 'enum' }), + ); + }); + + it('extracts @import as imports', () => { + const symbols = parseZig(`const std = @import("std");`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'std', names: expect.arrayContaining(['std']) }), + ); + }); + + it('extracts function calls', () => { + const symbols = parseZig(`pub fn main() void { + std.debug.print("hello", .{}); +}`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts test declarations', () => { + const symbols = parseZig(`test "addition" { + const result = add(1, 2); +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'addition', kind: 'function' }), + ); + }); +}); From 0433af6e6a3164e0d89323eb462799e15de7216f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 
2026 11:51:44 -0600 Subject: [PATCH 4/9] fix: clean up unused imports and parameters in new extractors --- src/extractors/dart.ts | 3 +-- src/extractors/elixir.ts | 4 ++-- src/extractors/haskell.ts | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/extractors/dart.ts b/src/extractors/dart.ts index 4e2b76ed..c989e792 100644 --- a/src/extractors/dart.ts +++ b/src/extractors/dart.ts @@ -1,11 +1,10 @@ import type { - Call, ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Dart files. diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts index 3a7de9b6..5bf7ccec 100644 --- a/src/extractors/elixir.ts +++ b/src/extractors/elixir.ts @@ -108,8 +108,8 @@ function handleDefmodule(node: TreeSitterNode, ctx: ExtractorOutput): void { function collectModuleMembers( doBlock: TreeSitterNode, - ctx: ExtractorOutput, - moduleName: string, + _ctx: ExtractorOutput, + _moduleName: string, children: SubDeclaration[], ): void { for (let i = 0; i < doBlock.childCount; i++) { diff --git a/src/extractors/haskell.ts b/src/extractors/haskell.ts index e6f427d3..6f1af42e 100644 --- a/src/extractors/haskell.ts +++ b/src/extractors/haskell.ts @@ -1,5 +1,4 @@ import type { - Call, ExtractorOutput, SubDeclaration, TreeSitterNode, From 2c55c73db3ce9a676eba50f8cb9531970627863b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:52:04 -0600 Subject: [PATCH 5/9] style: format dart and haskell extractors --- src/extractors/dart.ts | 7 +------ src/extractors/haskell.ts | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/extractors/dart.ts b/src/extractors/dart.ts index c989e792..3816c39b 100644 --- a/src/extractors/dart.ts +++ b/src/extractors/dart.ts @@ -1,9 +1,4 @@ -import type { - 
ExtractorOutput, - SubDeclaration, - TreeSitterNode, - TreeSitterTree, -} from '../types.js'; +import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js'; import { findChild, nodeEndLine } from './helpers.js'; /** diff --git a/src/extractors/haskell.ts b/src/extractors/haskell.ts index 6f1af42e..765ef05f 100644 --- a/src/extractors/haskell.ts +++ b/src/extractors/haskell.ts @@ -1,9 +1,4 @@ -import type { - ExtractorOutput, - SubDeclaration, - TreeSitterNode, - TreeSitterTree, -} from '../types.js'; +import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js'; import { findChild, nodeEndLine } from './helpers.js'; /** From f47be65a586090d6e251d2a5696342543d538235 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:40:58 -0600 Subject: [PATCH 6/9] fix(elixir): propagate currentModule during tree walk for qualified function names (#718) walkElixirNode never updated currentModule when descending into defmodule bodies, so all functions got unqualified names (e.g. `greet` instead of `Greeter.greet`), breaking parity with the native Rust engine. Strengthen the test assertion to check qualified names. 
--- src/extractors/elixir.ts | 12 ++++++++++-- tests/parsers/elixir.test.ts | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts index 5bf7ccec..cb3407c9 100644 --- a/src/extractors/elixir.ts +++ b/src/extractors/elixir.ts @@ -32,13 +32,21 @@ function walkElixirNode( ctx: ExtractorOutput, currentModule: string | null, ): void { + let nextModule = currentModule; + if (node.type === 'call') { - handleElixirCall(node, ctx, currentModule); + const target = node.childForFieldName('target'); + if (target?.type === 'identifier' && target.text === 'defmodule') { + const args = findChild(node, 'arguments'); + const aliasNode = args && findChild(args, 'alias'); + if (aliasNode) nextModule = aliasNode.text; + } + handleElixirCall(node, ctx, nextModule); } for (let i = 0; i < node.childCount; i++) { const child = node.child(i); - if (child) walkElixirNode(child, ctx, currentModule); + if (child) walkElixirNode(child, ctx, nextModule); } } diff --git a/tests/parsers/elixir.test.ts b/tests/parsers/elixir.test.ts index be38474a..1b2f6b2b 100644 --- a/tests/parsers/elixir.test.ts +++ b/tests/parsers/elixir.test.ts @@ -29,7 +29,9 @@ end`); "Hello" end end`); - expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Greeter.greet', kind: 'function' }), + ); }); it('extracts protocol definitions', () => { From 0f7c637f413e59751faed0d83f9576d0040ae94e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:41:15 -0600 Subject: [PATCH 7/9] fix(zig): prevent duplicate struct method emission and remove unused is_pub (#718) WASM extractor: handleZigFunction now skips nodes inside struct/union containers since extractZigContainerMethods already emits them as qualified methods. 
Rust extractor: remove dead is_pub call from handle_zig_function (Definition struct lacks a visibility field). --- crates/codegraph-core/src/extractors/zig.rs | 4 ++-- src/extractors/zig.ts | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/codegraph-core/src/extractors/zig.rs b/crates/codegraph-core/src/extractors/zig.rs index d165179b..dfb6fa9c 100644 --- a/crates/codegraph-core/src/extractors/zig.rs +++ b/crates/codegraph-core/src/extractors/zig.rs @@ -41,8 +41,6 @@ fn handle_zig_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }; let params = extract_zig_params(node, source); - let is_pub = is_zig_pub(node, source); - symbols.definitions.push(Definition { name: full_name, kind: kind.to_string(), @@ -299,6 +297,8 @@ fn find_zig_parent_struct<'a>(node: &Node<'a>, source: &[u8]) -> Option None } +// TODO: wire into Definition once the struct gains a `visibility` field +#[allow(dead_code)] fn is_zig_pub(node: &Node, source: &[u8]) -> bool { for i in 0..node.child_count() { if let Some(child) = node.child(i) { diff --git a/src/extractors/zig.ts b/src/extractors/zig.ts index 3eb10f5b..d1bdab6a 100644 --- a/src/extractors/zig.ts +++ b/src/extractors/zig.ts @@ -52,7 +52,18 @@ function walkZigNode(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +function isInsideZigContainer(node: TreeSitterNode): boolean { + let current = node.parent; + while (current) { + if (current.type === 'struct_declaration' || current.type === 'union_declaration') return true; + current = current.parent; + } + return false; +} + function handleZigFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + if (isInsideZigContainer(node)) return; // already emitted by extractZigContainerMethods + const nameNode = node.childForFieldName('name'); if (!nameNode) return; From 441fcd1382fb2dd450f10d86742ac254b9c41f40 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:41:28 
-0600 Subject: [PATCH 8/9] docs: update language counts from 17 to 23 for Batch 2 (#718) CHANGELOG v3.6.0 entry incorrectly described Batch 1 languages instead of Batch 2. README comparison table, feature descriptions, and ROADMAP phase summary all updated to reflect 23 supported languages. --- CHANGELOG.md | 2 +- README.md | 14 +++++++------- docs/roadmap/ROADMAP.md | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a1fca19..41d45321 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. See [commit ## [3.6.0](https://github.com/optave/ops-codegraph-tool/compare/v3.5.0...v3.6.0) (2026-03-30) -**Six new languages and a parser abstraction layer.** This release adds first-class support for C, C++, Kotlin, Swift, Scala, and Bash — bringing the total supported languages to 17. A new parser abstraction layer decouples language extractors from tree-sitter internals, making it straightforward to add more languages. The native Rust engine gains batched query methods for the read path, WAL corruption is fixed when native and JS connections overlap, and WASM call-AST extraction is restored for full engine parity. +**Six new languages: Elixir, Lua, Dart, Zig, Haskell, OCaml.** This release adds first-class support for Elixir, Lua, Dart, Zig, Haskell, and OCaml — bringing the total supported languages to 23. Each language ships with dual-engine extractors (WASM TypeScript + native Rust), AST configs, and parser tests. The native Rust engine gains batched query methods for the read path, WAL corruption is fixed when native and JS connections overlap, and WASM call-AST extraction is restored for full engine parity. ### Features diff --git a/README.md b/README.md index 880600ef..d5cef8f4 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ No config files, no Docker, no JVM, no API keys, no accounts. 
Point your agent a | Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [axon](https://github.com/harshkedia177/axon) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | |---|:---:|:---:|:---:|:---:|:---:|:---:| -| Languages | **17** | ~12 | **32** | ~10 | 3 | 13 | +| Languages | **23** | ~12 | **32** | ~10 | 3 | 13 | | MCP server | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | | Dataflow + CFG + AST querying | **Yes** | **Yes** | **Yes**¹ | **Yes** | — | — | | Hybrid search (BM25 + semantic) | **Yes** | — | — | — | **Yes** | **Yes** | @@ -104,9 +104,9 @@ No config files, no Docker, no JVM, no API keys, no accounts. Point your agent a | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes | | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds — agents work with current data | | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. 
Ships with a GitHub Actions workflow | -| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + C + C++ + Kotlin + Swift + Scala + Bash + HCL in a single graph — agents don't need per-language tools | +| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + C + C++ + Kotlin + Swift + Scala + Bash + HCL + Elixir + Lua + Dart + Zig + Haskell + OCaml in a single graph — agents don't need per-language tools | | **🧠** | **Hybrid search** | BM25 keyword + semantic embeddings fused via RRF — `hybrid` (default), `semantic`, or `keyword` mode; multi-query via `"auth; token; JWT"` | -| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 17 languages | +| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 23 languages | | **🔓** | **Fully local, zero cost** | No API keys, no accounts, no network calls. 
Optionally bring your own LLM provider — your code only goes where you choose | --- @@ -186,7 +186,7 @@ cd codegraph && npm install && npm link | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking | | 👀 | **Watch mode** | Incrementally update the graph as files change | | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | -| 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 17 languages | +| 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 23 languages | | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function | | 🏘️ | **Community detection** | Leiden clustering to discover natural module boundaries and architectural drift | | 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) | @@ -199,8 +199,8 @@ cd codegraph && npm install && npm link | ✅ | **CI validation predicates** | `check` command with configurable gates: complexity, blast radius, cycles, boundary violations — exit code 0/1 for CI | | 📋 | **Composite audit** | Single `audit` command combining explain + impact + health metrics per function — one call instead of 3-4 | | 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue | -| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 17 languages, included by default, skip with `--no-dataflow` | -| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 17 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | +| 🔬 | **Dataflow analysis** | Track how data moves through functions 
with `flows_to`, `returns`, and `mutates` edges — all 23 languages, included by default, skip with `--no-dataflow` | +| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 23 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` | | 🔎 | **AST node querying** | Stored queryable AST nodes (calls, `new`, string, regex, throw, await) — `ast` command with SQL GLOB pattern matching | | 🧬 | **Expanded node/edge types** | `parameter`, `property`, `constant` node kinds with `parent_id` for sub-declaration queries; `contains`, `parameter_of`, `receiver` edge kinds | | 📊 | **Exports analysis** | `exports <file>` shows all exported symbols with per-symbol consumers, re-export detection, and counts | @@ -320,7 +320,7 @@ codegraph ast -k call # Filter by kind: call, new, string, regex codegraph ast -k throw --file src/ # Combine kind and file filters ``` -> **Note:** Dataflow and CFG are included by default for all 17 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. +> **Note:** Dataflow and CFG are included by default for all 23 languages. Use `--no-dataflow` / `--no-cfg` for faster builds. ### Audit, Triage & Batch diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 61188102..fec8667c 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -20,7 +20,7 @@ Codegraph is a strong local-first code graph CLI.
This roadmap describes planned | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | | [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **Complete** (v3.4.0) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Rust extraction for AST/CFG/dataflow/complexity; batch SQLite inserts; incremental rebuilds; native DB write pipeline; full rusqlite migration so native engine never touches better-sqlite3 | **Complete** (v3.5.0) | -| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support — Batch 1 (6 languages) shipped in v3.6.0; 17 remaining in 3 batches (17 → 34) | **In Progress** (v3.6.0) | +| [**7**](#phase-7--expanded-language-support) | Expanded Language Support | Parser abstraction layer, 23 new languages in 4 batches (11 → 34), dual-engine support — Batch 1 + 2 (12 languages) shipped in v3.6.0; 11 remaining in 2 batches (23 → 34) | **In Progress** (v3.6.0) | | [**8**](#phase-8--analysis-depth) | Analysis Depth | TypeScript-native resolution, inter-procedural type propagation, field-based points-to analysis, enhanced dynamic dispatch, barrel file resolution, precision/recall CI gates | Planned | | [**9**](#phase-9--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system | Planned | | [**10**](#phase-10--quality-security--technical-debt) | Quality, Security & Technical Debt | Supply-chain security, test quality gates, architectural debt 
cleanup | Planned | From 67b820076fc77f3934a55d65397daa769b555521 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:00:25 -0600 Subject: [PATCH 9/9] fix(elixir): remove dead visibility variable and populate module children in Rust extractor (#718) - Remove unused `visibility` variable in `handle_def_function` (Definition struct has no visibility field yet; left a comment for future wiring) - Add `collect_module_children` to `handle_defmodule` so the native engine populates module children, matching the TS/WASM extractor behaviour - Include Cargo.lock update with new batch-2 tree-sitter dependencies (elixir, lua, dart, zig, haskell, ocaml) that was missing from prior commits --- Cargo.lock | 68 ++++++++++++++++++- .../codegraph-core/src/extractors/elixir.rs | 40 ++++++++++- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 89018d16..304de941 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.5.0" +version = "3.6.0" dependencies = [ "napi", "napi-build", @@ -62,11 +62,16 @@ dependencies = [ "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", + "tree-sitter-dart", + "tree-sitter-elixir", "tree-sitter-go", + "tree-sitter-haskell", "tree-sitter-hcl", "tree-sitter-java", "tree-sitter-javascript", "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-ocaml", "tree-sitter-php", "tree-sitter-python", "tree-sitter-ruby", @@ -74,6 +79,7 @@ dependencies = [ "tree-sitter-scala", "tree-sitter-swift", "tree-sitter-typescript", + "tree-sitter-zig", ] [[package]] @@ -617,6 +623,26 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-dart" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f1f70b80ce41343e14aafcef67b5ba2e9de89587535b4aabbabb8036f4e38a" 
+dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-elixir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-go" version = "0.23.4" @@ -627,6 +653,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-hcl" version = "1.1.0" @@ -673,6 +709,26 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ocaml" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d19db582b3855f56b5f9ec484170fbfb9ee60b938ec7720d76d2ee788e8b640" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-php" version = "0.23.11" @@ -743,6 +799,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/crates/codegraph-core/src/extractors/elixir.rs b/crates/codegraph-core/src/extractors/elixir.rs 
index 85432143..97263b74 100644 --- a/crates/codegraph-core/src/extractors/elixir.rs +++ b/crates/codegraph-core/src/extractors/elixir.rs @@ -59,6 +59,9 @@ fn handle_defmodule(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }; let name = node_text(&alias_node, source).to_string(); + // Collect child function definitions from the module's do_block + let children = collect_module_children(node, source); + symbols.definitions.push(Definition { name, kind: "module".to_string(), @@ -67,10 +70,41 @@ fn handle_defmodule(node: &Node, source: &[u8], symbols: &mut FileSymbols) { decorators: None, complexity: None, cfg: None, - children: None, + children: opt_children(children), }); } +fn collect_module_children(node: &Node, source: &[u8]) -> Vec { + let mut children = Vec::new(); + let do_block = match find_child(node, "do_block") { + Some(b) => b, + None => return children, + }; + + for i in 0..do_block.child_count() { + let child = match do_block.child(i) { + Some(c) if c.kind() == "call" => c, + _ => continue, + }; + let target = match child.child_by_field_name("target").or_else(|| child.child(0)) { + Some(t) if t.kind() == "identifier" => t, + _ => continue, + }; + let kw = node_text(&target, source); + if kw != "def" && kw != "defp" { + continue; + } + let args = match find_child(&child, "arguments") { + Some(a) => a, + None => continue, + }; + if let Some(fn_name) = extract_elixir_fn_name(&args, source) { + children.push(child_def(fn_name, "property", start_line(&child))); + } + } + children +} + fn handle_def_function(node: &Node, source: &[u8], symbols: &mut FileSymbols, keyword: &str) { let args = match find_child(node, "arguments") { Some(a) => a, @@ -91,7 +125,9 @@ fn handle_def_function(node: &Node, source: &[u8], symbols: &mut FileSymbols, ke None => fn_name, }; - let visibility = if keyword == "defp" { Some("private".to_string()) } else { Some("public".to_string()) }; + // Note: visibility (public/private) is determined by keyword but the + // 
Definition struct does not yet have a visibility field. When it does, + // wire `keyword == "defp"` → private, else → public. symbols.definitions.push(Definition { name: full_name,