diff --git a/.claude/settings.json b/.claude/settings.json index c676a85..fe7b509 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,13 +1,14 @@ { "enabledPlugins": { "typescript-lsp@code-intelligence": true, - "rust-analyzer-lsp@code-intelligence": true, + "rust-analyzer-lsp@code-intelligence": true, "eslint-lsp@code-intelligence": true, "bun@pleaseai": true, "claude-md-management@claude-plugins-official": true, "code-review@pleaseai": true, "please@passionfactory": true, "standards@passionfactory": true, - "review@passionfactory": true + "review@passionfactory": true, + "plannotator@passionfactory": true } } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d0fc99..c299764 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,14 +33,3 @@ jobs: - name: Lint run: bun run lint - - - name: Test with coverage - run: bun test --coverage --coverage-reporter=lcov - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0 - with: - files: ./coverage/lcov.info - fail_ci_if_error: false - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index c84c25d..033cbec 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -93,6 +93,20 @@ jobs: with: tag: ${{ needs.release-please.outputs.tag_name }} + # Build + publish the napi-rs SDK (@pleaseai/csp-sdk) — the separate in-process + # native-addon channel. Reuses release-sdk.yml so its cross-compile matrix + # lives in one place. id-token: write is granted here (the caller) so the + # reusable workflow's publish job can use npm Trusted Publishing (OIDC). 
+ build-and-publish-sdk: + needs: release-please + if: ${{ needs.release-please.outputs.release_created }} + permissions: + contents: read + id-token: write + uses: ./.github/workflows/release-sdk.yml + with: + publish: true + # Generate the per-platform npm packages from the released binaries and # publish the wrapper + platform packages via npm Trusted Publishing (OIDC). # No NPM_TOKEN needed — auth is the OIDC id-token, and provenance is generated diff --git a/.github/workflows/release-sdk.yml b/.github/workflows/release-sdk.yml new file mode 100644 index 0000000..5366a78 --- /dev/null +++ b/.github/workflows/release-sdk.yml @@ -0,0 +1,181 @@ +# napi-rs SDK release pipeline — the @pleaseai/csp-sdk in-process native addon. +# +# Distinct from release-rust.yml (which ships the standalone `csp` binary + its +# npm launcher). This builds `csp-sdk..node` for each target from +# crates/csp-node, then publishes the wrapper + per-platform packages to npm via +# Trusted Publishing (OIDC — no NPM_TOKEN; provenance is automatic). +# +# Runs two ways: manually (workflow_dispatch) for ad-hoc rebuilds, and as a +# reusable workflow (workflow_call) invoked by release-please.yml on +# release_created. Each per-platform package (@pleaseai/csp-sdk-*) and the +# wrapper (@pleaseai/csp-sdk) must have a trusted publisher configured on +# npmjs.com pointing at this repo + workflow before the publish job can succeed. + +name: Release (SDK) + +on: + workflow_dispatch: + inputs: + publish: + description: Publish to npm after building (needs trusted-publisher config). Leave false to only build artifacts. + required: false + type: boolean + default: false + workflow_call: + inputs: + publish: + description: Publish to npm after building. 
+ required: false + type: boolean + default: true + +permissions: + contents: read + +concurrency: + group: release-sdk-${{ github.ref }} + cancel-in-progress: false + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: macos-14 # Apple Silicon + target: aarch64-apple-darwin + - os: macos-15-intel # Intel + target: x86_64-apple-darwin + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - os: ubuntu-24.04-arm + target: aarch64-unknown-linux-gnu + - os: ubuntu-latest + target: x86_64-unknown-linux-musl + - os: windows-latest + target: x86_64-pc-windows-msvc + + steps: + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + # Build-only job — it never pushes, so don't persist the token. + persist-credentials: false + + # rust-toolchain.toml pins the toolchain; add the target triple so the + # cross-target build resolves its std. + - name: Add Rust target + run: rustup target add ${{ matrix.target }} + + # @napi-rs/cli is a Node tool (devDependency of crates/csp-node). Bun + # installs it; the runner's preinstalled Node runs the `napi` binary. + - name: Setup Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 + with: + bun-version: 1.3.14 + + - name: Install SDK build tooling + run: bun install + working-directory: crates/csp-node + + # musl pulls C/C++ deps (esaxx-rs C++ via tokenizers, onig/zstd C) and + # musl-tools ships only musl-gcc, not musl-g++. cargo-zigbuild + zig is a + # full C/C++ cross toolchain; napi's `--zig` flag routes the build through + # it. zig is minisign-verified against the official key (mirrors + # release-rust.yml) before it is extracted and run. 
+ - name: Set up cargo-zigbuild (musl) + if: ${{ endsWith(matrix.target, '-musl') }} + run: | + ZIG_VERSION=0.13.0 + ZIG_PUBKEY="RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U" + TARBALL="zig-linux-x86_64-${ZIG_VERSION}.tar.xz" + sudo apt-get update && sudo apt-get install -y minisign + curl -fsSLO "https://ziglang.org/download/${ZIG_VERSION}/${TARBALL}" + curl -fsSLO "https://ziglang.org/download/${ZIG_VERSION}/${TARBALL}.minisig" + minisign -Vm "${TARBALL}" -P "${ZIG_PUBKEY}" + tar -xJf "${TARBALL}" -C "$RUNNER_TEMP" + echo "$RUNNER_TEMP/zig-linux-x86_64-${ZIG_VERSION}" >> "$GITHUB_PATH" + cargo install --locked cargo-zigbuild --version '^0.19' + + - name: Build native addon + shell: bash + working-directory: crates/csp-node + run: | + if [[ "${{ matrix.target }}" == *-musl ]]; then + bunx napi build --platform --release --target "${{ matrix.target }}" --zig + else + bunx napi build --platform --release --target "${{ matrix.target }}" + fi + ls -lh ./*.node + + - name: Upload artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: csp-sdk-${{ matrix.target }} + path: crates/csp-node/*.node + if-no-files-found: error + + # Assemble the per-platform packages from the built addons and publish them + + # the wrapper via npm Trusted Publishing. Resilient: publishes whatever + # targets built (always()), and fails loudly if nothing built. + publish: + needs: build + if: ${{ always() && inputs.publish }} + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + # Publish-only; never pushes to git, so don't persist the token. + persist-credentials: false + + - name: Setup Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 + with: + bun-version: 1.3.14 + + # Trusted Publishing requires npm >= 11.5.1; ubuntu-latest ships npm 10.x. 
+ - name: Upgrade npm for Trusted Publishing + run: sudo npm install -g npm@latest + + - name: Install SDK build tooling + run: bun install + working-directory: crates/csp-node + + - name: Download built addons + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + pattern: csp-sdk-* + merge-multiple: true + path: crates/csp-node/artifacts + + # `create-npm-dirs` materializes the npm/<platform>/ package dirs from the + # `napi.triples` config; `artifacts` moves each built .node into its dir. + - name: Assemble platform packages + working-directory: crates/csp-node + run: | + bunx napi create-npm-dirs + bunx napi artifacts --dir artifacts + ls -R npm + + # OIDC supplies auth + provenance, so no token / --provenance flag. + # Platform packages first; the wrapper last (its optionalDependencies pin + # them by version). Skip any platform dir that has no addon (partial matrix). + - name: Publish to npm + working-directory: crates/csp-node + run: | + set -e + for dir in npm/*/; do + if compgen -G "$dir/*.node" > /dev/null; then + echo "publishing platform package: $dir" + npm publish "$dir" --access public + else + echo "skipping $dir (no addon built for this target)" + fi + done + echo "publishing wrapper: @pleaseai/csp-sdk" + npm publish --access public diff --git a/CLAUDE.md b/CLAUDE.md index a308c30..ad4fa51 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,14 +4,18 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project context -`@pleaseai/csp` (binary: `csp`) is a TypeScript/Bun port of [MinishLab/semble](https://github.com/MinishLab/semble), a Python hybrid code-search library for agents. The current repo is an **initial scaffold only** — `src/index.ts` and `src/cli.ts` are placeholders. The README is the canonical spec for the intended public surface (MCP server, CLI, library). 
+`@pleaseai/csp` (binary: `csp`) is a **Rust** port of [MinishLab/semble](https://github.com/MinishLab/semble), a Python hybrid code-search library for agents. The implementation lives in `crates/csp` (library) + `crates/csp-cli` (`csp` binary). The README is the canonical spec for the public surface (MCP server, CLI, library). -### Rust rewrite (ADR-0003) +The deprecated TypeScript implementation that formerly lived under `src/` has been **removed** — the Rust port is the only implementation. The root `package.json` / `tsconfig.json` / `eslint.config.ts` remain only as repo JS tooling (lint/typecheck of `npm/`) and the release-please version anchor. The **napi-rs native-binding SDK** binds the `crates/` Rust directly (it does not reintroduce a TS port). -A Rust port lives in `crates/csp` (library) + `crates/csp-cli` (`csp` binary). **The Python upstream ([MinishLab/semble](https://github.com/MinishLab/semble)) is the source of truth** — the Rust port targets behavioral equivalence with the upstream Python. The TS `src/` is **deprecated**: slated for deletion and retained only as a historical/reference implementation; it is **no longer** the source of truth or the parity oracle. +**SDK packaging decision: keep the two distribution channels separate.** `npm/` stays the Biome-style CLI/MCP launcher — a thin Node shim that execs the **standalone Rust binary** (this preserves the no-runtime Homebrew story; do NOT convert it to napi). The napi-rs SDK is a distinct concern: `crates/csp-node` holds `#[napi]` bindings over `crates/csp` and is shipped as its **own npm package** (`@pleaseai/csp-sdk`), an in-process native addon — not merged into `npm/`. Both build outputs share the one `crates/csp` core. 
The SDK is in place: `#[napi]` bindings (`fromPath`/`fromGit`/`loadFromDisk` are async on the libuv worker pool; `search`/`findRelated`/`save`/`stats` sync, with `inner` held behind `Arc` to enable a future async move), the `napi build` toolchain (`.node` + `index.js`; `index.d.ts` is the committed type surface), and the cross-compile + Trusted-Publishing release in `release-sdk.yml`. The remaining step is publish-only — a maintainer must configure the npm trusted publisher for `@pleaseai/csp-sdk` + its platform packages (see `crates/csp-node/README.md`). + +### Rust port (ADR-0003) + +**The Python upstream ([MinishLab/semble](https://github.com/MinishLab/semble)) is the source of truth** — the Rust port targets behavioral equivalence with the upstream Python. - Quality gate before every Rust commit: `cargo fmt --all && cargo clippy --all-targets --all-features -- -D warnings && cargo test --workspace`. -- Parity oracle = the **Python upstream** behavior (read the source directly — see the fetch note below). The TS test suite stays usable as language-neutral golden fixtures for already-ported modules, but is not authoritative where it disagrees with upstream. The Rust port has intentionally moved **past the old TS stubs** to match upstream: dense embeddings are real (`model2vec-rs`, not the deterministic stub), the ranking pipeline is wired (query boosts + path penalties + file saturation), and the chunk length is `750`. The TS `src/` still carries the older stubs/values until it is removed. -- CLI/MCP output is a **snake_case** wire dict (`csp::utils::format_results`, mirroring TS `SearchResult.toDict`), distinct from the camelCase `ChunkDict` used for on-disk persistence. +- Parity oracle = the **Python upstream** behavior (read the source directly — see the fetch note below). Dense embeddings are real (`model2vec-rs`), the ranking pipeline is wired (query boosts + path penalties + file saturation), and the chunk length is `750`. 
+- CLI/MCP output is a **snake_case** wire dict (`csp::utils::format_results`, mirroring upstream `SearchResult.to_dict`), distinct from the camelCase `ChunkDict` used for on-disk persistence. - rmcp 1.7: the default `#[tool_handler]` rebuilds the router via `Self::tool_router()` and leaves a stored `tool_router` field unread (clippy `dead_code`) — use `#[tool_handler(router = self.tool_router)]`. When porting modules from semble, fetch the upstream source and read the Python directly: @@ -22,32 +26,34 @@ ask src github:MinishLab/semble@main # absolute path to the cached checkout ( curl -fsSL https://raw.githubusercontent.com/MinishLab/semble/main/src/semble/search.py ``` -Read the Python source directly — do not infer behavior from the README. Key upstream modules and their target TS counterparts live under `src/semble/` (Python): `types.py`, `tokens.py`, `chunking/`, `index/` (files, file_walker, dense, sparse, create, index), `ranking/` (boosting, penalties, weighting), `search.py`, `mcp.py`, `cli.py`, `cache.py`, `stats.py`, `utils.py`. +Read the Python source directly — do not infer behavior from the README. Key upstream modules (mapped to their `crates/csp` Rust counterparts in `.please/docs/references/semble.md`) live under `src/semble/` (Python): `types.py`, `tokens.py`, `chunking/`, `index/` (files, file_walker, dense, sparse, create, index), `ranking/` (boosting, penalties, weighting), `search.py`, `mcp.py`, `cli.py`, `cache.py`, `stats.py`, `utils.py`. ## Stack -- **Runtime / package manager**: Bun 1.3.10+ (`packageManager` pinned in `package.json`). Node.js 22+ supported. -- **Module system**: ESM only (`"type": "module"`). Use `.ts` imports with `verbatimModuleSyntax`. -- **Build**: `tsdown` — config at `tsdown.config.ts`, two entries (`src/index.ts`, `src/cli.ts`), `unbundle: true`, emits ESM + DTS into `dist/`. -- **Lint**: `@pleaseai/eslint-config` (wraps `@antfu/eslint-config`). Flat config at `eslint.config.ts`. 
No semicolons, single quotes, 2-space indent. Type-aware rules enabled via `tsconfigPath`. -- **TypeScript**: strict + `noUncheckedIndexedAccess` + `exactOptionalPropertyTypes` + `verbatimModuleSyntax`. Target ES2022, `moduleResolution: bundler`. -- **Tests**: `bun:test` (no jest/vitest). Run with `bun test`. +The implementation is **Rust** (a Cargo workspace). A thin Node/Bun toolchain remains for repo-level JS lint/typecheck and the napi-rs SDK build tooling. + +- **Impl**: Rust, edition 2021. Cargo workspace (`crates/csp` lib + `crates/csp-cli` `csp` binary + `crates/csp-node` napi-rs bindings), toolchain pinned by `rust-toolchain.toml`. Single-binary release profile (`lto`, `codegen-units=1`, `strip`). +- **Tests**: `cargo test --workspace` (255+ lib + CLI tests). Network-gated grammar-fetch tests run with `-- --ignored` (see ADR-0004). +- **Distribution**: self-contained Rust binary via Homebrew (`pleaseai/homebrew-tap`) + an npm wrapper under `npm/` that preserves the `bunx @pleaseai/csp` entrypoint. +- **JS tooling** (no TS implementation): Bun 1.3.10+ / Node 22+. `@pleaseai/eslint-config` (wraps `@antfu/eslint-config`) lints `npm/` JS + `eslint.config.ts`; `tsc --noEmit` typechecks. No semicolons, single quotes, 2-space indent. ## Commands ```bash -bun install # install deps -bun run build # tsdown build → dist/ -bun run dev # tsdown --watch +# Rust (the implementation) +cargo build --release # → target/release/csp +cargo run -p csp-cli -- search "query" . # run the CLI locally +cargo test --workspace # test runner +cargo fmt --all && cargo clippy --all-targets --all-features -- -D warnings # pre-commit gate + +# JS tooling (lint/typecheck of npm/ + configs; no TS sources to build) +bun install # install dev tooling bun run typecheck # tsc --noEmit bun run lint # eslint . --cache bun run lint:fix # eslint . 
--fix --cache -bun test # bun:test runner -bun test path/to/file.test.ts # single file -bun test --watch # watch mode ``` -`bunx @pleaseai/csp` is the published-package entrypoint referenced throughout the README (MCP/CLI setup snippets). Locally, use `bun run --bun src/cli.ts` or build first. +`bunx @pleaseai/csp` is the published-package entrypoint referenced throughout the README (MCP/CLI setup snippets); it resolves to the npm wrapper (`npm/`) that execs the Rust binary. ## Public API surface (target, from README) diff --git a/Cargo.lock b/Cargo.lock index b7eaf0f..e202a79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,6 +366,15 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -474,6 +483,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "csp-node" +version = "0.1.4" +dependencies = [ + "code-search-please", + "napi", + "napi-build", + "napi-derive", +] + +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "darling" version = "0.20.11" @@ -1224,6 +1253,16 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libloading" version = "0.9.0" @@ -1349,6 +1388,63 @@ dependencies = [ "syn", ] 
+[[package]] +name = "napi" +version = "2.16.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55740c4ae1d8696773c78fdafd5d0e5fe9bc9f1b071c7ba493ba5c413a9184f3" +dependencies = [ + "bitflags", + "ctor", + "napi-derive", + "napi-sys", + "once_cell", +] + +[[package]] +name = "napi-build" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9c366d2c8c60b86fa632df75f745509b52f9128f91a6bad4c796e44abb505e1" + +[[package]] +name = "napi-derive" +version = "2.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cbe2585d8ac223f7d34f13701434b9d5f4eb9c332cccce8dee57ea18ab8ab0c" +dependencies = [ + "cfg-if", + "convert_case", + "napi-derive-backend", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "napi-derive-backend" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1639aaa9eeb76e91c6ae66da8ce3e89e921cd3885e99ec85f4abacae72fc91bf" +dependencies = [ + "convert_case", + "once_cell", + "proc-macro2", + "quote", + "regex", + "semver", + "syn", +] + +[[package]] +name = "napi-sys" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427802e8ec3a734331fec1035594a210ce1ff4dc5bc1950530920ab717964ea3" +dependencies = [ + "libloading 0.8.9", +] + [[package]] name = "ndarray" version = "0.15.6" @@ -1886,6 +1982,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -2275,7 +2377,7 @@ dependencies = [ "cc", "dirs", "fd-lock", - "libloading", + "libloading 0.9.0", "memchr", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index d168584..97b6e50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ # it is retired and this becomes the 
primary tree. [workspace] resolver = "2" -members = ["crates/csp", "crates/csp-cli"] +members = ["crates/csp", "crates/csp-cli", "crates/csp-node"] [workspace.package] version = "0.1.4" # x-release-please-version @@ -36,6 +36,12 @@ fancy-regex = "0.16" # Phase 1 — boosting definition patterns (lookbehind/lo indexmap = "2" # Phase 1 — insertion-ordered score maps (Map parity) sha2 = "0.10" # Phase 3 — content-hash cache keys (sha256, parity with node:crypto) tempfile = "3" # Phase 3/4 — temp dirs (git clone checkout) + tests +# napi-rs native SDK (separate distribution channel — the @pleaseai/csp-sdk npm +# package; the CLI/MCP launcher under npm/ stays a standalone-binary shim). Only +# crates/csp-node opts in; the core/CLI crates do not depend on napi. +napi = "2" +napi-derive = "2" +napi-build = "2" # Single-binary release profile (ADR-0003 motivation #1). [profile.release] diff --git a/README.ko.md b/README.ko.md index 7bcdf1f..0d9c747 100644 --- a/README.ko.md +++ b/README.ko.md @@ -445,7 +445,9 @@ csp clear all # 인덱스 캐시와 savings 모두 삭제
라이브러리 사용 -`csp`는 Rust 라이브러리 크레이트로도 사용할 수 있습니다. 짧은 이름 `csp`가 이미 선점되어 있어 crates.io에는 [**`code-search-please`**](https://crates.io/crates/code-search-please)로 배포됩니다. 라이브러리 이름은 `csp` 그대로이므로 의존성은 `code-search-please`로 추가하되 코드에서는 `use csp::...`를 씁니다. `CspIndex`(`from_path` / `from_git` / `search` / `find_related`)와 `ContentType` enum, 랭킹 파이프라인을 노출합니다. +`csp`는 두 가지 방식으로 라이브러리로 쓸 수 있습니다. **Rust 크레이트**, 또는 같은 코어를 napi-rs로 네이티브 바인딩한 **JavaScript/TypeScript SDK**(`@pleaseai/csp-sdk`)입니다. + +**Rust** — 짧은 이름 `csp`가 이미 선점되어 있어 crates.io에는 [**`code-search-please`**](https://crates.io/crates/code-search-please)로 배포됩니다. 라이브러리 이름은 `csp` 그대로이므로 의존성은 `code-search-please`로 추가하되 코드에서는 `use csp::...`를 씁니다. `CspIndex`(`from_path` / `from_git` / `search` / `find_related`)와 `ContentType` enum, 랭킹 파이프라인을 노출합니다. ```toml [dependencies] @@ -465,7 +467,20 @@ for r in &results { } ``` -> 크레이트는 crates.io에 [`code-search-please`](https://crates.io/crates/code-search-please)로 배포되며 라이브러리 이름은 `csp`입니다. (npm 패키지는 `csp` 바이너리를 런처 뒤에 담아 배포할 뿐, JavaScript API를 노출하지 않습니다.) +**JavaScript / TypeScript** — [`@pleaseai/csp-sdk`](https://www.npmjs.com/package/@pleaseai/csp-sdk)는 동일한 Rust 검색 엔진을 **인프로세스**로 실행하는 네이티브(napi-rs) 애드온입니다. 서브프로세스도, JSON 왕복도 없습니다. 빌드 진입점은 비동기, 쿼리 호출은 동기입니다. + +```ts +import { ContentType, CspIndex } from '@pleaseai/csp-sdk' + +const index = await CspIndex.fromPath('./my-project', { content: [ContentType.Code] }) +const results = index.search('save model to disk', { topK: 3 }) + +for (const { chunk, score } of results) { + console.log(score.toFixed(3), chunk.location) // 예: 0.871 src/index.ts:42-58 +} +``` + +> 두 npm 패키지는 별개입니다. **`@pleaseai/csp`**는 `csp` **CLI + MCP 서버**를 런처 뒤에 담아 배포하며(JS API는 노출하지 않음), **`@pleaseai/csp-sdk`**는 인프로세스 **라이브러리** SDK입니다. 둘 다 하나의 Rust 코어에서 빌드됩니다. 크레이트는 crates.io에 [`code-search-please`](https://crates.io/crates/code-search-please)로 배포되며 라이브러리 이름은 `csp`입니다.
diff --git a/README.md b/README.md index 19ad260..2076871 100644 --- a/README.md +++ b/README.md @@ -445,7 +445,9 @@ Explicit index paths written with `csp index -o ` are not part of the auto
Library usage -`csp` is also a Rust library crate, published on crates.io as [**`code-search-please`**](https://crates.io/crates/code-search-please) (the short name `csp` was already taken). The library name stays `csp`, so you depend on `code-search-please` but still write `use csp::...`. It exposes `CspIndex` with `from_path` / `from_git` / `search` / `find_related`, plus the `ContentType` enum and the ranking pipeline. +`csp` is usable as a library two ways: the **Rust crate**, or a **JavaScript/TypeScript SDK** (`@pleaseai/csp-sdk`) that binds the same core natively via napi-rs. + +**Rust** — published on crates.io as [**`code-search-please`**](https://crates.io/crates/code-search-please) (the short name `csp` was already taken). The library name stays `csp`, so you depend on `code-search-please` but still write `use csp::...`. It exposes `CspIndex` with `from_path` / `from_git` / `search` / `find_related`, plus the `ContentType` enum and the ranking pipeline. ```toml [dependencies] @@ -465,7 +467,20 @@ for r in &results { } ``` -> The crate is published on crates.io as [`code-search-please`](https://crates.io/crates/code-search-please) with the library name `csp`. (The npm package ships only the `csp` binary behind a launcher — it does not expose a JavaScript API.) +**JavaScript / TypeScript** — [`@pleaseai/csp-sdk`](https://www.npmjs.com/package/@pleaseai/csp-sdk) is a native (napi-rs) addon that runs the same Rust search engine **in-process** — no subprocess, no JSON round-trip. The build entrypoints are async; the per-query calls are sync. + +```ts +import { ContentType, CspIndex } from '@pleaseai/csp-sdk' + +const index = await CspIndex.fromPath('./my-project', { content: [ContentType.Code] }) +const results = index.search('save model to disk', { topK: 3 }) + +for (const { chunk, score } of results) { + console.log(score.toFixed(3), chunk.location) // e.g. 
0.871 src/index.ts:42-58 +} +``` + +> The two npm packages are distinct: **`@pleaseai/csp`** ships the `csp` **CLI + MCP server** behind a launcher (it does not expose a JS API), while **`@pleaseai/csp-sdk`** is the in-process **library** SDK. Both are built from the one Rust core; the crate is on crates.io as [`code-search-please`](https://crates.io/crates/code-search-please) with library name `csp`.
diff --git a/bun.lock b/bun.lock index cba0db1..a92fdd4 100644 --- a/bun.lock +++ b/bun.lock @@ -4,20 +4,11 @@ "workspaces": { "": { "name": "@pleaseai/csp", - "dependencies": { - "@huggingface/transformers": "^4.2.0", - "@kreuzberg/tree-sitter-language-pack": "^1.8.1", - "@modelcontextprotocol/sdk": "^1.29.0", - "chokidar": "^5.0.0", - "commander": "^14.0.3", - "ignore": "^7.0.5", - }, "devDependencies": { "@pleaseai/eslint-config": "^0.0.4", "@types/bun": "latest", "eslint": "^10.0.3", "jiti": "^2.7.0", - "tsdown": "^0.21.5", "typescript": "^6.0.2", }, }, @@ -29,15 +20,13 @@ "@antfu/install-pkg": ["@antfu/install-pkg@1.1.0", "", { "dependencies": { "package-manager-detector": "^1.3.0", "tinyexec": "^1.0.1" } }, "sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ=="], - "@babel/generator": ["@babel/generator@8.0.0-rc.3", "", { "dependencies": { "@babel/parser": "^8.0.0-rc.3", "@babel/types": "^8.0.0-rc.3", "@jridgewell/gen-mapping": "^0.3.12", "@jridgewell/trace-mapping": "^0.3.28", "@types/jsesc": "^2.5.0", "jsesc": "^3.0.2" } }, "sha512-em37/13/nR320G4jab/nIIHZgc2Wz2y/D39lxnTyxB4/D/omPQncl/lSdlnJY1OhQcRGugTSIF2l/69o31C9dA=="], - - "@babel/helper-string-parser": ["@babel/helper-string-parser@8.0.0", "", {}, "sha512-6mJgmFFFIIO82vvoLt9XtRC7/TkzXfts1t/SpRX4IHSzMgqoPYCWesVu1udUPUWioAE/2fcG6WuI8zrkE1gwrg=="], + "@babel/helper-string-parser": ["@babel/helper-string-parser@7.29.7", "", {}, "sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw=="], - "@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@8.0.0-rc.3", "", {}, "sha512-8AWCJ2VJJyDFlGBep5GpaaQ9AAaE/FjAcrqI7jyssYhtL7WGV0DOKpJsQqM037xDbpRLHXsY8TwU7zDma7coOw=="], + "@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.29.7", "", {}, "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg=="], - "@babel/parser": ["@babel/parser@8.0.0-rc.3", "", { 
"dependencies": { "@babel/types": "^8.0.0-rc.3" }, "bin": "./bin/babel-parser.js" }, "sha512-B20dvP3MfNc/XS5KKCHy/oyWl5IA6Cn9YjXRdDlCjNmUFrjvLXMNUfQq/QUy9fnG2gYkKKcrto2YaF9B32ToOQ=="], + "@babel/parser": ["@babel/parser@7.29.7", "", { "dependencies": { "@babel/types": "^7.29.7" }, "bin": "./bin/babel-parser.js" }, "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg=="], - "@babel/types": ["@babel/types@8.0.0-rc.3", "", { "dependencies": { "@babel/helper-string-parser": "^8.0.0-rc.3", "@babel/helper-validator-identifier": "^8.0.0-rc.3" } }, "sha512-mOm5ZrYmphGfqVWoH5YYMTITb3cDXsFgmvFlvkvWDMsR9X8RFnt7a0Wb6yNIdoFsiMO9WjYLq+U/FMtqIYAF8Q=="], + "@babel/types": ["@babel/types@7.29.7", "", { "dependencies": { "@babel/helper-string-parser": "^7.29.7", "@babel/helper-validator-identifier": "^7.29.7" } }, "sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA=="], "@clack/core": ["@clack/core@1.4.1", "", { "dependencies": { "fast-wrap-ansi": "^0.2.0", "sisteransi": "^1.0.5" } }, "sha512-FILJa1gGKEFTGZAJE9RpVhrjKz3c3h4ar60dSv6cGuDqufQ84YEIS3GAGvZiN+H6yaLbbvTFNejjCC4tXpZEuw=="], @@ -45,12 +34,6 @@ "@e18e/eslint-plugin": ["@e18e/eslint-plugin@0.4.1", "", { "dependencies": { "empathic": "^2.0.0", "module-replacements": "^3.0.0-beta.7", "semver": "^7.7.4" }, "peerDependencies": { "eslint": "^9.0.0 || ^10.0.0", "oxlint": "^1.61.0" }, "optionalPeers": ["eslint", "oxlint"] }, "sha512-Re00N8ad1HsNrzpuIX7Bhdr8RSaFWp6VgwJUEJF+47+D1CMcXoS7VNRkIG23e46pddhgxWU0cWk4wYiQIuMHqQ=="], - "@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" } }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="], - - "@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="], - - 
"@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.2.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w=="], - "@es-joy/jsdoccomment": ["@es-joy/jsdoccomment@0.84.0", "", { "dependencies": { "@types/estree": "^1.0.8", "@typescript-eslint/types": "^8.54.0", "comment-parser": "1.4.5", "esquery": "^1.7.0", "jsdoc-type-pratt-parser": "~7.1.1" } }, "sha512-0xew1CxOam0gV5OMjh2KjFQZsKL2bByX1+q4j3E73MpYIdyUxcZb/xQct9ccUb+ve5KGUYbCUxyPnYB7RbuP+w=="], "@es-joy/resolve.exports": ["@es-joy/resolve.exports@1.2.0", "", {}, "sha512-Q9hjxWI5xBM+qW2enxfe8wDKdFWMfd0Z29k5ZJnuBqD/CasY5Zryj09aCA6owbGATWz+39p5uIdaHXpopOcG8g=="], @@ -75,14 +58,6 @@ "@eslint/plugin-kit": ["@eslint/plugin-kit@0.7.2", "", { "dependencies": { "@eslint/core": "^1.2.1", "levn": "^0.4.1" } }, "sha512-+CNAzxglkrpNf/kKywqQfk74QjtceuOE7Qm+AF8miRvPF/wmmK5+OJOgVh3AVTT3RP2mH3+FOaxlE5v72owk0A=="], - "@hono/node-server": ["@hono/node-server@1.19.14", "", { "peerDependencies": { "hono": "^4" } }, "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw=="], - - "@huggingface/jinja": ["@huggingface/jinja@0.5.9", "", {}, "sha512-uWTG+l3VJRsl7EXxYizuL3P+cCPoc3cRqbWWRcQN0FhejRfbdq0RNhCmbY/YDtnTcz9icdLYuLDjsnz4d8JMuw=="], - - "@huggingface/tokenizers": ["@huggingface/tokenizers@0.1.3", "", {}, "sha512-8rF/RRT10u+kn7YuUbUg0OF30K8rjTc78aHpxT+qJ1uWSqxT1MHi8+9ltwYfkFYJzT/oS+qw3JVfHtNMGAdqyA=="], - - "@huggingface/transformers": ["@huggingface/transformers@4.2.0", "", { "dependencies": { "@huggingface/jinja": "^0.5.6", "@huggingface/tokenizers": "^0.1.3", "onnxruntime-node": "1.24.3", "onnxruntime-web": "1.26.0-dev.20260416-b7804b056c", "sharp": "^0.34.5" } }, "sha512-8BRCoBMH0XsWaEIamuR0LrJGAfftgHAfb2Vrffy0VKlSAE/MnUJ5/h/zTfEP3fDIft+nk7TqB8xXEyABGitBjQ=="], - "@humanfs/core": ["@humanfs/core@0.19.2", "", { "dependencies": { "@humanfs/types": "^0.15.0" } }, 
"sha512-UhXNm+CFMWcbChXywFwkmhqjs3PRCmcSa/hfBgLIb7oQ5HNb1wS0icWsGtSAUNgefHeI+eBrA8I1fxmbHsGdvA=="], "@humanfs/node": ["@humanfs/node@0.16.8", "", { "dependencies": { "@humanfs/core": "^0.19.2", "@humanfs/types": "^0.15.0", "@humanwhocodes/retry": "^0.4.0" } }, "sha512-gE1eQNZ3R++kTzFUpdGlpmy8kDZD/MLyHqDwqjkVQI0JMdI1D51sy1H958PNXYkM2rAac7e5/CnIKZrHtPh3BQ=="], @@ -93,136 +68,18 @@ "@humanwhocodes/retry": ["@humanwhocodes/retry@0.4.3", "", {}, "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ=="], - "@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="], - - "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], - - "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], - - "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], - - "@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], - - "@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], - - "@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": 
"arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], - - "@img/sharp-libvips-linux-ppc64": ["@img/sharp-libvips-linux-ppc64@1.2.4", "", { "os": "linux", "cpu": "ppc64" }, "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA=="], - - "@img/sharp-libvips-linux-riscv64": ["@img/sharp-libvips-linux-riscv64@1.2.4", "", { "os": "linux", "cpu": "none" }, "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA=="], - - "@img/sharp-libvips-linux-s390x": ["@img/sharp-libvips-linux-s390x@1.2.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ=="], - - "@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], - - "@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], - - "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], - - "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], - - "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], - - "@img/sharp-linux-ppc64": 
["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="], - - "@img/sharp-linux-riscv64": ["@img/sharp-linux-riscv64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-riscv64": "1.2.4" }, "os": "linux", "cpu": "none" }, "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw=="], - - "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="], - - "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], - - "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], - - "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], - - "@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="], - - "@img/sharp-win32-arm64": ["@img/sharp-win32-arm64@0.34.5", "", { "os": "win32", "cpu": "arm64" }, 
"sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g=="], - - "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="], - - "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], - - "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], - - "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.2", "", {}, "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw=="], - "@jridgewell/sourcemap-codec": ["@jridgewell/sourcemap-codec@1.5.5", "", {}, "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="], - "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], - - "@kreuzberg/tree-sitter-language-pack": ["@kreuzberg/tree-sitter-language-pack@1.8.1", "", { "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-24qZIP3CvDUD/XokRJPc7rD82M4RJZHn4GxMox35Iicye/q2NLiEkjiE6BSeyMltBHXoVCXFkzDDgVCXrQhmMQ=="], - - "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", 
"express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], - - "@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.5", "", { "dependencies": { "@tybys/wasm-util": "^0.10.2" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" } }, "sha512-AWPoBRJ9tsnVhor4sjO7rkni+7p+2IAEFj6cx06UgP10jkQHqay/36uRV/bFkgrh18D9vb4cr8Q0Pthskgzy+Q=="], - "@ota-meshi/ast-token-store": ["@ota-meshi/ast-token-store@0.3.0", "", {}, "sha512-XRO0zi2NIUKq2lUk3T1ecFSld1fMWRKE6naRFGkgkdeosx7IslyUKNv5Dcb5PJTja9tHJoFu0v/7yEpAkrkrTg=="], - "@oxc-project/types": ["@oxc-project/types@0.127.0", "", {}, "sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ=="], - "@pkgr/core": ["@pkgr/core@0.3.6", "", {}, "sha512-SEeaJLb3qBNF/OaXnaR1NmmBbFYk1zC0ZH/52fATcRPLFg/p791YrcyFFy44Bo9sLaGuSuLp5Q6axbb/O+v/RA=="], "@pleaseai/eslint-config": ["@pleaseai/eslint-config@0.0.4", "", { "dependencies": { "@antfu/eslint-config": "^8.0.0", "eslint-plugin-package-json": "^0.91.0" }, "peerDependencies": { "eslint": "^9.10.0 || ^10.0.0" } }, "sha512-FWzFoZfz1Di/BCT+G9HZpyWcOT4L3lcu9jWRfOWTjRsCkEtbUVBbgp9Wa8+C5lumGq5S5ecAKyDbfAb6xd+w0Q=="], - "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], - - "@protobufjs/base64": ["@protobufjs/base64@1.1.2", "", {}, "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="], - - "@protobufjs/codegen": ["@protobufjs/codegen@2.0.5", "", {}, 
"sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g=="], - - "@protobufjs/eventemitter": ["@protobufjs/eventemitter@1.1.1", "", {}, "sha512-vW1GmwMZNnL+gMRaovlh9yZX74kc+TTU3FObkkurpMaRtBfLP3ldjS9KQWlwZgraRE0+dheEEoAxdzcJQ8eXZg=="], - - "@protobufjs/fetch": ["@protobufjs/fetch@1.1.1", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.1" } }, "sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw=="], - - "@protobufjs/float": ["@protobufjs/float@1.0.2", "", {}, "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="], - - "@protobufjs/path": ["@protobufjs/path@1.1.2", "", {}, "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="], - - "@protobufjs/pool": ["@protobufjs/pool@1.1.0", "", {}, "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="], - - "@protobufjs/utf8": ["@protobufjs/utf8@1.1.1", "", {}, "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg=="], - - "@quansync/fs": ["@quansync/fs@1.0.0", "", { "dependencies": { "quansync": "^1.0.0" } }, "sha512-4TJ3DFtlf1L5LDMaM6CanJ/0lckGNtJcMjQ1NAV6zDmA0tEHKZtxNKin8EgPaVX1YzljbxckyT2tJrpQKAtngQ=="], - - "@rolldown/binding-android-arm64": ["@rolldown/binding-android-arm64@1.0.0-rc.17", "", { "os": "android", "cpu": "arm64" }, "sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ=="], - - "@rolldown/binding-darwin-arm64": ["@rolldown/binding-darwin-arm64@1.0.0-rc.17", "", { "os": "darwin", "cpu": "arm64" }, "sha512-4ksWc9n0mhlZpZ9PMZgTGjeOPRu8MB1Z3Tz0Mo02eWfWCHMW1zN82Qz/pL/rC+yQa+8ZnutMF0JjJe7PjwasYw=="], - - "@rolldown/binding-darwin-x64": ["@rolldown/binding-darwin-x64@1.0.0-rc.17", "", { "os": "darwin", "cpu": "x64" }, "sha512-SUSDOI6WwUVNcWxd02QEBjLdY1VPHvlEkw6T/8nYG322iYWCTxRb1vzk4E+mWWYehTp7ERibq54LSJGjmouOsw=="], - - 
"@rolldown/binding-freebsd-x64": ["@rolldown/binding-freebsd-x64@1.0.0-rc.17", "", { "os": "freebsd", "cpu": "x64" }, "sha512-hwnz3nw9dbJ05EDO/PvcjaaewqqDy7Y1rn1UO81l8iIK1GjenME75dl16ajbvSSMfv66WXSRCYKIqfgq2KCfxw=="], - - "@rolldown/binding-linux-arm-gnueabihf": ["@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.17", "", { "os": "linux", "cpu": "arm" }, "sha512-IS+W7epTcwANmFSQFrS1SivEXHtl1JtuQA9wlxrZTcNi6mx+FDOYrakGevvvTwgj2JvWiK8B29/qD9BELZPyXQ=="], - - "@rolldown/binding-linux-arm64-gnu": ["@rolldown/binding-linux-arm64-gnu@1.0.0-rc.17", "", { "os": "linux", "cpu": "arm64" }, "sha512-e6usGaHKW5BMNZOymS1UcEYGowQMWcgZ71Z17Sl/h2+ZziNJ1a9n3Zvcz6LdRyIW5572wBCTH/Z+bKuZouGk9Q=="], - - "@rolldown/binding-linux-arm64-musl": ["@rolldown/binding-linux-arm64-musl@1.0.0-rc.17", "", { "os": "linux", "cpu": "arm64" }, "sha512-b/CgbwAJpmrRLp02RPfhbudf5tZnN9nsPWK82znefso832etkem8H7FSZwxrOI9djcdTP7U6YfNhbRnh7djErg=="], - - "@rolldown/binding-linux-ppc64-gnu": ["@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.17", "", { "os": "linux", "cpu": "ppc64" }, "sha512-4EII1iNGRUN5WwGbF/kOh/EIkoDN9HsupgLQoXfY+D1oyJm7/F4t5PYU5n8SWZgG0FEwakyM8pGgwcBYruGTlA=="], - - "@rolldown/binding-linux-s390x-gnu": ["@rolldown/binding-linux-s390x-gnu@1.0.0-rc.17", "", { "os": "linux", "cpu": "s390x" }, "sha512-AH8oq3XqQo4IibpVXvPeLDI5pzkpYn0WiZAfT05kFzoJ6tQNzwRdDYQ45M8I/gslbodRZwW8uxLhbSBbkv96rA=="], - - "@rolldown/binding-linux-x64-gnu": ["@rolldown/binding-linux-x64-gnu@1.0.0-rc.17", "", { "os": "linux", "cpu": "x64" }, "sha512-cLnjV3xfo7KslbU41Z7z8BH/E1y5mzUYzAqih1d1MDaIGZRCMqTijqLv76/P7fyHuvUcfGsIpqCdddbxLLK9rA=="], - - "@rolldown/binding-linux-x64-musl": ["@rolldown/binding-linux-x64-musl@1.0.0-rc.17", "", { "os": "linux", "cpu": "x64" }, "sha512-0phclDw1spsL7dUB37sIARuis2tAgomCJXAHZlpt8PXZ4Ba0dRP1e+66lsRqrfhISeN9bEGNjQs+T/Fbd7oYGw=="], - - "@rolldown/binding-openharmony-arm64": ["@rolldown/binding-openharmony-arm64@1.0.0-rc.17", "", { "os": "none", "cpu": "arm64" }, 
"sha512-0ag/hEgXOwgw4t8QyQvUCxvEg+V0KBcA6YuOx9g0r02MprutRF5dyljgm3EmR02O292UX7UeS6HzWHAl6KgyhA=="], - - "@rolldown/binding-wasm32-wasi": ["@rolldown/binding-wasm32-wasi@1.0.0-rc.17", "", { "dependencies": { "@emnapi/core": "1.10.0", "@emnapi/runtime": "1.10.0", "@napi-rs/wasm-runtime": "^1.1.4" }, "cpu": "none" }, "sha512-LEXei6vo0E5wTGwpkJ4KoT3OZJRnglwldt5ziLzOlc6qqb55z4tWNq2A+PFqCJuvWWdP53CVhG1Z9NtToDPJrA=="], - - "@rolldown/binding-win32-arm64-msvc": ["@rolldown/binding-win32-arm64-msvc@1.0.0-rc.17", "", { "os": "win32", "cpu": "arm64" }, "sha512-gUmyzBl3SPMa6hrqFUth9sVfcLBlYsbMzBx5PlexMroZStgzGqlZ26pYG89rBb45Mnia+oil6YAIFeEWGWhoZA=="], - - "@rolldown/binding-win32-x64-msvc": ["@rolldown/binding-win32-x64-msvc@1.0.0-rc.17", "", { "os": "win32", "cpu": "x64" }, "sha512-3hkiolcUAvPB9FLb3UZdfjVVNWherN1f/skkGWJP/fgSQhYUZpSIRr0/I8ZK9TkF3F7kxvJAk0+IcKvPHk9qQg=="], - - "@rolldown/pluginutils": ["@rolldown/pluginutils@1.0.0-rc.17", "", {}, "sha512-n8iosDOt6Ig1UhJ2AYqoIhHWh/isz0xpicHTzpKBeotdVsTEcxsSA/i3EVM7gQAj0rU27OLAxCjzlj15IWY7bg=="], - "@sindresorhus/base62": ["@sindresorhus/base62@1.0.0", "", {}, "sha512-TeheYy0ILzBEI/CO55CP6zJCSdSWeRtGnHy8U8dWSUH4I68iqTsy7HkMktR4xakThc9jotkPQUXT4ITdbV7cHA=="], "@stylistic/eslint-plugin": ["@stylistic/eslint-plugin@5.10.0", "", { "dependencies": { "@eslint-community/eslint-utils": "^4.9.1", "@typescript-eslint/types": "^8.56.0", "eslint-visitor-keys": "^4.2.1", "espree": "^10.4.0", "estraverse": "^5.3.0", "picomatch": "^4.0.3" }, "peerDependencies": { "eslint": "^9.0.0 || ^10.0.0" } }, "sha512-nPK52ZHvot8Ju/0A4ucSX1dcPV2/1clx0kLcH5wDmrE4naKso7TUC/voUyU1O9OTKTrR6MYip6LP0ogEMQ9jPQ=="], - "@tybys/wasm-util": ["@tybys/wasm-util@0.10.2", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg=="], - "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, 
"sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], "@types/debug": ["@types/debug@4.1.13", "", { "dependencies": { "@types/ms": "*" } }, "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw=="], @@ -233,8 +90,6 @@ "@types/hast": ["@types/hast@3.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ=="], - "@types/jsesc": ["@types/jsesc@2.5.1", "", {}, "sha512-9VN+6yxLOPLOav+7PwjZbxiID2bVaeq0ED4qSQmdQTdjnXJSaCVKTR58t15oqH1H5t8Ng2ZX1SabJVoN9Q34bw=="], - "@types/json-schema": ["@types/json-schema@7.0.15", "", {}, "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA=="], "@types/katex": ["@types/katex@0.16.8", "", {}, "sha512-trgaNyfU+Xh2Tc+ABIb44a5AYUpicB3uwirOioeOkNPPbmgRNtcWyDeeFRzjPZENO9Vq8gvVqfhaaXWLlevVwg=="], @@ -281,36 +136,22 @@ "@vue/shared": ["@vue/shared@3.5.38", "", {}, "sha512-FTW0AFZNaK5/mOqvGBwVfUlNLU38TiQn4+DQgIFUnrBBJQ1crMJ82yeGQLV5jyKFsO8yRukpbuP7x+nRbH6aug=="], - "accepts": ["accepts@2.0.0", "", { "dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="], - "acorn": ["acorn@8.17.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-xRQbDb9BnwDafYNn6Vwl839DYVjqXYb1XVGtWAZ1kcDc6iwAL4hg3B1dZlRiuENFeO2H53gFG3in621AdERVAg=="], "acorn-jsx": ["acorn-jsx@5.3.2", "", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="], - "adm-zip": ["adm-zip@0.5.17", "", {}, "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ=="], - "ajv": ["ajv@6.15.0", "", { "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", "json-schema-traverse": "^0.4.1", "uri-js": 
"^4.2.2" } }, "sha512-fgFx7Hfoq60ytK2c7DhnF8jIvzYgOMxfugjLOSMHjLIPgenqa7S7oaagATUq99mV6IYvN2tRmC0wnTYX6iPbMw=="], - "ajv-formats": ["ajv-formats@3.0.1", "", { "dependencies": { "ajv": "^8.0.0" } }, "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ=="], - "ansis": ["ansis@4.3.1", "", {}, "sha512-BJ8/l4R5LRE7hW9WdSuGYrLSHi2ynxeFpDFbH0K/CgNeY/tyhk+vO6TYxXC5r5CpUhNVX310xzPsN/H9lCdfOA=="], "are-docs-informative": ["are-docs-informative@0.0.2", "", {}, "sha512-ixiS0nLNNG5jNQzgZJNoUpBKdo9yTYZMGJ+QgT2jmjR7G7+QHRCc4v6LQ3NgE7EBJq+o0ams3waJwkrlBom8Ig=="], - "ast-kit": ["ast-kit@3.0.0", "", { "dependencies": { "@babel/parser": "^8.0.0", "estree-walker": "^3.0.3", "pathe": "^2.0.3" } }, "sha512-8OG92q3R35qjC/4i6BLBMg8IB+fClWu/1PEwg2Z9Rn+BuNaiEgJzpzn+pxWOdHJWDCAwu2JP0wCDTozAM4QirQ=="], - "balanced-match": ["balanced-match@4.0.4", "", {}, "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA=="], "baseline-browser-mapping": ["baseline-browser-mapping@2.10.37", "", { "bin": { "baseline-browser-mapping": "dist/cli.cjs" } }, "sha512-girxaJ7WZssDOFhzCGZTDKoTa1gk6A1TbflaYTpykLJ4UU9Fz9kx1aREM8JCuoVHbL8X8T/mJg7w2oYSq72Oig=="], - "birpc": ["birpc@4.0.0", "", {}, "sha512-LShSxJP0KTmd101b6DRyGBj57LZxSDYWKitQNW/mi8GRMvZb078Uf9+pveax1DrVL89vm7mWe+TovdI/UDOuPw=="], - - "body-parser": ["body-parser@2.3.0", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^2.0.0", "debug": "^4.4.3", "http-errors": "^2.0.1", "iconv-lite": "^0.7.2", "on-finished": "^2.4.1", "qs": "^6.15.2", "raw-body": "^3.0.2", "type-is": "^2.1.0" } }, "sha512-2cGmJupaNgg+QUwVLAucDuWuoMZ6EX9iHDRswZ5lsNYEmwPaRknMPCLZz07yTzVq/83p4o/wzbDZbBrTvGGTIw=="], - "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], - "boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="], - 
"brace-expansion": ["brace-expansion@5.0.6", "", { "dependencies": { "balanced-match": "^4.0.2" } }, "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g=="], "browserslist": ["browserslist@4.28.2", "", { "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", "electron-to-chromium": "^1.5.328", "node-releases": "^2.0.36", "update-browserslist-db": "^1.2.3" }, "bin": { "browserslist": "cli.js" } }, "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg=="], @@ -319,14 +160,8 @@ "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], - "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], - "cac": ["cac@7.0.0", "", {}, "sha512-tixWYgm5ZoOD+3g6UTea91eow5z6AAHaho3g0V9CNSNb45gM8SmflpAc+GRd1InC4AqN/07Unrgp56Y94N9hJQ=="], - "call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], - - "call-bound": ["call-bound@1.0.4", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" } }, "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg=="], - "caniuse-lite": ["caniuse-lite@1.0.30001799", "", {}, "sha512-hG1bReV+OUU+MOqK4t/ZWI0tZOyz3rqS9XuhOUz1cIcbwBKjOyJEJuw9ER5JuNyqxNk8u/JUVbGibBOL1yrjFw=="], "ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="], @@ -335,30 +170,18 @@ "character-entities": ["character-entities@2.0.2", "", {}, "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ=="], - 
"chokidar": ["chokidar@5.0.0", "", { "dependencies": { "readdirp": "^5.0.0" } }, "sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw=="], - "ci-info": ["ci-info@4.4.0", "", {}, "sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg=="], "clean-regexp": ["clean-regexp@1.0.0", "", { "dependencies": { "escape-string-regexp": "^1.0.5" } }, "sha512-GfisEZEJvzKrmGWkvfhgzcz/BllN1USeqD2V6tg14OAOgaCD2Z/PUEuxnAZ/nPvmaHRG7a8y77p1T/IRQ4D1Hw=="], - "commander": ["commander@14.0.3", "", {}, "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw=="], + "commander": ["commander@8.3.0", "", {}, "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww=="], "comment-parser": ["comment-parser@1.4.6", "", {}, "sha512-ObxuY6vnbWTN6Od72xfwN9DbzC7Y2vv8u1Soi9ahRKL37gb6y1qk6/dgjs+3JWuXJHWvsg3BXIwzd/rkmAwavg=="], "confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], - "content-disposition": ["content-disposition@1.1.0", "", {}, "sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g=="], - - "content-type": ["content-type@1.0.5", "", {}, "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA=="], - - "cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="], - - "cookie-signature": ["cookie-signature@1.2.2", "", {}, "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg=="], - "core-js-compat": ["core-js-compat@3.49.0", "", { "dependencies": { "browserslist": "^4.28.1" } }, "sha512-VQXt1jr9cBz03b331DFDCCP90b3fanciLkgiOoy8SBHy06gNf+vQ1A3WFLqG7I8TipYIKeYK9wxd0tUrvHcOZA=="], - "cors": ["cors@2.8.6", "", { "dependencies": { "object-assign": "^4", "vary": "^1" } }, 
"sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw=="], - "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], "cssesc": ["cssesc@3.0.0", "", { "bin": { "cssesc": "bin/cssesc" } }, "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg=="], @@ -369,56 +192,26 @@ "deep-is": ["deep-is@0.1.4", "", {}, "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="], - "define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="], - - "define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="], - - "defu": ["defu@6.1.7", "", {}, "sha512-7z22QmUWiQ/2d0KkdYmANbRUVABpZ9SNYyH5vx6PZ+nE5bcC0l7uFvEfHlyld/HcGBFTL536ClDt3DEcSlEJAQ=="], - - "depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="], - "dequal": ["dequal@2.0.3", "", {}, "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA=="], "detect-indent": ["detect-indent@7.0.2", "", {}, "sha512-y+8xyqdGLL+6sh0tVeHcfP/QDd8gUgbasolJJpY7NgeQGSZ739bDtSiaiDgtoicy+mtYB81dKLxO9xRhCyIB3A=="], - "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], - "detect-newline": ["detect-newline@4.0.1", "", {}, 
"sha512-qE3Veg1YXzGHQhlA6jzebZN2qVf6NX+A7m7qlhCGG30dJixrAQhYOsJjsnBjJkCSmuOPpCk30145fr8FV0bzog=="], - "detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="], - "devlop": ["devlop@1.1.0", "", { "dependencies": { "dequal": "^2.0.0" } }, "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA=="], "diff-sequences": ["diff-sequences@29.6.3", "", {}, "sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q=="], - "dts-resolver": ["dts-resolver@2.1.3", "", { "peerDependencies": { "oxc-resolver": ">=11.0.0" }, "optionalPeers": ["oxc-resolver"] }, "sha512-bihc7jPC90VrosXNzK0LTE2cuLP6jr0Ro8jk+kMugHReJVLIpHz/xadeq3MhuwyO4TD4OA3L1Q8pBBFRc08Tsw=="], - - "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], - - "ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="], - "electron-to-chromium": ["electron-to-chromium@1.5.375", "", {}, "sha512-ZWP5eB4BVPW/ZYo9252hQZHZ5XavtsTgpbhcmMmRwymavC5AsLWQWBPaKMeNd2LW0KGby5HPXvj7+sr4ta5j/Q=="], "empathic": ["empathic@2.0.1", "", {}, "sha512-YGRs8knHhKHVShLkFET/rWAU8kmHbOV5LwN938RHI0pljAJ1Gf6SzXsSmRaEzcXTtOOmVqJ5+WtQPL5uigY50Q=="], - "encodeurl": ["encodeurl@2.0.0", "", {}, "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg=="], - "enhanced-resolve": ["enhanced-resolve@5.24.0", "", { "dependencies": { "graceful-fs": "^4.2.4", "tapable": "^2.3.3" } }, "sha512-SkE2t82KlkkxQRVMVLAGKxLfORGQfrkx5dkj+vlgXRVNEdPc4eZcR+J/Fvj8C+yKSFH5L0q3NFlyufOVQnCcYQ=="], "entities": ["entities@7.0.1", "", {}, 
"sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], - "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], - - "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], - - "es-object-atoms": ["es-object-atoms@1.1.2", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw=="], - - "es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="], - "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="], - "escape-html": ["escape-html@1.0.3", "", {}, "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="], - "escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="], "eslint": ["eslint@10.5.0", "", { "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.2", "@eslint/config-array": "^0.23.5", "@eslint/config-helpers": "^0.6.0", "@eslint/core": "^1.2.1", "@eslint/plugin-kit": "^0.7.2", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2", "@types/estree": "^1.0.6", "ajv": "^6.14.0", "cross-spawn": "^7.0.6", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", "eslint-scope": "^9.1.2", "eslint-visitor-keys": "^5.0.1", "espree": "^11.2.0", "esquery": "^1.7.0", "esutils": "^2.0.2", "fast-deep-equal": "^3.1.3", "file-entry-cache": "^8.0.0", "find-up": "^5.0.0", "glob-parent": "^6.0.2", "ignore": "^5.2.0", "imurmurhash": "^0.1.4", "is-glob": "^4.0.0", 
"json-stable-stringify-without-jsonify": "^1.0.1", "minimatch": "^10.2.4", "natural-compare": "^1.4.0", "optionator": "^0.9.3" }, "peerDependencies": { "jiti": "*" }, "optionalPeers": ["jiti"], "bin": { "eslint": "bin/eslint.js" } }, "sha512-1y+7C+vi12bUK1IpZeaV3gsH9fHLBmPvYmPx42pvT/E9yG0IC8g3PUZZgp0+JLJl7ZDK0flc2gc+Aw9dpCvIsQ=="], @@ -483,20 +276,10 @@ "estraverse": ["estraverse@5.3.0", "", {}, "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA=="], - "estree-walker": ["estree-walker@3.0.3", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="], + "estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], "esutils": ["esutils@2.0.3", "", {}, "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g=="], - "etag": ["etag@1.8.1", "", {}, "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="], - - "eventsource": ["eventsource@3.0.7", "", { "dependencies": { "eventsource-parser": "^3.0.1" } }, "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA=="], - - "eventsource-parser": ["eventsource-parser@3.1.0", "", {}, "sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg=="], - - "express": ["express@5.2.1", "", { "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", "content-disposition": "^1.0.0", "content-type": "^1.0.5", "cookie": "^0.7.1", "cookie-signature": "^1.2.1", "debug": "^4.4.0", "depd": "^2.0.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "finalhandler": "^2.1.0", "fresh": "^2.0.0", "http-errors": "^2.0.0", "merge-descriptors": "^2.0.0", "mime-types": "^3.0.0", "on-finished": "^2.4.1", "once": "^1.4.0", "parseurl": "^1.3.3", 
"proxy-addr": "^2.0.7", "qs": "^6.14.0", "range-parser": "^1.2.1", "router": "^2.2.0", "send": "^1.1.0", "serve-static": "^2.2.0", "statuses": "^2.0.1", "type-is": "^2.0.1", "vary": "^1.1.2" } }, "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw=="], - - "express-rate-limit": ["express-rate-limit@8.5.2", "", { "dependencies": { "ip-address": "^10.2.0" }, "peerDependencies": { "express": ">= 4.11" } }, "sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A=="], - "exsolve": ["exsolve@1.0.8", "", {}, "sha512-LmDxfWXwcTArk8fUEnOfSZpHOJ6zOMUJKOtFLFqJLoKJetuQG874Uc7/Kki7zFLzYybmZhp1M7+98pfMqeX8yA=="], "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], @@ -509,8 +292,6 @@ "fast-string-width": ["fast-string-width@3.0.2", "", { "dependencies": { "fast-string-truncated-width": "^3.0.2" } }, "sha512-gX8LrtNEI5hq8DVUfRQMbr5lpaS4nMIWV+7XEbXk2b8kiQIizgnlr12B4dA3ZEx3308ze0O4Q1R+cHts8kyUJg=="], - "fast-uri": ["fast-uri@3.1.2", "", {}, "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ=="], - "fast-wrap-ansi": ["fast-wrap-ansi@0.2.2", "", { "dependencies": { "fast-string-width": "^3.0.2" } }, "sha512-7F2Fl+TjRSenLqlU3UjSH0iyqopqoZIu7eZVpEirP2g1GtWa2G/ecEmBdgz31+Mxr+ELclgg6sokpSFIQiZ02Q=="], "fault": ["fault@2.0.1", "", { "dependencies": { "format": "^0.2.0" } }, "sha512-WtySTkS4OKev5JtpHXnib4Gxiurzh5NCGvWrFaZ34m6JehfTUhKZvn9njTfw48t6JumVQOmrKqpmGcdwxnhqBQ=="], @@ -519,30 +300,16 @@ "file-entry-cache": ["file-entry-cache@8.0.0", "", { "dependencies": { "flat-cache": "^4.0.0" } }, "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ=="], - "finalhandler": ["finalhandler@2.1.1", "", { "dependencies": { "debug": "^4.4.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "on-finished": "^2.4.1", "parseurl": "^1.3.3", 
"statuses": "^2.0.1" } }, "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA=="], - "find-up": ["find-up@5.0.0", "", { "dependencies": { "locate-path": "^6.0.0", "path-exists": "^4.0.0" } }, "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng=="], "find-up-simple": ["find-up-simple@1.0.1", "", {}, "sha512-afd4O7zpqHeRyg4PfDQsXmlDe2PfdHtJt6Akt8jOWaApLOZk5JXs6VMR29lz03pRe9mpykrRCYIYxaJYcfpncQ=="], "flat-cache": ["flat-cache@4.0.1", "", { "dependencies": { "flatted": "^3.2.9", "keyv": "^4.5.4" } }, "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw=="], - "flatbuffers": ["flatbuffers@25.9.23", "", {}, "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ=="], - "flatted": ["flatted@3.4.2", "", {}, "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA=="], "format": ["format@0.2.2", "", {}, "sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww=="], - "forwarded": ["forwarded@0.2.0", "", {}, "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="], - - "fresh": ["fresh@2.0.0", "", {}, "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A=="], - - "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], - - "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], - - "get-proto": 
["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], - "get-tsconfig": ["get-tsconfig@4.14.0", "", { "dependencies": { "resolve-pkg-maps": "^1.0.0" } }, "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA=="], "git-hooks-list": ["git-hooks-list@4.2.1", "", {}, "sha512-WNvqJjOxxs/8ZP9+DWdwWJ7cDsd60NHf39XnD82pDVrKO5q7xfPqpkK6hwEAmBa/ZSEE4IOoR75EzbbIuwGlMw=="], @@ -551,52 +318,22 @@ "glob-parent": ["glob-parent@6.0.2", "", { "dependencies": { "is-glob": "^4.0.3" } }, "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A=="], - "global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="], - "globals": ["globals@17.6.0", "", {}, "sha512-sepffkT8stwnIYbsMBpoCHJuJM5l98FUF2AnE07hfvE0m/qp3R586hw4jF4uadbhvg1ooIdzuu7CsfD2jzCaNA=="], - "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="], - "globrex": ["globrex@0.1.2", "", {}, "sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg=="], - "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], - "graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="], - "guid-typescript": ["guid-typescript@1.0.9", "", {}, "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="], - - 
"has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="], - - "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], - - "hasown": ["hasown@2.0.4", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A=="], - - "hono": ["hono@4.12.25", "", {}, "sha512-2NFaIyNVgJmBs/ecmtGzlmluTFs5cHEWGTdu0t1HBwYzoGXOL5nUQBRMXsXWla5i4KkG//QMzVP88m1+I3fdAQ=="], - - "hookable": ["hookable@6.1.1", "", {}, "sha512-U9LYDy1CwhMCnprUfeAZWZGByVbhd54hwepegYTK7Pi5NvqEj63ifz5z+xukznehT7i6NIZRu89Ay1AZmRsLEQ=="], - "hosted-git-info": ["hosted-git-info@9.0.3", "", { "dependencies": { "lru-cache": "^11.1.0" } }, "sha512-Hc+ghLoSt6QaYZUv0WBiIvmMDZuZZ7oaDvdH8MbfOO4lOsxdXLEvuC6ePoGs9H1X9oCLyq6+NVN0MKqD+ydxyg=="], "html-entities": ["html-entities@2.6.0", "", {}, "sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ=="], - "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], - - "iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], - - "ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], - - "import-without-cache": ["import-without-cache@0.3.3", "", {}, "sha512-bDxwDdF04gm550DfZHgffvlX+9kUlcz32UD0AeBTmVPFiWkrexF2XVmiuFFbDhiFuP8fQkrkvI2KdSNPYWAXkQ=="], + "ignore": 
["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], "imurmurhash": ["imurmurhash@0.1.4", "", {}, "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA=="], "indent-string": ["indent-string@5.0.0", "", {}, "sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg=="], - "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], - - "ip-address": ["ip-address@10.2.0", "", {}, "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA=="], - - "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], - "is-builtin-module": ["is-builtin-module@5.0.0", "", { "dependencies": { "builtin-modules": "^5.0.0" } }, "sha512-f4RqJKBUe5rQkJ2eJEJBXSticB3hGbN9j0yxxMQFqIW89Jp9WYFtzfTcRlstDKVUTRzSOTLKRfO9vIztenwtxA=="], "is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="], @@ -605,14 +342,10 @@ "is-plain-obj": ["is-plain-obj@4.1.0", "", {}, "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg=="], - "is-promise": ["is-promise@4.0.0", "", {}, "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ=="], - "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], "jiti": ["jiti@2.7.0", "", { "bin": { "jiti": "lib/jiti-cli.mjs" } }, "sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ=="], - "jose": ["jose@6.2.3", "", {}, "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw=="], - "jsdoc-type-pratt-parser": 
["jsdoc-type-pratt-parser@7.2.0", "", {}, "sha512-dh140MMgjyg3JhJZY/+iEzW+NO5xR2gpbDFKHqotCmexElVntw7GjWjt511+C/Ef02RU5TKYrJo/Xlzk+OLaTw=="], "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], @@ -621,12 +354,8 @@ "json-schema-traverse": ["json-schema-traverse@0.4.1", "", {}, "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="], - "json-schema-typed": ["json-schema-typed@8.0.2", "", {}, "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA=="], - "json-stable-stringify-without-jsonify": ["json-stable-stringify-without-jsonify@1.0.1", "", {}, "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw=="], - "json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="], - "jsonc-eslint-parser": ["jsonc-eslint-parser@3.1.0", "", { "dependencies": { "acorn": "^8.5.0", "eslint-visitor-keys": "^5.0.0", "semver": "^7.3.5" } }, "sha512-75EA7EWZExL/j+MDKQrRbdzcRI2HOkRlmUw8fZJc1ioqFEOvBsq7Rt+A6yCxOt9w/TYNpkt52gC6nm/g5tFIng=="], "katex": ["katex@0.16.47", "", { "dependencies": { "commander": "^8.3.0" }, "bin": { "katex": "cli.js" } }, "sha512-Eeo8Ys1doU1z+x8AZsPpQu+p/QcZBI5PeOo7QGQdy2x2m0MU/hYagBbGOmXwr5KVbEfVuWv9LpnQWeehogurjg=="], @@ -641,8 +370,6 @@ "lodash.merge": ["lodash.merge@4.6.2", "", {}, "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ=="], - "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], - "longest-streak": ["longest-streak@3.1.0", "", {}, "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g=="], "lru-cache": ["lru-cache@11.5.1", "", {}, 
"sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A=="], @@ -651,10 +378,6 @@ "markdown-table": ["markdown-table@3.0.4", "", {}, "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw=="], - "matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="], - - "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], - "mdast-util-find-and-replace": ["mdast-util-find-and-replace@3.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "escape-string-regexp": "^5.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg=="], "mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.3", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "mdast-util-to-string": "^4.0.0", "micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-W4mAWTvSlKvf8L6J+VN9yLSqQ9AOAAvHuoDAmPkz4dHf553m5gVj2ejadHJhoJmcmxEnOv6Pa8XJhpxE93kb8Q=="], @@ -681,10 +404,6 @@ "mdast-util-to-string": ["mdast-util-to-string@4.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0" } }, "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg=="], - "media-typer": ["media-typer@1.1.0", "", {}, "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw=="], - - "merge-descriptors": 
["merge-descriptors@2.0.0", "", {}, "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g=="], - "micromark": ["micromark@4.0.2", "", { "dependencies": { "@types/debug": "^4.0.0", "debug": "^4.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-encode": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA=="], "micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="], @@ -745,10 +464,6 @@ "micromark-util-types": ["micromark-util-types@2.0.2", "", {}, "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA=="], - "mime-db": ["mime-db@1.54.0", "", {}, 
"sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ=="], - - "mime-types": ["mime-types@3.0.2", "", { "dependencies": { "mime-db": "^1.54.0" } }, "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A=="], - "minimatch": ["minimatch@10.2.5", "", { "dependencies": { "brace-expansion": "^5.0.5" } }, "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg=="], "mlly": ["mlly@1.8.2", "", { "dependencies": { "acorn": "^8.16.0", "pathe": "^2.0.3", "pkg-types": "^1.3.1", "ufo": "^1.6.3" } }, "sha512-d+ObxMQFmbt10sretNDytwt85VrbkhhUA/JBGm1MPaWJ65Cl4wOgLaB1NYvJSZ0Ef03MMEU/0xpPMXUIQ29UfA=="], @@ -763,34 +478,14 @@ "natural-orderby": ["natural-orderby@5.0.0", "", {}, "sha512-kKHJhxwpR/Okycz4HhQKKlhWe4ASEfPgkSWNmKFHd7+ezuQlxkA5cM3+XkBPvm1gmHen3w53qsYAv+8GwRrBlg=="], - "negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], - "node-releases": ["node-releases@2.0.47", "", {}, "sha512-Uzmd6LXpouKo8EUK68IjH4+E01w/hXyV3R3g/geCJo+rXLNfh1xucB+LOzYEOQPSiUK3h/xZf0cQGcSsmyL2Og=="], "npm-package-arg": ["npm-package-arg@13.0.2", "", { "dependencies": { "hosted-git-info": "^9.0.0", "proc-log": "^6.0.0", "semver": "^7.3.5", "validate-npm-package-name": "^7.0.0" } }, "sha512-IciCE3SY3uE84Ld8WZU23gAPPV9rIYod4F+rc+vJ7h7cwAJt9Vk6TVsK60ry7Uj3SRS3bqRRIGuTp9YVlk6WNA=="], "nth-check": ["nth-check@2.1.1", "", { "dependencies": { "boolbase": "^1.0.0" } }, "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w=="], - "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], - "object-deep-merge": ["object-deep-merge@2.0.1", "", {}, "sha512-aKttDKcU3pyZqKcCkDhsMn70WmZFG2JGDQLP9EcLyTSIFQRCPWLAmBZRLJnrVUrhPG1jETEEbfdgbNtJf1LyMg=="], - "object-inspect": 
["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], - - "object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="], - - "obug": ["obug@2.1.3", "", {}, "sha512-9miFgM2OFba7hB+pRgvtV84pYTBaoTHohvmIgiRt6dRIzbwEOIaNaP+dIlGs2fNFoB0SeISs0Jz5WFVRid6Xyg=="], - - "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], - - "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="], - - "onnxruntime-common": ["onnxruntime-common@1.24.3", "", {}, "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA=="], - - "onnxruntime-node": ["onnxruntime-node@1.24.3", "", { "dependencies": { "adm-zip": "^0.5.16", "global-agent": "^3.0.0", "onnxruntime-common": "1.24.3" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg=="], - - "onnxruntime-web": ["onnxruntime-web@1.26.0-dev.20260416-b7804b056c", "", { "dependencies": { "flatbuffers": "^25.1.24", "guid-typescript": "^1.0.9", "long": "^5.2.3", "onnxruntime-common": "1.24.0-dev.20251116-b39e144322", "platform": "^1.3.6", "protobufjs": "^7.2.4" } }, "sha512-MD6Ss4GSpQBo6zqoJzyT9LRbKYs7x/JVN23FT24EcEvlqF4VuzPOeH6X38orZPKHQDbprn7K+SBpu0/mj2CQiw=="], - "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], "p-limit": ["p-limit@3.1.0", "", { "dependencies": { "yocto-queue": 
"^0.1.0" } }, "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ=="], @@ -807,26 +502,18 @@ "parse-statements": ["parse-statements@1.0.11", "", {}, "sha512-HlsyYdMBnbPQ9Jr/VgJ1YF4scnldvJpJxCVx6KgqPL4dxppsWrJHCIIxQXMJrqGnsRkNPATbeMJ8Yxu7JMsYcA=="], - "parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="], - "path-exists": ["path-exists@4.0.0", "", {}, "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w=="], "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], - "path-to-regexp": ["path-to-regexp@8.4.2", "", {}, "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA=="], - "pathe": ["pathe@2.0.3", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="], "picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="], "picomatch": ["picomatch@4.0.4", "", {}, "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A=="], - "pkce-challenge": ["pkce-challenge@5.0.1", "", {}, "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ=="], - "pkg-types": ["pkg-types@2.3.1", "", { "dependencies": { "confbox": "^0.2.4", "exsolve": "^1.0.8", "pathe": "^2.0.3" } }, "sha512-y+ichcgc2LrADuhLNAx8DFjVfgz91pRxfZdI3UDhxHvcVEZsenLO+7XaU5vOp0u/7V/wZ+plyuQxtrDlZJ+yeg=="], - "platform": ["platform@1.3.6", "", {}, "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="], - "pluralize": ["pluralize@8.0.0", "", {}, "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA=="], "pnpm-workspace-yaml": 
["pnpm-workspace-yaml@1.6.1", "", { "dependencies": { "yaml": "^2.9.0" } }, "sha512-yTeZntGWi8m9WNuhoVsP0DpFc4sC1U0+rr/qR6Zi9n2g3sxXY+JfccjXjjruNz96tM8I09yaJUA86doRnNLkbg=="], @@ -839,21 +526,9 @@ "proc-log": ["proc-log@6.1.0", "", {}, "sha512-iG+GYldRf2BQ0UDUAd6JQ/RwzaQy6mXmsk/IzlYyal4A4SNFw54MeH4/tLkF4I5WoWG9SQwuqWzS99jaFQHBuQ=="], - "protobufjs": ["protobufjs@7.6.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.5", "@protobufjs/eventemitter": "^1.1.1", "@protobufjs/fetch": "^1.1.1", "@protobufjs/float": "^1.0.2", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.1", "@types/node": ">=13.7.0", "long": "^5.3.2" } }, "sha512-RJJPTTpvFfHcWLkIa2JFWK4XvtSzS0yEWDmunqHXli1h3JlkbcQZXDZdcWxv+JK3Xsl5/UFDPZ0iGm7DAengYw=="], - - "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], - "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], - "qs": ["qs@6.15.2", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw=="], - - "quansync": ["quansync@1.0.0", "", {}, "sha512-5xZacEEufv3HSTPQuchrvV6soaiACMFnq1H8wkVioctoH3TRha9Sz66lOxRwPK/qZj7HPiSveih9yAyh98gvqA=="], - - "range-parser": ["range-parser@1.2.1", "", {}, "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="], - - "raw-body": ["raw-body@3.0.2", "", { "dependencies": { "bytes": "~3.1.2", "http-errors": "~2.0.1", "iconv-lite": "~0.7.0", "unpipe": "~1.0.0" } }, "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA=="], - - "readdirp": ["readdirp@5.0.0", "", {}, 
"sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ=="], + "quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="], "refa": ["refa@0.12.1", "", { "dependencies": { "@eslint-community/regexpp": "^4.8.0" } }, "sha512-J8rn6v4DBb2nnFqkqwy6/NnTYMcgLA+sLr0iIO41qpv0n+ngb7ksag2tMRl0inb1bbO/esUwzW1vbJi7K0sI0g=="], @@ -863,50 +538,18 @@ "regjsparser": ["regjsparser@0.13.2", "", { "dependencies": { "jsesc": "~3.1.0" }, "bin": { "regjsparser": "bin/parser" } }, "sha512-NgRBy2Nx/bE+9F27nVHnqcN5HjyLmecqsqx2PJHu3/IEtADD4WuxuXIVExD5PoSDFVrl78dOonfcOe5O+5nbzQ=="], - "require-from-string": ["require-from-string@2.0.2", "", {}, "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="], - "reserved-identifiers": ["reserved-identifiers@1.2.0", "", {}, "sha512-yE7KUfFvaBFzGPs5H3Ops1RevfUEsDc5Iz65rOwWg4lE8HJSYtle77uul3+573457oHvBKuHYDl/xqUkKpEEdw=="], "resolve-pkg-maps": ["resolve-pkg-maps@1.0.0", "", {}, "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw=="], - "roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="], - - "rolldown": ["rolldown@1.0.0-rc.17", "", { "dependencies": { "@oxc-project/types": "=0.127.0", "@rolldown/pluginutils": "1.0.0-rc.17" }, "optionalDependencies": { "@rolldown/binding-android-arm64": "1.0.0-rc.17", "@rolldown/binding-darwin-arm64": "1.0.0-rc.17", "@rolldown/binding-darwin-x64": "1.0.0-rc.17", "@rolldown/binding-freebsd-x64": "1.0.0-rc.17", "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.17", "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.17", "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.17", 
"@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.17", "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.17", "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.17", "@rolldown/binding-linux-x64-musl": "1.0.0-rc.17", "@rolldown/binding-openharmony-arm64": "1.0.0-rc.17", "@rolldown/binding-wasm32-wasi": "1.0.0-rc.17", "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.17", "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.17" }, "bin": { "rolldown": "bin/cli.mjs" } }, "sha512-ZrT53oAKrtA4+YtBWPQbtPOxIbVDbxT0orcYERKd63VJTF13zPcgXTvD4843L8pcsI7M6MErt8QtON6lrB9tyA=="], - - "rolldown-plugin-dts": ["rolldown-plugin-dts@0.23.2", "", { "dependencies": { "@babel/generator": "8.0.0-rc.3", "@babel/helper-validator-identifier": "8.0.0-rc.3", "@babel/parser": "8.0.0-rc.3", "@babel/types": "8.0.0-rc.3", "ast-kit": "^3.0.0-beta.1", "birpc": "^4.0.0", "dts-resolver": "^2.1.3", "get-tsconfig": "^4.13.7", "obug": "^2.1.1", "picomatch": "^4.0.4" }, "peerDependencies": { "@ts-macro/tsc": "^0.3.6", "@typescript/native-preview": ">=7.0.0-dev.20260325.1", "rolldown": "^1.0.0-rc.12", "typescript": "^5.0.0 || ^6.0.0", "vue-tsc": "~3.2.0" }, "optionalPeers": ["@ts-macro/tsc", "@typescript/native-preview", "typescript", "vue-tsc"] }, "sha512-PbSqLawLgZBGcOGT3yqWBGn4cX+wh2nt5FuBGdcMHyOhoukmjbhYAl8NT9sE4U38Cm9tqLOIQeOrvzeayM0DLQ=="], - - "router": ["router@2.2.0", "", { "dependencies": { "debug": "^4.4.0", "depd": "^2.0.0", "is-promise": "^4.0.0", "parseurl": "^1.3.3", "path-to-regexp": "^8.0.0" } }, "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ=="], - - "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], - "scslre": ["scslre@0.3.0", "", { "dependencies": { "@eslint-community/regexpp": "^4.8.0", "refa": "^0.12.0", "regexp-ast-analysis": "^0.7.0" } }, "sha512-3A6sD0WYP7+QrjbfNA2FN3FsOaGGFoekCVgTyypy53gPxhbkCIjtO6YWgdrfM+n/8sI8JeXZOIxsHjMTNxQ4nQ=="], "semver": 
["semver@7.8.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-rUCObTnP32Q08R2uuIrt7r9PlEonuTmtuXYcW6s5kjdlj3xbnwe+21yXptAUYcMAABLkYYTtnmzb3w3EDZfueA=="], - "semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="], - - "send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], - - "serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="], - - "serve-static": ["serve-static@2.2.1", "", { "dependencies": { "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "parseurl": "^1.3.3", "send": "^1.2.0" } }, "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw=="], - - "setprototypeof": ["setprototypeof@1.2.0", "", {}, "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="], - - "sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", 
"@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": "0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="], - "shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="], "shebang-regex": ["shebang-regex@3.0.0", "", {}, "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="], - "side-channel": ["side-channel@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.4", "side-channel-list": "^1.0.1", "side-channel-map": "^1.0.1", "side-channel-weakmap": "^1.0.2" } }, "sha512-6x6dK6zJdpTzF4sQeNYxwtvBzf6Eg4GtlesS94HOvTudUeyK2WXAaIfmDgsyslYrRBeFIlsi54AYsFGUuhmvrQ=="], - - "side-channel-list": ["side-channel-list@1.0.1", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.4" } }, "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w=="], - - "side-channel-map": ["side-channel-map@1.0.1", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3" } }, "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA=="], - - "side-channel-weakmap": ["side-channel-weakmap@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3", "side-channel-map": "^1.0.1" } }, 
"sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A=="], - "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="], "sort-object-keys": ["sort-object-keys@2.1.0", "", {}, "sha512-SOiEnthkJKPv2L6ec6HMwhUcN0/lppkeYuN1x63PbyPRrgSPIuBJCiYxYyvWRTtjMlOi14vQUCGUJqS6PLVm8g=="], @@ -923,10 +566,6 @@ "spdx-license-ids": ["spdx-license-ids@3.0.23", "", {}, "sha512-CWLcCCH7VLu13TgOH+r8p1O/Znwhqv/dbb6lqWy67G+pT1kHmeD/+V36AVb/vq8QMIQwVShJ6Ssl5FPh0fuSdw=="], - "sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="], - - "statuses": ["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], - "strip-indent": ["strip-indent@4.1.1", "", {}, "sha512-SlyRoSkdh1dYP0PzclLE7r0M9sgbFKKMFXpFRUMNuKhQSbC6VQIGzq3E0qsfvGJaUFJPGv6Ws1NZ/haTAjfbMA=="], "synckit": ["synckit@0.11.13", "", { "dependencies": { "@pkgr/core": "^0.3.6" } }, "sha512-eNRKgb3z66Yp3D2CixVujOUvXLFUTij/zVnV8KRyvFdQwpz7I5DS8UfRkTeLzb64u+dkzDSdelE24izu+zSSUg=="], @@ -939,30 +578,16 @@ "to-valid-identifier": ["to-valid-identifier@1.0.0", "", { "dependencies": { "@sindresorhus/base62": "^1.0.0", "reserved-identifiers": "^1.0.0" } }, "sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw=="], - "toidentifier": ["toidentifier@1.0.1", "", {}, "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA=="], - "toml-eslint-parser": ["toml-eslint-parser@1.0.3", "", { "dependencies": { "eslint-visitor-keys": "^5.0.0" } }, "sha512-A5F0cM6+mDleacLIEUkmfpkBbnHJFV1d2rprHU2MXNk7mlxHq2zGojA+SRvQD1RoMo9gqjZPWEaKG4v1BQ48lw=="], - "tree-kill": ["tree-kill@1.2.2", "", { "bin": { "tree-kill": "cli.js" } }, 
"sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A=="], - "ts-api-utils": ["ts-api-utils@2.5.0", "", { "peerDependencies": { "typescript": ">=4.8.4" } }, "sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA=="], - "tsdown": ["tsdown@0.21.10", "", { "dependencies": { "ansis": "^4.2.0", "cac": "^7.0.0", "defu": "^6.1.7", "empathic": "^2.0.0", "hookable": "^6.1.1", "import-without-cache": "^0.3.3", "obug": "^2.1.1", "picomatch": "^4.0.4", "rolldown": "1.0.0-rc.17", "rolldown-plugin-dts": "^0.23.2", "semver": "^7.7.4", "tinyexec": "^1.1.1", "tinyglobby": "^0.2.16", "tree-kill": "^1.2.2", "unconfig-core": "^7.5.0", "unrun": "^0.2.37" }, "peerDependencies": { "@arethetypeswrong/core": "^0.18.1", "@tsdown/css": "0.21.10", "@tsdown/exe": "0.21.10", "@vitejs/devtools": "*", "publint": "^0.3.0", "typescript": "^5.0.0 || ^6.0.0", "unplugin-unused": "^0.5.0" }, "optionalPeers": ["@arethetypeswrong/core", "@tsdown/css", "@tsdown/exe", "@vitejs/devtools", "publint", "typescript", "unplugin-unused"], "bin": { "tsdown": "dist/run.mjs" } }, "sha512-3wk73yBhZe/wX7REqSdivNQ84TDs1mJ+IlnzrrEREP70xlJ/AEIzqaI04l/TzMKVIdkTdC3CPaADn2Lk/0SkdA=="], - - "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], - "type-check": ["type-check@0.4.0", "", { "dependencies": { "prelude-ls": "^1.2.1" } }, "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew=="], - "type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="], - - "type-is": ["type-is@2.1.0", "", { "dependencies": { "content-type": "^2.0.0", "media-typer": "^1.1.0", "mime-types": "^3.0.0" } }, "sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA=="], - "typescript": ["typescript@6.0.3", "", { "bin": { "tsc": 
"bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="], "ufo": ["ufo@1.6.4", "", {}, "sha512-JFNbkD1Svwe0KvGi8GOeLcP4kAWQ609twvCdcHxq1oSL8svv39ZuSvajcD8B+5D0eL4+s1Is2D/O6KN3qcTeRA=="], - "unconfig-core": ["unconfig-core@7.5.0", "", { "dependencies": { "@quansync/fs": "^1.0.0", "quansync": "^1.0.0" } }, "sha512-Su3FauozOGP44ZmKdHy2oE6LPjk51M/TRRjHv2HNCWiDvfvCoxC2lno6jevMA91MYAdCdwP05QnWdWpSbncX/w=="], - "undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="], "unist-util-is": ["unist-util-is@6.0.1", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g=="], @@ -975,10 +600,6 @@ "unist-util-visit-parents": ["unist-util-visit-parents@6.0.2", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-is": "^6.0.0" } }, "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ=="], - "unpipe": ["unpipe@1.0.0", "", {}, "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="], - - "unrun": ["unrun@0.2.39", "", { "dependencies": { "rolldown": "1.0.0-rc.17" }, "peerDependencies": { "synckit": "^0.11.11" }, "optionalPeers": ["synckit"], "bin": { "unrun": "dist/cli.mjs" } }, "sha512-h9FxYVpztY/wwq+bauLOh6Y3CWu2IVeRLq5lxzneBiIU9Tn86OGp9xiQrGhnYspAmg5dzdY0Cc8+Y70kuTARCg=="], - "update-browserslist-db": ["update-browserslist-db@1.2.3", "", { "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" }, "peerDependencies": { "browserslist": ">= 4.21.0" }, "bin": { "update-browserslist-db": "cli.js" } }, "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w=="], "uri-js": ["uri-js@4.4.1", "", { "dependencies": { "punycode": "^2.1.0" } }, 
"sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg=="], @@ -989,16 +610,12 @@ "validate-npm-package-name": ["validate-npm-package-name@7.0.2", "", {}, "sha512-hVDIBwsRruT73PbK7uP5ebUt+ezEtCmzZz3F59BSr2F6OVFnJ/6h8liuvdLrQ88Xmnk6/+xGGuq+pG9WwTuy3A=="], - "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], - "vue-eslint-parser": ["vue-eslint-parser@10.4.1", "", { "dependencies": { "debug": "^4.4.0", "eslint-scope": "^8.2.0 || ^9.0.0", "eslint-visitor-keys": "^4.2.0 || ^5.0.0", "espree": "^10.3.0 || ^11.0.0", "esquery": "^1.6.0", "semver": "^7.6.3" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0" } }, "sha512-Gk6gRDj0n/fkRa3C3l0bBheoBckUq/Rs0F/TvMWIS6nzzx67amAViMe9CkNgsP2tXyQONvGiHQESHwFtZ3aYDA=="], "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="], "word-wrap": ["word-wrap@1.2.5", "", {}, "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA=="], - "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="], - "xml-name-validator": ["xml-name-validator@4.0.0", "", {}, "sha512-ICP2e+jsHvAj2E2lIHxa5tjXRlKDJo4IdvPvCXbXQGdzSfmSpNVyIKMvoZHjDY9DP0zV17iI85o90vRFXNccRw=="], "yaml": ["yaml@2.9.0", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA=="], @@ -1007,92 +624,42 @@ "yocto-queue": ["yocto-queue@0.1.0", "", {}, "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q=="], - "zod": ["zod@4.4.3", "", {}, "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ=="], - - "zod-to-json-schema": 
["zod-to-json-schema@3.25.2", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="], - "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], "@es-joy/jsdoccomment/comment-parser": ["comment-parser@1.4.5", "", {}, "sha512-aRDkn3uyIlCFfk5NUA+VdwMmMsh8JGhc4hapfV4yxymHGQ3BVskMQfoXGpCo5IoBuQ9tS5iiVKhCpTcB4pW4qw=="], "@es-joy/jsdoccomment/jsdoc-type-pratt-parser": ["jsdoc-type-pratt-parser@7.1.1", "", {}, "sha512-/2uqY7x6bsrpi3i9LVU6J89352C0rpMk0as8trXxCtvd4kPk1ke/Eyif6wqfSLvoNJqcDG9Vk4UsXgygzCt2xA=="], - "@eslint-community/eslint-utils/eslint-visitor-keys": ["eslint-visitor-keys@3.4.3", "", {}, "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag=="], + "@eslint-community/eslint-plugin-eslint-comments/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], - "@modelcontextprotocol/sdk/ajv": ["ajv@8.20.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA=="], + "@eslint-community/eslint-utils/eslint-visitor-keys": ["eslint-visitor-keys@3.4.3", "", {}, "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag=="], "@stylistic/eslint-plugin/eslint-visitor-keys": ["eslint-visitor-keys@4.2.1", "", {}, "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ=="], "@stylistic/eslint-plugin/espree": ["espree@10.4.0", "", { "dependencies": { "acorn": "^8.15.0", "acorn-jsx": "^5.3.2", "eslint-visitor-keys": "^4.2.1" } }, "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ=="], - 
"@vue/compiler-core/@babel/parser": ["@babel/parser@7.29.7", "", { "dependencies": { "@babel/types": "^7.29.7" }, "bin": "./bin/babel-parser.js" }, "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg=="], - - "@vue/compiler-core/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], - - "@vue/compiler-sfc/@babel/parser": ["@babel/parser@7.29.7", "", { "dependencies": { "@babel/types": "^7.29.7" }, "bin": "./bin/babel-parser.js" }, "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg=="], - - "@vue/compiler-sfc/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], - - "ajv-formats/ajv": ["ajv@8.20.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA=="], - - "ast-kit/@babel/parser": ["@babel/parser@8.0.0", "", { "dependencies": { "@babel/types": "^8.0.0" }, "bin": "./bin/babel-parser.js" }, "sha512-aLxAE+imI9bCcyaPrUDjBv3uSkWieifjLe0kuFOZF0zli0L6GCsTmsePnTr55adbIAgYz2zhN1vnFimCBUYcRQ=="], - - "body-parser/content-type": ["content-type@2.0.0", "", {}, "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ=="], + "@typescript-eslint/eslint-plugin/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], "clean-regexp/escape-string-regexp": ["escape-string-regexp@1.0.5", "", {}, "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg=="], - "eslint/ignore": ["ignore@5.3.2", "", {}, 
"sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], - "eslint-flat-config-utils/@eslint/config-helpers": ["@eslint/config-helpers@0.5.5", "", { "dependencies": { "@eslint/core": "^1.2.1" } }, "sha512-eIJYKTCECbP/nsKaaruF6LW967mtbQbsw4JTtSVkUQc9MneSkbrgPJAbKl9nWr0ZeowV8BfsarBmPpBzGelA2w=="], "eslint-plugin-jsdoc/@es-joy/jsdoccomment": ["@es-joy/jsdoccomment@0.86.0", "", { "dependencies": { "@types/estree": "^1.0.8", "@typescript-eslint/types": "^8.58.0", "comment-parser": "1.4.6", "esquery": "^1.7.0", "jsdoc-type-pratt-parser": "~7.2.0" } }, "sha512-ukZmRQ81WiTpDWO6D/cTBM7XbrNtutHKvAVnZN/8pldAwLoJArGOvkNyxPTBGsPjsoaQBJxlH+tE2TNA/92Qgw=="], "eslint-plugin-n/globals": ["globals@15.15.0", "", {}, "sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg=="], - "eslint-plugin-n/ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], - - "eslint-plugin-unicorn/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.29.7", "", {}, "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg=="], - "eslint-plugin-yml/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], - "katex/commander": ["commander@8.3.0", "", {}, "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww=="], - - "local-pkg/quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="], - "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], "mdast-util-frontmatter/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, 
"sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], "mlly/pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", "pathe": "^2.0.1" } }, "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="], - "onnxruntime-web/onnxruntime-common": ["onnxruntime-common@1.24.0-dev.20251116-b39e144322", "", {}, "sha512-BOoomdHYmNRL5r4iQ4bMvsl2t0/hzVQ3OM3PHD0gxeXu1PmggqBv3puZicEUVOA3AtHHYmqZtjMj9FOfGrATTw=="], - "spdx-correct/spdx-expression-parse": ["spdx-expression-parse@3.0.1", "", { "dependencies": { "spdx-exceptions": "^2.1.0", "spdx-license-ids": "^3.0.0" } }, "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q=="], - "type-is/content-type": ["content-type@2.0.0", "", {}, "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ=="], - "validate-npm-package-license/spdx-expression-parse": ["spdx-expression-parse@3.0.1", "", { "dependencies": { "spdx-exceptions": "^2.1.0", "spdx-license-ids": "^3.0.0" } }, "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q=="], - "@modelcontextprotocol/sdk/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], - - "@vue/compiler-core/@babel/parser/@babel/types": ["@babel/types@7.29.7", "", { "dependencies": { "@babel/helper-string-parser": "^7.29.7", "@babel/helper-validator-identifier": "^7.29.7" } }, "sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA=="], - - "@vue/compiler-sfc/@babel/parser/@babel/types": ["@babel/types@7.29.7", "", { "dependencies": { "@babel/helper-string-parser": "^7.29.7", "@babel/helper-validator-identifier": "^7.29.7" } }, 
"sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA=="], - - "ajv-formats/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], - - "ast-kit/@babel/parser/@babel/types": ["@babel/types@8.0.0", "", { "dependencies": { "@babel/helper-string-parser": "^8.0.0", "@babel/helper-validator-identifier": "^8.0.0" } }, "sha512-K8ponJDxBwDHigkeFqaqT5wLGl4bTlwMafR8k7b5CPxr6Ww+UG9ls8Yx6Tcpboxu97eeGVEEyKcHmEyOwN1vSw=="], - "mlly/pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], - - "@vue/compiler-core/@babel/parser/@babel/types/@babel/helper-string-parser": ["@babel/helper-string-parser@7.29.7", "", {}, "sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw=="], - - "@vue/compiler-core/@babel/parser/@babel/types/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.29.7", "", {}, "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg=="], - - "@vue/compiler-sfc/@babel/parser/@babel/types/@babel/helper-string-parser": ["@babel/helper-string-parser@7.29.7", "", {}, "sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw=="], - - "@vue/compiler-sfc/@babel/parser/@babel/types/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.29.7", "", {}, "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg=="], - - "ast-kit/@babel/parser/@babel/types/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@8.0.0", "", {}, "sha512-kXxQVZHNOctSJJsqzmcbPSCEkM6oHNnDIkua7g9RCO9xRHj2eCiKvRx2KPdfWR9QxcGWnK/oArrtunmie3rL9g=="], } } diff --git a/crates/csp-node/.gitignore b/crates/csp-node/.gitignore new file mode 100644 index 
0000000..7f47b5a --- /dev/null +++ b/crates/csp-node/.gitignore @@ -0,0 +1,9 @@ +# napi-rs build artifacts +*.node +index.js + +# Generated per-platform packages (created by `napi prepublish` / `napi artifacts`) +npm/ + +# Node tooling +node_modules/ diff --git a/crates/csp-node/Cargo.toml b/crates/csp-node/Cargo.toml new file mode 100644 index 0000000..2677ac4 --- /dev/null +++ b/crates/csp-node/Cargo.toml @@ -0,0 +1,29 @@ +# napi-rs native Node bindings — the in-process JS SDK for csp. +# +# This crate is the SDK distribution channel (decision A): an in-process native +# addon over `crates/csp`, shipped as the `@pleaseai/csp-sdk` npm package. It is +# DISTINCT from the CLI/MCP launcher under `npm/`, which execs the standalone +# `csp` binary (and preserves the no-runtime Homebrew story). Not published to +# crates.io — the artifact is the `.node` addon, not a Rust library. +[package] +name = "csp-node" +description = "napi-rs native Node bindings for csp (the @pleaseai/csp-sdk in-process SDK)." +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +authors.workspace = true +publish = false + +[lib] +# `cdylib` produces the `.node` addon loaded by Node; `lib` (rlib) keeps the +# crate linkable for `cargo test --workspace` and doctests. +crate-type = ["cdylib", "lib"] + +[dependencies] +csp = { workspace = true } +napi = { workspace = true } +napi-derive = { workspace = true } + +[build-dependencies] +napi-build = { workspace = true } diff --git a/crates/csp-node/README.md b/crates/csp-node/README.md new file mode 100644 index 0000000..6a16ed2 --- /dev/null +++ b/crates/csp-node/README.md @@ -0,0 +1,59 @@ +# @pleaseai/csp-sdk + +In-process native (napi-rs) SDK for [csp](https://github.com/pleaseai/code-search) — fast, accurate hybrid code search for agents. 
+ +This package binds the Rust `csp` core (`crates/csp`) **directly** through a Node-API addon, so JS callers run search in-process: no subprocess, no JSON round-trip, native objects back. + +> **This is the library channel.** The `csp` **CLI + MCP server** ship separately as [`@pleaseai/csp`](../../npm) — a thin launcher that execs the standalone Rust binary (and the Homebrew formula, which needs no Node runtime). Use this SDK when you want to embed search in a Node/Bun program; use `@pleaseai/csp` to run the CLI or an MCP server. Both are built from the same `crates/csp` core (decision A in the repo `CLAUDE.md`). + +## Usage + +```ts +import { CspIndex, ContentType } from '@pleaseai/csp-sdk' + +// Build entrypoints run on a worker thread and return a Promise. +const index = await CspIndex.fromPath('./my-project', { content: [ContentType.Code] }) + +for (const { chunk, score } of index.search('parse config file', { topK: 5 })) { + console.log(score.toFixed(3), chunk.location) +} + +// Persist (sync) / reload (async) +index.save('./.csp-index') +const reloaded = await CspIndex.loadFromDisk('./.csp-index') + +// Related chunks (sync, per-query) +const [top] = reloaded.search('auth middleware') +reloaded.findRelated(top.chunk, { topK: 10 }) +``` + +`fromPath` / `fromGit` / `loadFromDisk` are async (`Promise`) — the heavy indexing / clone / disk read runs off the Node event loop. `search` / `findRelated` / `save` / `stats` are synchronous. + +The full type surface is in [`index.d.ts`](./index.d.ts). + +## Develop + +This crate (`csp-node`) is a member of the workspace Cargo build. 
The Rust side compiles like any crate: + +```bash +cargo build -p csp-node # compile the bindings +cargo clippy -p csp-node --all-targets -- -D warnings +``` + +The `.node` addon + the JS loader (`index.js`) are produced by `@napi-rs/cli`, which reads `package.json` and the colocated `Cargo.toml`: + +```bash +cd crates/csp-node +bun install # @napi-rs/cli +bun run build # napi build --platform --release → csp-sdk..node + index.js +``` + +`index.d.ts` is committed as the authoritative type surface; `napi build` regenerates it with the same shape. The compiled `*.node`, generated `index.js`, and per-platform `npm/` packages are gitignored. + +## Publish + +Cross-compilation targets mirror the binary release (`release-rust.yml`): darwin arm64/x64, linux x64/arm64-gnu + x64-musl, win32 x64. Publishing follows the standard napi-rs flow (build per-target on CI → `napi artifacts` → `napi prepublish -t npm`), with the version kept in lockstep by release-please (`crates/csp-node/package.json#version` is an `extra-files` entry). + +## CI / release + +Cross-compilation + publish runs in [`release-sdk.yml`](../../.github/workflows/release-sdk.yml): a per-target build matrix produces `csp-sdk..node` artifacts, then a publish job runs `napi prepublish -t npm` to push the wrapper + per-platform packages via npm Trusted Publishing (OIDC). It runs as a reusable workflow called by `release-please.yml` when a release is created (or via manual dispatch, which only publishes when the `publish` input is set); the version stays in lockstep via release-please (`extra-files`). diff --git a/crates/csp-node/build.rs b/crates/csp-node/build.rs new file mode 100644 index 0000000..e13788a --- /dev/null +++ b/crates/csp-node/build.rs @@ -0,0 +1,6 @@ +// napi-rs codegen + linker setup. Configures the platform-specific link flags +// (e.g. `-undefined dynamic_lookup` on macOS) so the cdylib resolves Node-API +// symbols at load time, and emits the binding registration glue.
+fn main() { + napi_build::setup(); +} diff --git a/crates/csp-node/index.d.ts b/crates/csp-node/index.d.ts new file mode 100644 index 0000000..dd1b522 --- /dev/null +++ b/crates/csp-node/index.d.ts @@ -0,0 +1,65 @@ +/* tslint:disable */ +/* eslint-disable */ + +/* auto-generated by NAPI-RS */ +/* + * Committed as the authoritative type surface for @pleaseai/csp-sdk. `napi + * build` regenerates this file from crates/csp-node/src/lib.rs; keep the two in + * sync (a build overwrites this with the same shape). + */ + +export const enum ContentType { + Code = 0, + Docs = 1, + Config = 2 +} +export interface LoadOptions { + /** Path to a Model2Vec model directory; omit to use the bundled default. */ + modelPath?: string + /** Content types to index; omit for the default set. */ + content?: Array +} +export interface QueryOptions { + /** Maximum number of results to return. */ + topK?: number + /** Restrict results to these languages. */ + filterLanguages?: Array + /** Restrict results to chunks whose path matches one of these substrings. */ + filterPaths?: Array +} +export interface Chunk { + content: string + filePath: string + startLine: number + endLine: number + language?: string + /** `filePath:startLine-endLine`. */ + location: string +} +export interface SearchResult { + chunk: Chunk + score: number +} +export interface IndexStats { + indexedFiles: number + totalChunks: number + /** language → chunk count. */ + languages: Record +} +/** Hybrid (dense + BM25) code-search index. */ +export class CspIndex { + /** Build an index from a local directory. Runs on a worker thread. */ + static fromPath(path: string, options?: LoadOptions | undefined | null): Promise + /** Build an index from a remote git URL (shallow clone into a temp dir). Runs on a worker thread. */ + static fromGit(url: string, options?: LoadOptions | undefined | null, gitRef?: string | undefined | null): Promise + /** Load an index previously persisted with `save`. Runs on a worker thread. 
*/ + static loadFromDisk(dir: string): Promise + /** Hybrid search over the indexed chunks. */ + search(query: string, options?: QueryOptions | undefined | null): Array + /** Find chunks similar to a seed chunk, excluding the seed itself. */ + findRelated(seed: Chunk, options?: QueryOptions | undefined | null): Array + /** Persist the index to a directory. */ + save(dir: string, contentHash?: string | undefined | null): void + /** Aggregate index statistics. */ + stats(): IndexStats +} diff --git a/crates/csp-node/package.json b/crates/csp-node/package.json new file mode 100644 index 0000000..2c18cb9 --- /dev/null +++ b/crates/csp-node/package.json @@ -0,0 +1,64 @@ +{ + "name": "@pleaseai/csp-sdk", + "version": "0.1.4", + "description": "In-process native (napi-rs) SDK for csp — fast, accurate hybrid code search for agents. Binds the Rust core directly (no subprocess). The CLI/MCP server ships separately as @pleaseai/csp.", + "license": "MIT", + "homepage": "https://github.com/pleaseai/code-search", + "repository": { + "type": "git", + "url": "https://github.com/pleaseai/code-search.git" + }, + "bugs": { + "url": "https://github.com/pleaseai/code-search/issues" + }, + "keywords": [ + "code-search", + "hybrid-search", + "semantic-search", + "bm25", + "embeddings", + "agent", + "rag", + "napi-rs", + "native" + ], + "main": "index.js", + "types": "index.d.ts", + "type": "commonjs", + "napi": { + "name": "csp-sdk", + "triples": { + "defaults": true, + "additional": [ + "aarch64-apple-darwin", + "aarch64-unknown-linux-gnu", + "x86_64-unknown-linux-musl" + ] + } + }, + "files": [ + "index.js", + "index.d.ts" + ], + "engines": { + "node": ">=22" + }, + "scripts": { + "build": "napi build --platform --release", + "build:debug": "napi build --platform", + "artifacts": "napi artifacts", + "prepublishOnly": "napi prepublish -t npm", + "version": "napi version" + }, + "devDependencies": { + "@napi-rs/cli": "^2.18.4" + }, + "optionalDependencies": { + 
"@pleaseai/csp-sdk-darwin-arm64": "0.0.0", + "@pleaseai/csp-sdk-darwin-x64": "0.0.0", + "@pleaseai/csp-sdk-linux-arm64-gnu": "0.0.0", + "@pleaseai/csp-sdk-linux-x64-gnu": "0.0.0", + "@pleaseai/csp-sdk-linux-x64-musl": "0.0.0", + "@pleaseai/csp-sdk-win32-x64-msvc": "0.0.0" + } +} diff --git a/crates/csp-node/src/lib.rs b/crates/csp-node/src/lib.rs new file mode 100644 index 0000000..2f0d0f2 --- /dev/null +++ b/crates/csp-node/src/lib.rs @@ -0,0 +1,318 @@ +//! napi-rs native bindings — the in-process JS SDK for csp (`@pleaseai/csp-sdk`). +//! +//! This is the **library** distribution channel: it binds the `crates/csp` core +//! directly so JS callers run hybrid code search in-process (no subprocess, no +//! JSON round-trip), returning native objects. It is separate from the CLI/MCP +//! launcher under `npm/`, which execs the standalone `csp` binary. +//! +//! The public surface mirrors the README library contract: a `CspIndex` class +//! with `fromPath` / `fromGit` / `loadFromDisk` factories and `search` / +//! `findRelated` / `save` / `stats` methods, over camelCase `Chunk` / +//! `SearchResult` shapes. napi-rs converts Rust `snake_case` identifiers to JS +//! `camelCase` automatically (so `file_path` → `filePath`, `from_path` → +//! `fromPath`). +//! +//! The build entrypoints (`fromPath` / `fromGit` / `loadFromDisk`) do heavy work +//! — file walking, embedding, and (for git) a network clone — so they run on the +//! libuv worker pool via napi [`AsyncTask`] and resolve a `Promise`; they do NOT +//! block the Node event loop. The per-query methods (`search` / `findRelated` / +//! `stats`) and `save` stay synchronous: they are fast and/or borrow `&self`, +//! which cannot cross to a worker thread without cloning the whole index. 
+ +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; + +use napi::bindgen_prelude::*; +use napi::Task; +use napi_derive::napi; + +use csp::indexing::index::{ + CspIndex as CoreIndex, LoadOptions as CoreLoadOptions, QueryOptions as CoreQueryOptions, +}; +use csp::search::SearchResult as CoreSearchResult; +use csp::types::{chunk_location, Chunk as CoreChunk, ContentType as CoreContentType}; + +/// Content type for indexing / search-pipeline selection. Mirrors the README +/// `ContentType` enum (`Code | Docs | Config`). +#[napi] +pub enum ContentType { + Code, + Docs, + Config, +} + +/// Build/load options shared by `fromPath` / `fromGit`. +#[napi(object)] +pub struct LoadOptions { + /// Path to a Model2Vec model directory; omit to use the bundled default. + pub model_path: Option, + /// Content types to index; omit for the default set. + pub content: Option>, +} + +/// Query options for `search` / `findRelated`. +#[napi(object)] +pub struct QueryOptions { + /// Maximum number of results to return. + pub top_k: Option, + /// Restrict results to these languages. + pub filter_languages: Option>, + /// Restrict results to chunks whose path matches one of these substrings. + pub filter_paths: Option>, +} + +/// A single indexable unit of code (camelCase JS shape, with derived `location`). +#[napi(object)] +pub struct Chunk { + pub content: String, + pub file_path: String, + pub start_line: u32, + pub end_line: u32, + pub language: Option, + /// `filePath:startLine-endLine`. + pub location: String, +} + +/// A scored search result. +#[napi(object)] +pub struct SearchResult { + pub chunk: Chunk, + pub score: f64, +} + +/// Aggregate index statistics. +#[napi(object)] +pub struct IndexStats { + pub indexed_files: u32, + pub total_chunks: u32, + /// language → chunk count. + pub languages: HashMap, +} + +/// Hybrid (dense + BM25) code-search index. 
+#[napi(js_name = "CspIndex")] +pub struct CspIndex { + // `Arc` so a future async `search` / `find_related` can hand a cheap shared + // handle to a libuv worker thread instead of cloning the whole index. + inner: Arc, +} + +#[napi] +impl CspIndex { + /// Build an index from a local directory. Resolves a `Promise` + /// (the file walk + embedding runs on a worker thread). + #[napi(ts_return_type = "Promise")] + pub fn from_path(path: String, options: Option) -> AsyncTask { + AsyncTask::new(BuildFromPath { path, options }) + } + + /// Build an index from a remote git URL (shallow clone into a temp dir). + /// Resolves a `Promise` (clone + build runs on a worker thread). + #[napi(ts_return_type = "Promise")] + pub fn from_git( + url: String, + options: Option, + git_ref: Option, + ) -> AsyncTask { + AsyncTask::new(BuildFromGit { + url, + options, + git_ref, + }) + } + + /// Load an index previously persisted with `save`. Resolves a + /// `Promise` (disk read runs on a worker thread). + #[napi(ts_return_type = "Promise")] + pub fn load_from_disk(dir: String) -> AsyncTask { + AsyncTask::new(LoadFromDisk { dir }) + } + + /// Hybrid search over the indexed chunks. + #[napi] + pub fn search(&self, query: String, options: Option) -> Vec { + self.inner + .search(&query, &to_core_query_options(options)) + .iter() + .map(to_js_result) + .collect() + } + + /// Find chunks similar to a seed chunk, excluding the seed itself. + #[napi] + pub fn find_related(&self, seed: Chunk, options: Option) -> Vec { + self.inner + .find_related(&to_core_chunk(seed), &to_core_query_options(options)) + .iter() + .map(to_js_result) + .collect() + } + + /// Persist the index to a directory. + #[napi] + pub fn save(&self, dir: String, content_hash: Option) -> Result<()> { + self.inner + .save(Path::new(&dir), content_hash.as_deref()) + .map_err(to_napi_err) + } + + /// Aggregate index statistics. 
+ #[napi] + pub fn stats(&self) -> IndexStats { + let s = self.inner.stats(); + IndexStats { + indexed_files: s.indexed_files as u32, + total_chunks: s.total_chunks as u32, + languages: s + .languages + .into_iter() + .map(|(lang, count)| (lang, count as u32)) + .collect(), + } + } +} + +// --- async build tasks (run on the libuv worker pool) --- +// +// Each holds owned inputs, runs the blocking core call in `compute`, and wraps +// the resulting core index in the JS `CspIndex` class in `resolve`. `options` +// is `take`n in `compute` (it runs once), avoiding a needless clone. + +/// Backs `CspIndex.fromPath`. +pub struct BuildFromPath { + path: String, + options: Option, +} + +impl Task for BuildFromPath { + type Output = CoreIndex; + type JsValue = CspIndex; + + fn compute(&mut self) -> Result { + CoreIndex::from_path( + Path::new(&self.path), + &to_core_load_options(self.options.take()), + ) + .map_err(to_napi_err) + } + + fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { + Ok(CspIndex { + inner: Arc::new(output), + }) + } +} + +/// Backs `CspIndex.fromGit`. +pub struct BuildFromGit { + url: String, + options: Option, + git_ref: Option, +} + +impl Task for BuildFromGit { + type Output = CoreIndex; + type JsValue = CspIndex; + + fn compute(&mut self) -> Result { + CoreIndex::from_git( + &self.url, + &to_core_load_options(self.options.take()), + self.git_ref.as_deref(), + ) + .map_err(to_napi_err) + } + + fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { + Ok(CspIndex { + inner: Arc::new(output), + }) + } +} + +/// Backs `CspIndex.loadFromDisk`. 
+pub struct LoadFromDisk { + dir: String, +} + +impl Task for LoadFromDisk { + type Output = CoreIndex; + type JsValue = CspIndex; + + fn compute(&mut self) -> Result { + CoreIndex::load_from_disk(Path::new(&self.dir)).map_err(to_napi_err) + } + + fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { + Ok(CspIndex { + inner: Arc::new(output), + }) + } +} + +// --- conversions between the JS-facing and core types --- + +fn to_napi_err(message: String) -> Error { + Error::from_reason(message) +} + +fn to_core_content(content: &ContentType) -> CoreContentType { + match content { + ContentType::Code => CoreContentType::Code, + ContentType::Docs => CoreContentType::Docs, + ContentType::Config => CoreContentType::Config, + } +} + +fn to_core_load_options(options: Option) -> CoreLoadOptions { + match options { + None => CoreLoadOptions::default(), + Some(o) => CoreLoadOptions { + model_path: o.model_path, + content: o + .content + .map(|types| types.iter().map(to_core_content).collect()), + }, + } +} + +fn to_core_query_options(options: Option) -> CoreQueryOptions { + match options { + None => CoreQueryOptions::default(), + Some(o) => CoreQueryOptions { + top_k: o.top_k.map(|n| n as usize), + filter_languages: o.filter_languages, + filter_paths: o.filter_paths, + }, + } +} + +fn to_core_chunk(chunk: Chunk) -> CoreChunk { + // `location` is derived; never trusted on the way in. 
+ CoreChunk { + content: chunk.content, + file_path: chunk.file_path, + start_line: chunk.start_line, + end_line: chunk.end_line, + language: chunk.language, + } +} + +fn to_js_chunk(chunk: &CoreChunk) -> Chunk { + Chunk { + content: chunk.content.clone(), + file_path: chunk.file_path.clone(), + start_line: chunk.start_line, + end_line: chunk.end_line, + language: chunk.language.clone(), + location: chunk_location(chunk), + } +} + +fn to_js_result(result: &CoreSearchResult) -> SearchResult { + SearchResult { + chunk: to_js_chunk(&result.chunk), + score: result.score, + } +} diff --git a/package.json b/package.json index 7ab990e..dbfe2fb 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "version": "0.1.4", "private": true, "packageManager": "bun@1.3.14", - "description": "Fast and Accurate Code Search for Agents — TypeScript port of MinishLab/semble", + "description": "Fast and accurate hybrid code search for agents — Rust port of MinishLab/semble. This manifest holds repo JS tooling and the release-please version anchor; the published artifacts are the Rust binary (Homebrew) and its npm wrapper under npm/.", "author": "Minsu Lee ", "license": "MIT", "homepage": "https://github.com/pleaseai/code-search", @@ -26,47 +26,20 @@ "rag", "tree-sitter" ], - "exports": { - ".": { - "types": "./dist/index.d.mts", - "import": "./dist/index.mjs" - } - }, - "main": "./dist/index.mjs", - "types": "./dist/index.d.mts", - "bin": { - "csp": "./dist/cli.mjs" - }, - "files": [ - "dist" - ], "engines": { "node": ">=22.0.0", "bun": ">=1.3.10" }, "scripts": { - "build": "tsdown", - "dev": "tsdown --watch", "typecheck": "tsc --noEmit", "lint": "eslint . --cache", - "lint:fix": "eslint . 
--fix --cache", - "test": "bun test", - "prepublishOnly": "bun run build" - }, - "dependencies": { - "@huggingface/transformers": "^4.2.0", - "@kreuzberg/tree-sitter-language-pack": "^1.8.1", - "@modelcontextprotocol/sdk": "^1.29.0", - "chokidar": "^5.0.0", - "commander": "^14.0.3", - "ignore": "^7.0.5" + "lint:fix": "eslint . --fix --cache" }, "devDependencies": { "@pleaseai/eslint-config": "^0.0.4", "@types/bun": "latest", "eslint": "^10.0.3", "jiti": "^2.7.0", - "tsdown": "^0.21.5", "typescript": "^6.0.2" } } diff --git a/release-please-config.json b/release-please-config.json index 7a3d138..d8158a1 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -29,12 +29,13 @@ "jsonpath": "$.version" }, { - "type": "generic", - "path": "Cargo.toml" + "type": "json", + "path": "crates/csp-node/package.json", + "jsonpath": "$.version" }, { "type": "generic", - "path": "src/version.ts" + "path": "Cargo.toml" } ] } diff --git a/src/agents/antigravity.md b/src/agents/antigravity.md deleted file mode 100644 index adaaeec..0000000 --- a/src/agents/antigravity.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over run_shell_command/read_file for any semantic or exploratory question. -tools: - - run_shell_command - - read_file ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. 
- -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. \ No newline at end of file diff --git a/src/agents/claude.md b/src/agents/claude.md deleted file mode 100644 index 238afdd..0000000 --- a/src/agents/claude.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. 
Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Grep/Glob/Read for any semantic or exploratory question. -tools: Bash, Read ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. 
Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/commandcode.md b/src/agents/commandcode.md deleted file mode 100644 index aa008b7..0000000 --- a/src/agents/commandcode.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/read_file for any semantic or exploratory question. -tools: bash, read_file ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. 
- -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. \ No newline at end of file diff --git a/src/agents/copilot.md b/src/agents/copilot.md deleted file mode 100644 index 238afdd..0000000 --- a/src/agents/copilot.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Grep/Glob/Read for any semantic or exploratory question. 
-tools: Bash, Read ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. 
Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/cursor.md b/src/agents/cursor.md deleted file mode 100644 index 23e85d9..0000000 --- a/src/agents/cursor.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question. ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. 
- -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/gemini.md b/src/agents/gemini.md deleted file mode 100644 index 9436d1a..0000000 --- a/src/agents/gemini.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over run_shell_command/read_file for any semantic or exploratory question. -tools: - - run_shell_command - - read_file ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. 
- -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/kiro.md b/src/agents/kiro.md deleted file mode 100644 index 01e0df1..0000000 --- a/src/agents/kiro.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over shell/read tools for any semantic or exploratory question. 
-tools: - - shell - - read ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. 
Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/opencode.md b/src/agents/opencode.md deleted file mode 100644 index 8a5abc0..0000000 --- a/src/agents/opencode.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question. -mode: subagent -permission: - bash: allow - read: allow ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. 
- -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. diff --git a/src/agents/pi.md b/src/agents/pi.md deleted file mode 100644 index 374f998..0000000 --- a/src/agents/pi.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question. ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. 
- -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. \ No newline at end of file diff --git a/src/agents/reasonix.md b/src/agents/reasonix.md deleted file mode 100644 index 9353344..0000000 --- a/src/agents/reasonix.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -name: csp-search -description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/read_file for any semantic or exploratory question. 
-runAs: subagent -allowed-tools: bash, read_file ---- - -Use `csp search` to find code by describing what it does or naming a symbol/identifier, instead of grep: - -```bash -csp search "authentication flow" ./my-project -csp search "save_pretrained" ./my-project -csp search "save model to disk" ./my-project --top-k 10 -``` - -If you anticipate doing more than one search, use `csp index` to create an index. - -```bash -csp index ./my-project -o my_index -``` - -You can then reuse this index later on: - -```bash -csp search "save_pretrained" --index my_index -``` - -An index is not automatically updated, so if the code changes significantly, reindex. If you notice stale results while resolving searches to files, reindex. - -Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config: - -```bash -csp search "deployment guide" ./my-project --content docs -csp search "database host port" ./my-project --content config -csp search "authentication" ./my-project --content all -``` - -Use `csp find-related` to discover code similar to a known location (pass `filePath` and `line` from a prior search result): - -```bash -csp find-related src/auth.ts 42 ./my-project -``` - -Like search, `find-related` also accepts an `--index` argument. - -`path` defaults to the current directory when omitted; git URLs are accepted. - -If `csp` is not on `$PATH`, use `bunx @pleaseai/csp` in its place. - -### Workflow - -1. Index the repo using `csp index -o cached_index`. -2. Start with `csp search` to find relevant chunks. Pass the index to achieve results faster. -3. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything. -4. Inspect full files only when the returned chunk does not give enough context. -5. Optionally use `csp find-related` with a promising result's `filePath` and `line` to discover related implementations. -6. 
Use grep only when you need exhaustive literal matches or quick confirmation of an exact string. \ No newline at end of file diff --git a/src/chunking/.gitkeep b/src/chunking/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/chunking/chunk-source.test.ts b/src/chunking/chunk-source.test.ts deleted file mode 100644 index efa6ca6..0000000 --- a/src/chunking/chunk-source.test.ts +++ /dev/null @@ -1,85 +0,0 @@ -import { describe, expect, it } from 'bun:test' - -import { chunkSource, DESIRED_CHUNK_LENGTH_CHARS } from './chunk-source.ts' - -describe('chunkSource', () => { - it('returns [] for empty source', async () => { - expect(await chunkSource('', 'foo.txt', null)).toEqual([]) - }) - - it('returns [] for whitespace-only source', async () => { - expect(await chunkSource(' \n\t\n ', 'foo.txt', null)).toEqual([]) - }) - - it('produces a single chunk for short plain text (no language)', async () => { - const src = 'hello\nworld\n' - const chunks = await chunkSource(src, 'foo.txt', null) - expect(chunks).toHaveLength(1) - expect(chunks[0]).toMatchObject({ - filePath: 'foo.txt', - language: null, - startLine: 1, - endLine: 2, - }) - // Content should reproduce the source (minus possibly the very last byte - // depending on the end-clamp logic). - expect(chunks[0]!.content.startsWith('hello\nworld')).toBe(true) - }) - - it('chunks ≤ DESIRED_CHUNK_LENGTH_CHARS for long source (line fallback)', async () => { - // ~3000 chars, well above the 1500-char target. 
- const line = `${'x'.repeat(49)}\n` // 50 chars per line - const src = line.repeat(60) // 3000 chars - expect(src.length).toBe(3000) - - const chunks = await chunkSource(src, 'big.txt', null) - expect(chunks.length).toBeGreaterThanOrEqual(2) - - for (const c of chunks) { - expect(c.content.length).toBeLessThanOrEqual(DESIRED_CHUNK_LENGTH_CHARS) - } - }) - - it('emits 1-indexed start/end line numbers', async () => { - const src = 'line1\nline2\nline3\nline4\n' - const chunks = await chunkSource(src, 'foo.txt', null) - expect(chunks).toHaveLength(1) - expect(chunks[0]!.startLine).toBe(1) - // Last line of content is "line4\n" — start of line 4, end is also line 4. - expect(chunks[0]!.endLine).toBe(4) - }) - - it('falls back to line chunker for an unsupported language', async () => { - const src = 'a\nb\nc\n' - const chunks = await chunkSource(src, 'foo.xyz', 'not-a-real-language') - expect(chunks).toHaveLength(1) - expect(chunks[0]!.startLine).toBe(1) - expect(chunks[0]!.language).toBe('not-a-real-language') - }) - - it('preserves filePath on every chunk', async () => { - const src = `${'a'.repeat(100)}\n`.repeat(50) - const chunks = await chunkSource(src, 'path/to/file.txt', null) - expect(chunks.length).toBeGreaterThan(0) - for (const c of chunks) { - expect(c.filePath).toBe('path/to/file.txt') - } - }) - - it('start/end lines align with source content across multi-chunk output', async () => { - // 100 lines × 40 chars = 4000 chars — comfortably above 1500. - const lines = Array.from({ length: 100 }, (_, i) => `${i.toString().padStart(3, '0')} ${'x'.repeat(35)}`) - const src = `${lines.join('\n')}\n` - const chunks = await chunkSource(src, 'foo.txt', null) - expect(chunks.length).toBeGreaterThanOrEqual(2) - - // First chunk must start at line 1; chunks are sorted; line ranges should - // be contiguous (next chunk starts on or right after the previous end). 
- expect(chunks[0]!.startLine).toBe(1) - for (let i = 1; i < chunks.length; i++) { - const prev = chunks[i - 1]! - const cur = chunks[i]! - expect(cur.startLine).toBeGreaterThanOrEqual(prev.endLine) - } - }) -}) diff --git a/src/chunking/chunk-source.ts b/src/chunking/chunk-source.ts deleted file mode 100644 index b5f89e8..0000000 --- a/src/chunking/chunk-source.ts +++ /dev/null @@ -1,78 +0,0 @@ -// Port of src/semble/chunking/chunking.py -// -// Public entry point that takes raw source text and a language hint and -// returns concrete `Chunk` values with line numbers. Uses the AST chunker -// when the language is supported, line fallback otherwise. - -import type { ChunkBoundary } from './core.ts' -import { chunk, chunkLines, isSupportedLanguage } from './core.ts' - -// Inline Chunk type until Unit 1 (types) lands. -// Once `src/types.ts` exists, replace this with: -// import type { Chunk } from '../types.ts' -export interface Chunk { - content: string - filePath: string - startLine: number - endLine: number - language: string | null -} - -/** The desired length of chunks in chars. */ -export const DESIRED_CHUNK_LENGTH_CHARS = 1500 - -/** Chunk pre-read source text. */ -export async function chunkSource( - source: string, - filePath: string, - language: string | null, -): Promise { - if (source.trim().length === 0) { - return [] - } - - let chunkBoundaries: ChunkBoundary[] | null = null - if (language !== null && isSupportedLanguage(language)) { - chunkBoundaries = await chunk(source, language, DESIRED_CHUNK_LENGTH_CHARS) - } - - // This is an `if` (not `else`) because the error state of the parser - // above is `null` — fall through and use the line chunker. - if (chunkBoundaries === null) { - chunkBoundaries = chunkLines(source, DESIRED_CHUNK_LENGTH_CHARS) - } - - // Resolve 1-indexed line numbers in a single pass. 
Boundaries are sorted by - // their start offset, so we can advance a cursor through `source` once - // instead of rescanning from index 0 per chunk (avoids O(N²) on large files). - // Matches semble parity: only `\n` counts as a newline (see chunking.py). - const chunks: Chunk[] = [] - let cursor = 0 - let line = 1 - const advanceTo = (target: number): number => { - const limit = Math.min(target, source.length) - while (cursor < limit) { - if (source[cursor] === '\n') { - line += 1 - } - cursor += 1 - } - return line - } - - for (const boundary of chunkBoundaries) { - // Clamp to start_index so zero-length chunks don't produce an off-by-one. - const endIndex = Math.max(boundary.end - 1, boundary.start) - const text = source.slice(boundary.start, endIndex + 1) - const startLine = advanceTo(boundary.start) - const endLine = advanceTo(endIndex) - chunks.push({ - content: text, - filePath, - startLine, - endLine, - language, - }) - } - return chunks -} diff --git a/src/chunking/core.test.ts b/src/chunking/core.test.ts deleted file mode 100644 index 9900fd3..0000000 --- a/src/chunking/core.test.ts +++ /dev/null @@ -1,226 +0,0 @@ -import type { ChunkBoundary } from './core.ts' - -import { describe, expect, it } from 'bun:test' -import { - _mergeAdjacentChunks, - _mergeNode, - _mergeNodeInner, - chunk, - chunkLines, - isSupportedLanguage, - MIN_CHUNK_SIZE, - RECURSION_DEPTH, -} from './core.ts' - -describe('constants', () => { - it('matches semble defaults', () => { - expect(RECURSION_DEPTH).toBe(500) - expect(MIN_CHUNK_SIZE).toBe(50) - }) -}) - -describe('isSupportedLanguage', () => { - it('returns true for known languages and false for unknown ones', () => { - expect(isSupportedLanguage('typescript')).toBe(true) - expect(isSupportedLanguage('python')).toBe(true) - expect(isSupportedLanguage('not-a-real-language')).toBe(false) - }) -}) - -describe('_mergeAdjacentChunks', () => { - it('returns [] for empty input', () => { - expect(_mergeAdjacentChunks([], 
100)).toEqual([]) - }) - - it('passes through a single chunk', () => { - expect(_mergeAdjacentChunks([{ start: 0, end: 50 }], 100)).toEqual([ - { start: 0, end: 50 }, - ]) - }) - - it('merges adjacent chunks under the desired length', () => { - const input: ChunkBoundary[] = [ - { start: 0, end: 30 }, - { start: 30, end: 60 }, - { start: 60, end: 80 }, - ] - expect(_mergeAdjacentChunks(input, 100)).toEqual([{ start: 0, end: 80 }]) - }) - - it('keeps chunks separate when the merged length exceeds desired', () => { - const input: ChunkBoundary[] = [ - { start: 0, end: 60 }, - { start: 60, end: 130 }, - ] - expect(_mergeAdjacentChunks(input, 100)).toEqual([ - { start: 0, end: 60 }, - { start: 60, end: 130 }, - ]) - }) - - it('greedily packs groups up to desired length', () => { - const input: ChunkBoundary[] = [ - { start: 0, end: 40 }, - { start: 40, end: 80 }, - { start: 80, end: 130 }, - { start: 130, end: 160 }, - ] - // 40 + 40 = 80 (fits), +50 = 130 (exceeds 100) → split, 50+30=80 (fits). - expect(_mergeAdjacentChunks(input, 100)).toEqual([ - { start: 0, end: 80 }, - { start: 80, end: 160 }, - ]) - }) -}) - -describe('chunkLines', () => { - it('returns [] for empty source', () => { - expect(chunkLines('', 100)).toEqual([]) - }) - - it('returns [] for whitespace-only source', () => { - expect(chunkLines(' \n\n\t \n', 100)).toEqual([]) - }) - - it('emits one chunk for short input', () => { - const src = 'hello\nworld\n' - const chunks = chunkLines(src, 1500) - expect(chunks).toHaveLength(1) - expect(chunks[0]).toEqual({ start: 0, end: src.length }) - }) - - it('splits a long source into multiple chunks (each ≤ desired length)', () => { - // 100 lines × ~40 chars = ~4000 chars total. 
- const line = `${'x'.repeat(39)}\n` - const src = line.repeat(100) - expect(src.length).toBe(4000) - - const desired = 1500 - const chunks = chunkLines(src, desired) - expect(chunks.length).toBeGreaterThanOrEqual(2) - - // Each merged chunk should be ≤ desired length, except possibly the - // tail when a single line exceeds desired (not the case here). - for (const c of chunks) { - const len = c.end - c.start - expect(len).toBeLessThanOrEqual(desired) - } - }) - - it('chunks contiguously cover the input', () => { - const src = Array.from({ length: 200 }, (_, i) => `line ${i}\n`).join('') - const chunks = chunkLines(src, 500) - expect(chunks[0]!.start).toBe(0) - expect(chunks[chunks.length - 1]!.end).toBe(src.length) - for (let i = 1; i < chunks.length; i++) { - expect(chunks[i]!.start).toBe(chunks[i - 1]!.end) - } - }) - - it('preserves CRLF line endings in offsets', () => { - const src = 'a\r\nb\r\nc\r\n' - const chunks = chunkLines(src, 1500) - expect(chunks).toHaveLength(1) - expect(chunks[0]).toEqual({ start: 0, end: src.length }) - }) -}) - -describe('_mergeNode + _mergeNodeInner', () => { - // Build a fake tree-sitter node tree for unit testing the algorithm. - interface FakeNode { - startByte: () => number - endByte: () => number - childCount: () => number - child: (i: number) => FakeNode | null - } - - function leaf(start: number, end: number): FakeNode { - return { - startByte: () => start, - endByte: () => end, - childCount: () => 0, - child: () => null, - } - } - - function branch(start: number, end: number, children: FakeNode[]): FakeNode { - return { - startByte: () => start, - endByte: () => end, - childCount: () => children.length, - child: (i: number) => children[i] ?? 
null, - } - } - - it('returns a single boundary for a leaf', () => { - const out = _mergeNodeInner(leaf(10, 60), 100, 0) - expect(out).toEqual([{ start: 10, end: 60 }]) - }) - - it('does not recurse into nodes shorter than MIN_CHUNK_SIZE', () => { - // length = 40, MIN_CHUNK_SIZE = 50 — must be treated as a leaf. - const root = branch(0, 40, [leaf(0, 20), leaf(20, 40)]) - expect(_mergeNodeInner(root, 100, 0)).toEqual([{ start: 0, end: 40 }]) - }) - - it('caps recursion depth at RECURSION_DEPTH', () => { - const root = branch(0, 200, [leaf(0, 100), leaf(100, 200)]) - const out = _mergeNodeInner(root, 50, RECURSION_DEPTH + 1) - expect(out).toEqual([{ start: 0, end: 200 }]) - }) - - it('groups children up to the desired length', () => { - const root = branch(0, 300, [ - leaf(0, 40), - leaf(40, 80), - leaf(80, 200), - leaf(200, 300), - ]) - // 40+40=80 (fits in 100), +120 would exceed → close group. - // Then 120 > 100 → recurse (but 120-child has no children) → leaf. - // Then 100 (fits alone). - const inner = _mergeNodeInner(root, 100, 0) - expect(inner).toEqual([ - { start: 0, end: 80 }, - { start: 80, end: 200 }, - { start: 200, end: 300 }, - ]) - }) - - it('_mergeNode merges adjacent groups returned by inner', () => { - // Three small children that each end up alone in inner because they're leaves. - const root = branch(0, 150, [leaf(0, 30), leaf(30, 60), leaf(60, 150)]) - // inner returns [(0,30), (30,60), (60,150)] when desired=100: - // - 30+30=60 fits, +90=150 exceeds → group (0,60), then (60,150). - // Wait — inner has different logic. 
Let's verify the actual semble behavior: - // inner: index=0, child=(0,30) start=0 end=30 len=30, len<=100 not >desired - // inner loop: child[1]=(30,60) childLen=30, 30+30=60<=100, end=60 len=60 idx=2 - // child[2]=(60,150) childLen=90, 60+90=150>100 → break - // push (0,60) - // index=2, child=(60,150) start=60 end=150 len=90, len<=100 → push (60,150) - // → [(0,60),(60,150)] - // Then _mergeAdjacentChunks with desired=100: - // (0,60) curLen=60, (60,150) len=90, 60+90=150>100 → keep separate. - // → same. - expect(_mergeNode(root, 100)).toEqual([ - { start: 0, end: 60 }, - { start: 60, end: 150 }, - ]) - }) -}) - -describe('chunk (tree-sitter)', () => { - it('returns [] for whitespace-only input regardless of language', async () => { - expect(await chunk(' \n\t\n', 'typescript', 1500)).toEqual([]) - expect(await chunk('', 'python', 1500)).toEqual([]) - }) - - // Real tree-sitter parsing is best-effort — depends on Worker 0 installing - // @kreuzberg/tree-sitter-language-pack. When the parser is unavailable the - // function returns null and callers fall back to chunkLines. - it('returns null when no parser is available (line-fallback contract)', async () => { - // A bogus language guarantees the parser load fails. - const result = await chunk('let x = 1\n', '__definitely_not_a_real_language__', 1500) - expect(result).toBeNull() - }) -}) diff --git a/src/chunking/core.ts b/src/chunking/core.ts deleted file mode 100644 index 3c72114..0000000 --- a/src/chunking/core.ts +++ /dev/null @@ -1,311 +0,0 @@ -// Port of src/semble/chunking/core.py -// -// AST-based chunker built on top of tree-sitter with a line-based fallback. -// -// Tree-sitter integration uses `@kreuzberg/tree-sitter-language-pack`, a NAPI -// binding that exposes the raw `Parser`/`Tree`/`Node` API (see its `index.d.ts`). -// The dependency itself is owned by Unit 0 — we import lazily so this module -// loads even when the package is not yet installed, falling back to the -// line chunker in that case. 
- -import { ALL_LANGUAGES } from '../languages.ts' - -export const RECURSION_DEPTH = 500 -export const MIN_CHUNK_SIZE = 50 - -export interface ChunkBoundary { - start: number - end: number -} - -/** Minimal structural shape of a tree-sitter Node we depend on. */ -interface TreeSitterNode { - startByte: () => number - endByte: () => number - childCount: () => number - child: (index: number) => TreeSitterNode | null -} - -interface TreeSitterParser { - parse: (source: string) => { rootNode: () => TreeSitterNode } | null -} - -/** Cache of language → parser (or null when load fails). */ -const _parserCache = new Map() - -/** - * Lazily load `@kreuzberg/tree-sitter-language-pack`'s `getParser`. - * Returns null if the dependency is unavailable. - */ -async function _loadGetParser(): Promise<((name: string) => TreeSitterParser) | null> { - try { - // eslint-disable-next-line ts/ban-ts-comment - // @ts-ignore -- optional dep owned by Unit 0 - const mod = await import('@kreuzberg/tree-sitter-language-pack') - const getParser = (mod as { getParser?: (name: string) => TreeSitterParser }).getParser - return typeof getParser === 'function' ? getParser : null - } - catch { - return null - } -} - -let _getParserPromise: Promise<((name: string) => TreeSitterParser) | null> | null = null - -async function _cachedGetParser(language: string): Promise { - if (_parserCache.has(language)) { - return _parserCache.get(language) ?? null - } - _getParserPromise ??= _loadGetParser() - const getParser = await _getParserPromise - if (getParser === null) { - _parserCache.set(language, null) - return null - } - try { - const parser = getParser(language) - _parserCache.set(language, parser) - return parser - } - catch { - _parserCache.set(language, null) - return null - } -} - -/** Visible for tests. */ -export function _resetParserCacheForTests(): void { - _parserCache.clear() - _getParserPromise = null -} - -/** Check if the language is supported by tree-sitter (matches ALL_LANGUAGES). 
*/ -export function isSupportedLanguage(language: string): boolean { - return ALL_LANGUAGES.has(language) -} - -/** Merge adjacent chunks up to the desired length. */ -export function _mergeAdjacentChunks( - chunks: readonly ChunkBoundary[], - desiredLength: number, -): ChunkBoundary[] { - if (chunks.length === 0) { - return [] - } - - const merged: ChunkBoundary[] = [] - - const first = chunks[0]! - let currentStart = first.start - let currentEnd = first.end - let currentLength = currentEnd - currentStart - - for (let i = 1; i < chunks.length; i++) { - const group = chunks[i]! - const { start, end } = group - const length = end - start - - if (currentLength + length > desiredLength) { - merged.push({ start: currentStart, end: currentEnd }) - currentStart = start - currentEnd = end - currentLength = length - continue - } - - currentEnd = end - currentLength += length - } - - merged.push({ start: currentStart, end: currentEnd }) - - return merged -} - -function _children(node: TreeSitterNode): TreeSitterNode[] { - const count = node.childCount() - const out: TreeSitterNode[] = [] - for (let i = 0; i < count; i++) { - const c = node.child(i) - if (c !== null) { - out.push(c) - } - } - return out -} - -/** Recursively merge and split nodes. */ -export function _mergeNodeInner( - node: TreeSitterNode, - desiredLength: number, - depth: number, -): ChunkBoundary[] { - const children = _children(node) - - // If there are no child nodes, the only thing we can do is return the current node. - if (children.length === 0) { - return [{ start: node.startByte(), end: node.endByte() }] - } - - const length = node.endByte() - node.startByte() - - // Prevent recursion issues. A depth of > 500 is unlikely. - if (depth > RECURSION_DEPTH) { - return [{ start: node.startByte(), end: node.endByte() }] - } - - // Prevent recursing into short chunks. 
- if (length < MIN_CHUNK_SIZE) { - return [{ start: node.startByte(), end: node.endByte() }] - } - - const groups: ChunkBoundary[] = [] - let index = 0 - - while (index < children.length) { - let child = children[index]! - const start = child.startByte() - let end = child.endByte() - let runLength = end - start - - // Increment the pointer, as we accessed a child node. - index += 1 - - // If this single chunk is longer than the desired length, try to split it again. - if (runLength > desiredLength) { - groups.push(..._mergeNodeInner(child, desiredLength, depth + 1)) - continue - } - - while (index < children.length) { - // Extend the current group with one or more children, if they fit. - child = children[index]! - const childLength = child.endByte() - child.startByte() - - if (runLength + childLength > desiredLength) { - break - } - - end = child.endByte() - runLength += childLength - index += 1 - } - - groups.push({ start, end }) - } - - return groups -} - -/** Recursively turn nodes into chunks, then merge adjacent chunks. */ -export function _mergeNode(node: TreeSitterNode, desiredLength: number): ChunkBoundary[] { - const rawChunks = _mergeNodeInner(node, desiredLength, 0) - return _mergeAdjacentChunks(rawChunks, desiredLength) -} - -/** - * Split `text` into lines preserving the trailing newline on each line — - * equivalent to Python's `str.splitlines(keepends=True)`. - */ -function _splitLinesKeepEnds(text: string): string[] { - if (text.length === 0) { - return [] - } - - const lines: string[] = [] - let start = 0 - for (let i = 0; i < text.length; i++) { - const ch = text[i] - if (ch === '\n') { - lines.push(text.slice(start, i + 1)) - start = i + 1 - } - else if (ch === '\r') { - // Handle \r\n and bare \r as line separators (matches Python's splitlines). 
- const next = text[i + 1] - if (next === '\n') { - lines.push(text.slice(start, i + 2)) - i += 1 - start = i + 1 - } - else { - lines.push(text.slice(start, i + 1)) - start = i + 1 - } - } - } - if (start < text.length) { - lines.push(text.slice(start)) - } - - return lines -} - -/** Chunk source code by line. */ -export function chunkLines(text: string, desiredLength: number): ChunkBoundary[] { - if (text.trim().length === 0) { - return [] - } - - const linesAsGroups: ChunkBoundary[] = [] - let index = 0 - for (const line of _splitLinesKeepEnds(text)) { - linesAsGroups.push({ start: index, end: index + line.length }) - index += line.length - } - - return _mergeAdjacentChunks(linesAsGroups, desiredLength) -} - -/** - * Chunk source code via tree-sitter. Returns null when no parser is - * available for `language` (caller falls back to `chunkLines`). - * - * Async because parser loading is lazy — see `_loadGetParser`. - */ -export async function chunk( - text: string, - language: string, - desiredLength: number, -): Promise { - if (text.trim().length === 0) { - return [] - } - - const parser = await _cachedGetParser(language) - if (parser === null) { - return null - } - - const tree = parser.parse(text) - if (tree === null) { - return null - } - const root = tree.rootNode() - - const asBytes = new TextEncoder().encode(text) - const decoder = new TextDecoder('utf-8') - - // Convert byte offsets to character offsets in a single pass. Boundaries are - // sorted by their start offset, so we maintain running byte/char cursors and - // decode each byte exactly once — avoids O(M×N) re-decoding the prefix per - // chunk. 
- const chunks: ChunkBoundary[] = [] - let cursorByte = 0 - let cursorChar = 0 - const byteToChar = (byteOffset: number): number => { - if (byteOffset > cursorByte) { - cursorChar += decoder.decode(asBytes.subarray(cursorByte, byteOffset)).length - cursorByte = byteOffset - } - return cursorChar - } - - for (const boundary of _mergeNode(root, desiredLength)) { - const startChar = byteToChar(boundary.start) - const endChar = byteToChar(boundary.end) - chunks.push({ start: startChar, end: endChar }) - } - - return chunks -} diff --git a/src/cli.test.ts b/src/cli.test.ts deleted file mode 100644 index 062b07a..0000000 --- a/src/cli.test.ts +++ /dev/null @@ -1,939 +0,0 @@ -import type { CspIndex } from './indexing/index.ts' -import type { SearchResult } from './types.ts' -import { Buffer } from 'node:buffer' -import { existsSync } from 'node:fs' -import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import { join } from 'node:path' - -import process from 'node:process' -// Port of (none) — unit tests for src/cli.ts -import { afterEach, beforeEach, describe, expect, test } from 'bun:test' -import { _agentPath, _readAgentFile, _resolveContent, _runInit, Agent, parseArgs, runCli } from './cli.ts' -import { ContentType } from './types.ts' - -describe('Agent enum', () => { - test('enum values', () => { - expect(String(Agent.Antigravity)).toBe('antigravity') - expect(String(Agent.Claude)).toBe('claude') - expect(String(Agent.Commandcode)).toBe('commandcode') - expect(String(Agent.Copilot)).toBe('copilot') - expect(String(Agent.Cursor)).toBe('cursor') - expect(String(Agent.Gemini)).toBe('gemini') - expect(String(Agent.Kiro)).toBe('kiro') - expect(String(Agent.Opencode)).toBe('opencode') - expect(String(Agent.Pi)).toBe('pi') - expect(String(Agent.Reasonix)).toBe('reasonix') - }) -}) - -describe('_agentPath', () => { - test('claude → .claude/agents/csp-search.md', () => { - 
expect(_agentPath(Agent.Claude)).toBe('.claude/agents/csp-search.md') - }) - test('copilot → .github/agents/csp-search.md', () => { - expect(_agentPath(Agent.Copilot)).toBe('.github/agents/csp-search.md') - }) - test('cursor → .cursor/agents/csp-search.md', () => { - expect(_agentPath(Agent.Cursor)).toBe('.cursor/agents/csp-search.md') - }) - test('opencode → .opencode/agents/csp-search.md', () => { - expect(_agentPath(Agent.Opencode)).toBe('.opencode/agents/csp-search.md') - }) - test('antigravity → .antigravity/agents/csp-search.md', () => { - expect(_agentPath(Agent.Antigravity)).toBe('.antigravity/agents/csp-search.md') - }) - test('reasonix → .reasonix/agents/csp-search.md', () => { - expect(_agentPath(Agent.Reasonix)).toBe('.reasonix/agents/csp-search.md') - }) -}) - -describe('parseArgs', () => { - test('subcommand and positional', () => { - const r = parseArgs(['search', 'foo', '.']) - expect(r.command).toBe('search') - expect(r.positional).toEqual(['foo', '.']) - }) - test('--flag value', () => { - const r = parseArgs(['index', '.', '--out', 'idx']) - expect(r.flags.out).toBe('idx') - }) - test('--flag=value', () => { - const r = parseArgs(['search', 'q', '--top-k=10']) - expect(r.flags['top-k']).toBe('10') - }) - test('boolean flag', () => { - const r = parseArgs(['savings', '--verbose']) - expect(r.flags.verbose).toBe(true) - }) - test('multi-value --content', () => { - const r = parseArgs(['search', 'q', '--content', 'code', 'docs']) - expect(r.flags.content).toEqual(['code', 'docs']) - }) - test('short -k', () => { - const r = parseArgs(['search', 'q', '-k', '20']) - expect(r.flags.k).toBe('20') - }) -}) - -describe('_resolveContent', () => { - test('default code', () => { - expect(_resolveContent(['code'], false)).toEqual([ContentType.CODE]) - }) - test('all expands', () => { - expect(_resolveContent(['all'], false)).toEqual([ContentType.CODE, ContentType.DOCS, ContentType.CONFIG]) - }) - test('--include-text-files expands like all', () => { - 
expect(_resolveContent(['code'], true)).toEqual([ContentType.CODE, ContentType.DOCS, ContentType.CONFIG]) - }) - test('multiple types', () => { - expect(_resolveContent(['code', 'docs'], false)).toEqual([ContentType.CODE, ContentType.DOCS]) - }) - test('unknown throws', () => { - expect(() => _resolveContent(['bogus'], false)).toThrow() - }) -}) - -describe('runCli --help', () => { - test('help mentions all subcommands', async () => { - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['--help']) - expect(code).toBe(0) - } - finally { - process.stdout.write = origWrite - } - const out = writes.join('') - expect(out).toContain('search') - expect(out).toContain('index') - expect(out).toContain('find-related') - expect(out).toContain('init') - expect(out).toContain('savings') - expect(out).toContain('mcp') - }) -}) - -describe('csp init', () => { - let tmpDir: string - let origCwd: string - - beforeEach(async () => { - tmpDir = await mkdtemp(join(tmpdir(), 'csp-cli-test-')) - origCwd = process.cwd() - process.chdir(tmpDir) - }) - - afterEach(async () => { - process.chdir(origCwd) - await rm(tmpDir, { recursive: true, force: true }) - }) - - test('--agent claude writes .claude/agents/csp-search.md', async () => { - await _runInit({ - agent: Agent.Claude, - cwd: tmpDir, - readAgentFile: async () => '# stub agent file\n', - }) - const path = join(tmpDir, '.claude/agents/csp-search.md') - expect(existsSync(path)).toBe(true) - const content = await readFile(path, 'utf8') - expect(content).toBe('# stub agent file\n') - }) - - test('--agent copilot writes .github/agents/csp-search.md', async () => { - await _runInit({ - agent: Agent.Copilot, - cwd: tmpDir, - readAgentFile: async () => '# stub copilot\n', - }) - const path = join(tmpDir, 
'.github/agents/csp-search.md') - expect(existsSync(path)).toBe(true) - }) - - test('without --force errors if file exists', async () => { - await _runInit({ - agent: Agent.Claude, - cwd: tmpDir, - readAgentFile: async () => 'first\n', - }) - // Second call should reject with an "already exists" error — callers - // (i.e. runCli) translate this into exit code 1 + stderr message. - await expect(_runInit({ - agent: Agent.Claude, - cwd: tmpDir, - readAgentFile: async () => 'second\n', - })).rejects.toThrow('already exists') - // Original content preserved. - const content = await readFile(join(tmpDir, '.claude/agents/csp-search.md'), 'utf8') - expect(content).toBe('first\n') - }) - - test('--force overwrites', async () => { - await _runInit({ - agent: Agent.Claude, - cwd: tmpDir, - readAgentFile: async () => 'first\n', - }) - await _runInit({ - agent: Agent.Claude, - force: true, - cwd: tmpDir, - readAgentFile: async () => 'second\n', - }) - const content = await readFile(join(tmpDir, '.claude/agents/csp-search.md'), 'utf8') - expect(content).toBe('second\n') - }) -}) - -describe('csp search (stub-mocked)', () => { - test('calls index.search with topK', async () => { - let captured: { query?: string, topK?: number } = {} - const fakeIndex: Partial = { - chunks: [], - search: (query: string, opts?: { topK?: number }): SearchResult[] => { - captured = { query, ...(opts ?? {}) } - return [] - }, - } - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? 
chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['search', 'foo', '.', '-k', '7'], { - loadOrBuild: async () => fakeIndex as CspIndex, - }) - expect(code).toBe(0) - } - finally { - process.stdout.write = origWrite - } - expect(captured).toEqual({ query: 'foo', topK: 7 }) - // Output should be JSON {"error":"No results found."} - const out = writes.join('').trim() - expect(JSON.parse(out)).toEqual({ error: 'No results found.' }) - }) - - test('formats non-empty results as JSON', async () => { - const fakeIndex: Partial = { - chunks: [], - search: (): SearchResult[] => [ - { - chunk: { content: 'def foo()', filePath: 'a.py', startLine: 1, endLine: 3, language: 'python' }, - score: 0.9, - // Mirrors search.ts's snake_case wire format that utils.formatResults consumes. - toDict: () => ({ - chunk: { - content: 'def foo()', - file_path: 'a.py', - start_line: 1, - end_line: 3, - language: 'python', - location: 'a.py:1-3', - }, - score: 0.9, - }), - }, - ], - } - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? 
chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - await runCli(['search', 'foo', '.'], { - loadOrBuild: async () => fakeIndex as CspIndex, - }) - } - finally { - process.stdout.write = origWrite - } - const out = JSON.parse(writes.join('').trim()) as { - query: string - results: { chunk: { file_path: string, location: string } }[] - } - expect(out.query).toBe('foo') - expect(out.results).toHaveLength(1) - expect(out.results[0]!.chunk.file_path).toBe('a.py') - expect(out.results[0]!.chunk.location).toBe('a.py:1-3') - }) -}) - -describe('csp savings', () => { - test('prints the report', async () => { - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['savings'], { - formatSavings: ({ verbose }) => `SAVINGS verbose=${verbose ? '1' : '0'}`, - }) - expect(code).toBe(0) - } - finally { - process.stdout.write = origWrite - } - expect(writes.join('')).toBe('SAVINGS verbose=0') - }) - - test('--verbose is forwarded', async () => { - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - await runCli(['savings', '--verbose'], { - formatSavings: ({ verbose }) => `SAVINGS verbose=${verbose ? 
'1' : '0'}`, - }) - } - finally { - process.stdout.write = origWrite - } - expect(writes.join('')).toBe('SAVINGS verbose=1') - }) -}) - -describe('csp clear', () => { - function captureStdout(): { writes: string[], restore: () => void } { - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - return { writes, restore: () => { - process.stdout.write = origWrite - } } - } - - test('clear savings deletes the file and reports the path', async () => { - const { writes, restore } = captureStdout() - let called = 0 - try { - const code = await runCli(['clear', 'savings'], { - clearSavings: () => { - called++ - return { path: '/tmp/x/savings.jsonl', cleared: true } - }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(called).toBe(1) - expect(writes.join('')).toContain('Cleared savings at `/tmp/x/savings.jsonl`') - }) - - test('clear savings reports when no file exists', async () => { - const { writes, restore } = captureStdout() - try { - await runCli(['clear', 'savings'], { - clearSavings: () => ({ path: '/tmp/x/savings.jsonl', cleared: false }), - }) - } - finally { - restore() - } - expect(writes.join('')).toContain('No savings file found at `/tmp/x/savings.jsonl`') - }) - - test('clear index deletes the index cache and leaves savings untouched', async () => { - const { writes, restore } = captureStdout() - let savingsCalled = 0 - let indexCalled = 0 - try { - await runCli(['clear', 'index'], { - clearSavings: () => { - savingsCalled++ - return { path: '/tmp/x/savings.jsonl', cleared: true } - }, - clearIndex: () => { - indexCalled++ - return { path: '/tmp/x/index', cleared: true, entries: 3 } - }, - }) - } - finally { - restore() - } - expect(indexCalled).toBe(1) - expect(savingsCalled).toBe(0) // index-only must not touch savings - const out = 
writes.join('') - expect(out).toContain('Cleared 3 cached index entries') - expect(out).toContain('/tmp/x/index') - }) - - test('clear index reports when no index cache exists', async () => { - const { writes, restore } = captureStdout() - try { - await runCli(['clear', 'index'], { - clearSavings: () => ({ path: '/tmp/x/savings.jsonl', cleared: true }), - clearIndex: () => ({ path: '/tmp/x/index', cleared: false, entries: 0 }), - }) - } - finally { - restore() - } - expect(writes.join('')).toContain('No index cache found at `/tmp/x/index`') - }) - - test('clear all clears index and savings as two independent actions', async () => { - const { writes, restore } = captureStdout() - let savingsCalled = 0 - let indexCalled = 0 - try { - await runCli(['clear', 'all'], { - clearSavings: () => { - savingsCalled++ - return { path: '/tmp/x/savings.jsonl', cleared: true } - }, - clearIndex: () => { - indexCalled++ - return { path: '/tmp/x/index', cleared: true, entries: 2 } - }, - }) - } - finally { - restore() - } - // Both seams invoked independently — savings cleared via its own call, not - // as a side effect of removing the index root. 
- expect(indexCalled).toBe(1) - expect(savingsCalled).toBe(1) - const out = writes.join('') - expect(out).toContain('Cleared 2 cached index entries') - expect(out).toContain('Cleared savings at') - }) - - test('clear index over a real temp home removes only index/ and preserves savings (AC-015)', async () => { - const { mkdirSync, writeFileSync, existsSync: exists } = require('node:fs') as typeof import('node:fs') - const { clearIndexCache, resolveIndexRoot } = require('./indexing/cache.ts') as typeof import('./indexing/cache.ts') - const tmpHome = await mkdtemp(join(tmpdir(), 'csp-cli-clear-')) - const base = join(tmpHome, '.csp') - const indexRoot = resolveIndexRoot({ baseDir: base }) - const savings = join(base, 'savings.jsonl') - try { - mkdirSync(join(indexRoot, 'key-a'), { recursive: true }) - writeFileSync(savings, '{"call":"search"}\n') - - const { restore } = captureStdout() - try { - await runCli(['clear', 'index'], { - clearIndex: () => clearIndexCache({ baseDir: base }), - }) - } - finally { - restore() - } - - // Index gone; home directory and savings file still present. 
- expect(exists(indexRoot)).toBe(false) - expect(exists(savings)).toBe(true) - expect(exists(base)).toBe(true) - } - finally { - await rm(tmpHome, { recursive: true, force: true }) - } - }) - - test('clear with an invalid type exits 1', async () => { - const code = await runCli(['clear', 'bogus'], { - clearSavings: () => ({ path: '/tmp/x/savings.jsonl', cleared: true }), - }) - expect(code).toBe(1) - }) - - test('clear with no type exits 1', async () => { - const code = await runCli(['clear'], { - clearSavings: () => ({ path: '/tmp/x/savings.jsonl', cleared: true }), - }) - expect(code).toBe(1) - }) -}) - -describe('csp mcp', () => { - test('dispatches to serve with path and content', async () => { - let captured: { path?: string | undefined, ref?: string | undefined, content?: ContentType[] } = {} - const code = await runCli(['mcp', '.', '--ref', 'main', '--content', 'all'], { - serveMcp: async (p, o) => { - captured = { path: p, ref: o.ref, content: o.content } - }, - }) - expect(code).toBe(0) - expect(captured.path).toBe('.') - expect(captured.ref).toBe('main') - expect(captured.content).toEqual([ContentType.CODE, ContentType.DOCS, ContentType.CONFIG]) - }) - - test('mcp with no path forwards undefined', async () => { - let captured: { path?: string | undefined } = {} - const code = await runCli(['mcp'], { - serveMcp: async (p) => { - captured = { path: p } - }, - }) - expect(code).toBe(0) - expect(captured.path).toBeUndefined() - }) -}) - -describe('csp find-related validates line', () => { - test('non-integer line errors with code 1', async () => { - const errs: string[] = [] - const origStderr = process.stderr.write.bind(process.stderr) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? 
chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['find-related', 'src/auth.ts', '42abc', '.'], { - loadOrBuild: async () => ({ chunks: [] }) as unknown as CspIndex, - }) - expect(code).toBe(1) - } - finally { - process.stderr.write = origStderr - } - expect(errs.join('')).toContain('line must be an integer') - }) -}) - -describe('_readAgentFile', () => { - test('reads src/agents/claude.md', async () => { - const text = await _readAgentFile(Agent.Claude) - expect(text.length).toBeGreaterThan(0) - expect(text).toContain('csp') - }) - test('a bundled template exists for every agent', async () => { - for (const agent of Object.values(Agent)) { - const text = await _readAgentFile(agent) - expect(text).toContain('name: csp-search') - expect(text).toContain('csp search') - } - }) -}) - -describe('runCli error handling', () => { - test('unknown subcommand returns exit 1', async () => { - const errs: string[] = [] - const outs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - const origOut = process.stdout.write.bind(process.stdout) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - process.stdout.write = (chunk: string | Uint8Array) => { - outs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['bogus-cmd']) - expect(code).toBe(1) - } - finally { - process.stderr.write = origErr - process.stdout.write = origOut - } - expect(errs.join('')).toContain('Unknown command: bogus-cmd') - }) - - test('invalid --agent returns exit 1 with stderr message', async () => { - const errs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? 
chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['init', '--agent', 'bogus']) - expect(code).toBe(1) - } - finally { - process.stderr.write = origErr - } - expect(errs.join('')).toContain('Invalid agent: bogus') - }) - - test('invalid --content returns exit 1 with stderr message', async () => { - const errs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['search', 'foo', '--content', 'bogus'], { - loadOrBuild: async () => ({ chunks: [] }) as unknown as CspIndex, - }) - expect(code).toBe(1) - } - finally { - process.stderr.write = origErr - } - expect(errs.join('')).toContain('Invalid content type: bogus') - }) - - test('init rejection is translated to exit 1 by runCli', async () => { - const tmp = await mkdtemp(join(tmpdir(), 'csp-cli-runcli-')) - const errs: string[] = [] - const outs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - const origOut = process.stdout.write.bind(process.stdout) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - process.stdout.write = (chunk: string | Uint8Array) => { - outs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - // First run: succeeds. - const code1 = await runCli(['init', '--agent', 'claude'], { - cwd: () => tmp, - readAgentFile: async () => '# stub\n', - }) - expect(code1).toBe(0) - // Second run without --force: should exit 1 with stderr message, not crash. 
- const code2 = await runCli(['init', '--agent', 'claude'], { - cwd: () => tmp, - readAgentFile: async () => '# stub\n', - }) - expect(code2).toBe(1) - } - finally { - process.stderr.write = origErr - process.stdout.write = origOut - await rm(tmp, { recursive: true, force: true }) - } - expect(errs.join('')).toContain('already exists') - }) -}) - -describe('csp index --content', () => { - test('passes resolved content types to fromPath', async () => { - let captured: { path?: string, content?: ContentType[] } = {} - const fakeIndex: Partial = { - chunks: [], - save: async () => { - // no-op - }, - } - const tmp = await mkdtemp(join(tmpdir(), 'csp-cli-index-')) - try { - const code = await runCli(['index', '.', '-o', join(tmp, 'idx'), '--content', 'all'], { - fromPath: async (p, o) => { - captured = { path: p, content: o.content } - return fakeIndex as CspIndex - }, - }) - expect(code).toBe(0) - } - finally { - await rm(tmp, { recursive: true, force: true }) - } - expect(captured.path).toBe('.') - expect(captured.content).toEqual([ContentType.CODE, ContentType.DOCS, ContentType.CONFIG]) - }) -}) - -describe('csp index -o (explicit path persistence)', () => { - test('saves the built index to the explicit -o directory', async () => { - let savedTo: string | undefined - const fakeIndex: Partial = { - chunks: [], - save: async (dir: string) => { savedTo = dir }, - } - const tmp = await mkdtemp(join(tmpdir(), 'csp-cli-index-out-')) - const out = join(tmp, 'idx') - try { - const code = await runCli(['index', '.', '-o', out], { - fromPath: async () => fakeIndex as CspIndex, - }) - expect(code).toBe(0) - } - finally { - await rm(tmp, { recursive: true, force: true }) - } - // The explicit -o path must be the directory passed to save (no cache rerouting). 
- expect(savedTo).toBe(out) - }) - - test('without -o keeps the required-flag error and exits 1', async () => { - const errs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['index', '.'], { - fromPath: async () => ({ chunks: [], save: async () => {} }) as unknown as CspIndex, - }) - expect(code).toBe(1) - } - finally { - process.stderr.write = origErr - } - expect(errs.join('')).toContain('--out / -o is required for `index`') - }) -}) - -describe('csp search/find-related --index (explicit path respected)', () => { - test('search --index loads via loadFromDisk seam with the explicit path', async () => { - let loadedFrom: string | undefined - const fakeIndex: Partial = { - chunks: [], - search: (): SearchResult[] => [], - } - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['search', 'foo', '--index', '/some/explicit/idx'], { - readIndex: async (p: string) => { - loadedFrom = p - return fakeIndex as CspIndex - }, - // fromPath provided to prove it is NOT used when --index is set. - fromPath: async () => { throw new Error('fromPath must not run when --index is given') }, - }) - expect(code).toBe(0) - } - finally { - process.stdout.write = origWrite - } - expect(loadedFrom).toBe('/some/explicit/idx') - expect(JSON.parse(writes.join('').trim())).toEqual({ error: 'No results found.' 
}) - }) - - test('find-related --index loads via loadFromDisk seam with the explicit path', async () => { - let loadedFrom: string | undefined - const seedChunk = { content: 'x', filePath: 'a.ts', startLine: 1, endLine: 5, language: 'typescript' } - const fakeIndex: Partial = { - chunks: [seedChunk], - findRelated: (): SearchResult[] => [], - } - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - const code = await runCli(['find-related', 'a.ts', '2', '--index', '/explicit/idx2'], { - readIndex: async (p: string) => { - loadedFrom = p - return fakeIndex as CspIndex - }, - fromPath: async () => { throw new Error('fromPath must not run when --index is given') }, - }) - expect(code).toBe(0) - } - finally { - process.stdout.write = origWrite - } - expect(loadedFrom).toBe('/explicit/idx2') - }) - - test('search --index with a missing path surfaces a clear error and exits 1', async () => { - const errs: string[] = [] - const origErr = process.stderr.write.bind(process.stderr) - process.stderr.write = (chunk: string | Uint8Array) => { - errs.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - const missing = join(tmpdir(), `csp-no-such-index-${Date.now()}`) - try { - // No readIndex seam → real CspIndex.loadFromDisk runs and must throw a clear error. 
- const code = await runCli(['search', 'foo', '--index', missing]) - expect(code).toBe(1) - } - finally { - process.stderr.write = origErr - } - expect(errs.join('')).toContain('Index not found:') - expect(errs.join('')).toContain(missing) - }) -}) - -describe('csp index -o → search --index (real roundtrip, no seams)', () => { - test('persisted index is loadable and searchable via the explicit path', async () => { - const tmp = await mkdtemp(join(tmpdir(), 'csp-cli-roundtrip-')) - const out = join(tmp, 'idx') - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - try { - // Build a real CspIndex from a tiny source dir, persist via `csp index -o`. - const src = join(tmp, 'src') - await mkdir(src, { recursive: true }) - await writeFile(join(src, 'auth.ts'), 'export function login(user: string) { return user }\n', 'utf8') - const idxCode = await runCli(['index', src, '-o', out]) - expect(idxCode).toBe(0) - // The manifest proves persistence happened at the explicit path. - expect(existsSync(join(out, 'manifest.json'))).toBe(true) - - // Load it back through the explicit --index path and search. - const searchCode = await runCli(['search', 'login', '--index', out, '-k', '3']) - expect(searchCode).toBe(0) - } - finally { - process.stdout.write = origWrite - await rm(tmp, { recursive: true, force: true }) - } - // A non-empty result set (or an explicit "No results") must be valid JSON. - const out2 = JSON.parse(writes.join('').trim().split('\n').pop() ?? 
'{}') as unknown - expect(out2).toBeDefined() - }) -}) - -describe('csp search/find-related (no --index) auto-caches via loadOrBuildIndex (T011)', () => { - function captureStdout(): { writes: string[], restore: () => void } { - const writes: string[] = [] - const origWrite = process.stdout.write.bind(process.stdout) - process.stdout.write = (chunk: string | Uint8Array) => { - writes.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8')) - return true - } - return { writes, restore: () => { - process.stdout.write = origWrite - } } - } - - test('search without --index routes through the loadOrBuild seam with source + content + topK', async () => { - let captured: { source?: string, content?: ContentType[], ref?: string | undefined } = {} - const fakeIndex: Partial = { - chunks: [], - search: (): SearchResult[] => [], - } - const { writes, restore } = captureStdout() - try { - const code = await runCli(['search', 'foo', './my-project', '-k', '3'], { - loadOrBuild: async (source, opts) => { - captured = { source, content: opts.content, ref: opts.ref } - return fakeIndex as CspIndex - }, - // fromPath must NOT be used for the build branch anymore. - fromPath: async () => { throw new Error('fromPath must not run when auto-cache is wired') }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(captured.source).toBe('./my-project') - expect(captured.content).toEqual([ContentType.CODE]) - expect(JSON.parse(writes.join('').trim())).toEqual({ error: 'No results found.' 
}) - }) - - test('search without a path argument defaults the source to "."', async () => { - let capturedSource: string | undefined - const fakeIndex: Partial = { chunks: [], search: (): SearchResult[] => [] } - const { restore } = captureStdout() - try { - const code = await runCli(['search', 'foo'], { - loadOrBuild: async (source) => { - capturedSource = source - return fakeIndex as CspIndex - }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(capturedSource).toBe('.') - }) - - test('find-related without --index routes through the loadOrBuild seam with its path source', async () => { - let capturedSource: string | undefined - const seedChunk = { content: 'x', filePath: 'a.ts', startLine: 1, endLine: 5, language: 'typescript' } - const fakeIndex: Partial = { - chunks: [seedChunk], - findRelated: (): SearchResult[] => [], - } - const { restore } = captureStdout() - try { - const code = await runCli(['find-related', 'a.ts', '2', './repo'], { - loadOrBuild: async (source) => { - capturedSource = source - return fakeIndex as CspIndex - }, - fromPath: async () => { throw new Error('fromPath must not run when auto-cache is wired') }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(capturedSource).toBe('./repo') - }) - - test('--index still bypasses the auto-cache seam (T008 guarantee preserved)', async () => { - let loadedFrom: string | undefined - let autoCacheCalled = false - const fakeIndex: Partial = { chunks: [], search: (): SearchResult[] => [] } - const { restore } = captureStdout() - try { - const code = await runCli(['search', 'foo', '--index', '/explicit/idx'], { - readIndex: async (p: string) => { - loadedFrom = p - return fakeIndex as CspIndex - }, - loadOrBuild: async () => { - autoCacheCalled = true - return fakeIndex as CspIndex - }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(loadedFrom).toBe('/explicit/idx') - expect(autoCacheCalled).toBe(false) - }) - - test('ref flag is forwarded to 
the loadOrBuild seam', async () => { - let capturedRef: string | undefined - const fakeIndex: Partial = { chunks: [], search: (): SearchResult[] => [] } - const { restore } = captureStdout() - try { - const code = await runCli(['search', 'foo', 'https://github.com/o/r', '--ref', 'v1.2.3'], { - loadOrBuild: async (_source, opts) => { - capturedRef = opts.ref - return fakeIndex as CspIndex - }, - }) - expect(code).toBe(0) - } - finally { - restore() - } - expect(capturedRef).toBe('v1.2.3') - }) -}) diff --git a/src/cli.ts b/src/cli.ts deleted file mode 100644 index 97105a3..0000000 --- a/src/cli.ts +++ /dev/null @@ -1,597 +0,0 @@ -#!/usr/bin/env node -// Port of src/semble/cli.py -import { mkdir, readFile, stat, writeFile } from 'node:fs/promises' -import { dirname, resolve } from 'node:path' -import process from 'node:process' -import { fileURLToPath } from 'node:url' - -import { clearIndexCache, loadOrBuildIndex } from './indexing/cache.ts' -import { CspIndex } from './indexing/index.ts' -import { serve } from './mcp/server.ts' -import { clearSavings, formatSavingsReport } from './stats.ts' -import { ContentType } from './types.ts' -import { formatResults, isGitUrl, resolveChunk } from './utils.ts' -import { version } from './version.ts' - -export enum Agent { - Antigravity = 'antigravity', - Claude = 'claude', - Commandcode = 'commandcode', - Copilot = 'copilot', - Cursor = 'cursor', - Gemini = 'gemini', - Kiro = 'kiro', - Opencode = 'opencode', - Pi = 'pi', - Reasonix = 'reasonix', -} - -const DEFAULT_AGENT = Agent.Claude -const CLI_DISPATCH_ARGS = new Set([ - 'search', - 'find-related', - 'init', - 'savings', - 'clear', - 'index', - 'mcp', - '-h', - '--help', -]) - -const CLEAR_CHOICES = ['all', 'index', 'savings'] as const - -const CONTENT_CHOICES = ['code', 'docs', 'config', 'all'] as const - -export function _agentPath(agent: Agent): string { - const baseDir = agent === Agent.Copilot ? 
'.github' : `.${agent}` - return `${baseDir}/agents/csp-search.md` -} - -export interface ParsedArgs { - command: string | null - positional: string[] - flags: Record -} - -export function parseArgs(argv: string[]): ParsedArgs { - const positional: string[] = [] - const flags: Record = {} - let command: string | null = null - let i = 0 - - if (argv.length > 0 && !argv[0]!.startsWith('-')) { - command = argv[0]! - i = 1 - } - - while (i < argv.length) { - const token = argv[i]! - if (token === '--') { - for (let j = i + 1; j < argv.length; j++) { - positional.push(argv[j]!) - } - break - } - if (token.startsWith('--')) { - const eqIdx = token.indexOf('=') - let name: string - let value: string | undefined - if (eqIdx !== -1) { - name = token.slice(2, eqIdx) - value = token.slice(eqIdx + 1) - } - else { - name = token.slice(2) - } - // collect multi-value flag (e.g. --content code docs) - if (name === 'content' && value === undefined) { - const values: string[] = [] - let j = i + 1 - while (j < argv.length && !argv[j]!.startsWith('-')) { - values.push(argv[j]!) 
- j++ - } - if (values.length > 0) { - flags[name] = values - i = j - continue - } - } - if (value === undefined) { - // boolean or value-from-next - const next = argv[i + 1] - if (next !== undefined && !next.startsWith('-')) { - flags[name] = next - i += 2 - continue - } - flags[name] = true - i += 1 - continue - } - flags[name] = value - i += 1 - continue - } - if (token.startsWith('-') && token.length > 1) { - // short flag - const name = token.slice(1) - const next = argv[i + 1] - if (next !== undefined && !next.startsWith('-')) { - flags[name] = next - i += 2 - continue - } - flags[name] = true - i += 1 - continue - } - positional.push(token) - i += 1 - } - - return { command, positional, flags } -} - -function _getFlag(flags: Record, ...names: string[]): string | boolean | string[] | undefined { - for (const name of names) { - if (name in flags) { - return flags[name] - } - } - return undefined -} - -function _getStringFlag(flags: Record, ...names: string[]): string | undefined { - const v = _getFlag(flags, ...names) - if (typeof v === 'string') { - return v - } - return undefined -} - -function _getNumberFlag(flags: Record, ...names: string[]): number | undefined { - const s = _getStringFlag(flags, ...names) - if (s === undefined) { - return undefined - } - const n = Number(s) - if (Number.isNaN(n)) { - return undefined - } - return n -} - -function _getBoolFlag(flags: Record, ...names: string[]): boolean { - const v = _getFlag(flags, ...names) - return v === true -} - -function _getContentFlag(flags: Record): string[] { - const v = flags.content - if (Array.isArray(v)) { - return v - } - if (typeof v === 'string') { - return [v] - } - return ['code'] -} - -export function _resolveContent(content: string[], includeTextFiles: boolean): ContentType[] { - if (includeTextFiles) { - process.emitWarning( - '--include-text-files is deprecated and will be removed in a future version. 
Use --content all instead.', - 'DeprecationWarning', - ) - } - if (includeTextFiles || content.includes('all')) { - return [ContentType.CODE, ContentType.DOCS, ContentType.CONFIG] - } - const result: ContentType[] = [] - for (const c of content) { - if (c === 'code') { - result.push(ContentType.CODE) - } - else if (c === 'docs') { - result.push(ContentType.DOCS) - } - else if (c === 'config') { - result.push(ContentType.CONFIG) - } - else { throw new Error(`Invalid content type: ${c}. Choices: ${CONTENT_CHOICES.join(', ')}`) } - } - return result -} - -function _printHelp(): void { - const help = `csp — Instant local code search for agents. - -Usage: - csp [options] - -Commands: - search [path] Search a codebase. - index Index and store a codebase. - find-related [path] Find code similar to a specific location. - init Write a csp sub-agent file for your coding agent. - savings Show token savings and usage stats. - clear Clear cached data (savings telemetry). - mcp [path] Start the MCP server (optionally pre-index path). - -Common options: - --top-k , -k Number of results (default: 5). - --content Content types: code, docs, config, all (default: code). - --index Path to a pre-built index. - --agent , -a One of: antigravity, claude, commandcode, copilot, cursor, gemini, kiro, opencode, pi, reasonix. - --force Overwrite if file already exists (init). - -o, --out Write the pre-built index to this path (index). - --ref Branch or tag for git URLs (mcp). - --verbose Verbose output (savings). - --include-text-files Deprecated. Use --content all instead. - -Examples: - csp search "authentication flow" ./my-project - csp index ./my-project -o my_index - csp find-related src/auth.ts 42 ./my-project - csp init --agent claude - csp savings --verbose - csp mcp ./my-project -` - process.stdout.write(help) -} - -interface RunOptions { - readIndex?: (path: string) => Promise - /** - * Build-or-reuse seam for the auto-cache path (search/find-related without - * `--index`). 
Defaults to {@link loadOrBuildIndex}; tests inject it to avoid - * touching the real `~/.csp` home. - */ - loadOrBuild?: (source: string, opts: { content: ContentType[], ref?: string | undefined }) => Promise - fromPath?: (path: string, opts: { content: ContentType[] }) => Promise - fromGit?: (path: string, opts: { content: ContentType[] }) => Promise - serveMcp?: (path: string | undefined, opts: { ref?: string | undefined, content: ContentType[] }) => Promise - writeFileImpl?: (path: string, content: string) => Promise - readAgentFile?: (agent: Agent) => Promise - formatSavings?: (opts: { verbose: boolean }) => string - clearSavings?: () => { path: string, cleared: boolean } - /** - * Index-cache clearing seam for `clear index` / `clear all`. Defaults to - * {@link clearIndexCache} (which targets `~/.csp/index`); tests inject it with - * a temp `baseDir` so the real home is never touched. - */ - clearIndex?: () => { path: string, cleared: boolean, entries: number } - cwd?: () => string -} - -export async function _readAgentFile(agent: Agent): Promise { - const url = new URL(`./agents/${agent}.md`, import.meta.url) - return readFile(fileURLToPath(url), 'utf8') -} - -export async function _runInit(opts: { - agent?: Agent - force?: boolean - cwd?: string - readAgentFile?: (agent: Agent) => Promise - writeFileImpl?: (path: string, content: string) => Promise -}): Promise { - const agent = opts.agent ?? DEFAULT_AGENT - const force = opts.force ?? false - const cwd = opts.cwd ?? process.cwd() - const relDest = _agentPath(agent) - const dest = resolve(cwd, relDest) - - let exists = false - try { - await stat(dest) - exists = true - } - catch { - exists = false - } - if (exists && !force) { - throw new Error(`${relDest} already exists. Run with --force to overwrite.`) - } - - await mkdir(dirname(dest), { recursive: true }) - const readAgent = opts.readAgentFile ?? _readAgentFile - const content = await readAgent(agent) - const write = opts.writeFileImpl ?? 
(async (p: string, c: string) => writeFile(p, c, 'utf8')) - await write(dest, content) - process.stdout.write(`Created ${relDest}\n`) -} - -/** - * Default auto-cache seam: forward to {@link loadOrBuildIndex}, re-narrowing - * `ref` so an absent ref is omitted rather than passed as explicit `undefined` - * (required under `exactOptionalPropertyTypes`). - */ -async function _defaultLoadOrBuild( - source: string, - opts: { content: ContentType[], ref?: string | undefined }, -): Promise { - return loadOrBuildIndex(source, { - content: opts.content, - ...(opts.ref !== undefined ? { ref: opts.ref } : {}), - }) -} - -async function _runIndex(opts: { - path: string - out: string - content: ContentType[] - fromPath?: (path: string, opts: { content: ContentType[] }) => Promise - fromGit?: (path: string, opts: { content: ContentType[] }) => Promise -}): Promise { - const { path, out, content } = opts - const fromPath = opts.fromPath ?? (async (p: string, o: { content: ContentType[] }) => CspIndex.fromPath(p, o)) - const fromGit = opts.fromGit ?? (async (p: string, o: { content: ContentType[] }) => CspIndex.fromGit(p, o)) - const index = isGitUrl(path) - ? await fromGit(path, { content }) - : await fromPath(path, { content }) - await mkdir(out, { recursive: true }) - await index.save(out) -} - -/** Report the outcome of an index-cache clear to stdout. */ -function _reportIndexClear(result: { path: string, cleared: boolean, entries: number }): void { - process.stdout.write( - result.cleared - ? `Cleared ${result.entries} cached index entries at \`${result.path}\`\n` - : `No index cache found at \`${result.path}\`\n`, - ) -} - -/** Report the outcome of a savings clear to stdout. */ -function _reportSavingsClear(result: { path: string, cleared: boolean }): void { - process.stdout.write( - result.cleared - ? `Cleared savings at \`${result.path}\`\n` - : `No savings file found at \`${result.path}\`\n`, - ) -} - -/** - * Run the `clear` subcommand. 
- * - * `clear index` deletes the global on-disk index cache at `~/.csp/index/`. - * `clear savings` deletes the `~/.csp/savings.jsonl` telemetry file. `clear all` - * runs **both** as two independent actions — the index root is removed first, - * then `clearSavings()` is called separately, so removing the index never - * affects savings and vice versa. The `~/.csp` home itself is never deleted. - */ -export function _runClear( - type: string, - clearSavingsImpl: () => { path: string, cleared: boolean } = clearSavings, - clearIndexImpl: () => { path: string, cleared: boolean, entries: number } = clearIndexCache, -): number { - if (!(CLEAR_CHOICES as readonly string[]).includes(type)) { - process.stderr.write(`Invalid clear type: ${type}. Choices: ${CLEAR_CHOICES.join(', ')}\n`) - return 1 - } - - if (type === 'index' || type === 'all') { - _reportIndexClear(clearIndexImpl()) - } - - if (type === 'savings' || type === 'all') { - _reportSavingsClear(clearSavingsImpl()) - } - - return 0 -} - -export async function runCli(argv: string[], options: RunOptions = {}): Promise { - // Bare invocation prints help and exits 0; unknown subcommands are handled - // below (after parsing) so they exit 1. - if (argv.length === 0) { - _printHelp() - return 0 - } - - if (argv[0] === '-h' || argv[0] === '--help') { - _printHelp() - return 0 - } - - if (argv[0] === '-V' || argv[0] === '--version') { - process.stdout.write(`csp ${version}\n`) - return 0 - } - - try { - const { command, positional, flags } = parseArgs(argv) - - if (command === null || !CLI_DISPATCH_ARGS.has(command)) { - process.stderr.write(`Unknown command: ${command ?? ''}\n`) - _printHelp() - return 1 - } - - if (command === 'init') { - const agentRaw = _getStringFlag(flags, 'agent', 'a') ?? DEFAULT_AGENT - const agent = _coerceAgent(agentRaw) - const force = _getBoolFlag(flags, 'force') - await _runInit({ - agent, - force, - ...(options.cwd ? { cwd: options.cwd() } : {}), - ...(options.readAgentFile ? 
{ readAgentFile: options.readAgentFile } : {}), - ...(options.writeFileImpl ? { writeFileImpl: options.writeFileImpl } : {}), - }) - return 0 - } - - if (command === 'index') { - const path = positional[0] ?? '.' - const out = _getStringFlag(flags, 'out', 'o') - if (out === undefined) { - process.stderr.write('--out / -o is required for `index`.\n') - return 1 - } - const content = _resolveContent(_getContentFlag(flags), _getBoolFlag(flags, 'include-text-files')) - await _runIndex({ - path, - out, - content, - ...(options.fromPath ? { fromPath: options.fromPath } : {}), - ...(options.fromGit ? { fromGit: options.fromGit } : {}), - }) - return 0 - } - - if (command === 'savings') { - const verbose = _getBoolFlag(flags, 'verbose') - const fmt = options.formatSavings ?? formatSavingsReport - process.stdout.write(fmt({ verbose })) - return 0 - } - - if (command === 'clear') { - const type = positional[0] - if (type === undefined) { - process.stderr.write(`clear requires a type. Choices: ${CLEAR_CHOICES.join(', ')}\n`) - return 1 - } - const clearSavingsImpl = options.clearSavings ?? clearSavings - const clearIndexImpl = options.clearIndex ?? clearIndexCache - return _runClear(type, clearSavingsImpl, clearIndexImpl) - } - - if (command === 'mcp') { - const path = positional[0] - const ref = _getStringFlag(flags, 'ref') - const content = _resolveContent(_getContentFlag(flags), _getBoolFlag(flags, 'include-text-files')) - const serveImpl = options.serveMcp ?? (async (p, o) => serve(p, o)) - await serveImpl(path, { ref, content }) - return 0 - } - - // search and find-related share index loading - if (command === 'search' || command === 'find-related') { - const indexPath = _getStringFlag(flags, 'index') - let index: CspIndex - if (indexPath !== undefined) { - // Explicit `--index`: load the pre-built index verbatim. The auto-cache - // is intentionally bypassed so an explicit path is always honored. - const loadImpl = options.readIndex ?? 
(async (p: string) => CspIndex.loadFromDisk(p)) - index = await loadImpl(indexPath) - } - else { - // No `--index`: route through the on-disk auto-cache, which keys on the - // source (local path or git URL), content selection, and git ref, then - // reuses a fresh entry or builds + persists one under `~/.csp/index/`. - const pathArg = command === 'search' ? positional[1] ?? '.' : positional[2] ?? '.' - const content = _resolveContent(_getContentFlag(flags), _getBoolFlag(flags, 'include-text-files')) - const ref = _getStringFlag(flags, 'ref') - const loadOrBuild = options.loadOrBuild ?? _defaultLoadOrBuild - index = await loadOrBuild(pathArg, { content, ...(ref !== undefined ? { ref } : {}) }) - } - - const topK = _getNumberFlag(flags, 'top-k', 'k') ?? 5 - - if (command === 'search') { - const query = positional[0] - if (query === undefined) { - process.stderr.write('search requires a .\n') - return 1 - } - const results = index.search(query, { topK }) - const out = results.length === 0 - ? { error: 'No results found.' } - : formatResults(query, results) - process.stdout.write(`${JSON.stringify(out)}\n`) - return 0 - } - - // find-related - const filePath = positional[0] - const lineRaw = positional[1] - if (filePath === undefined || lineRaw === undefined) { - process.stderr.write('find-related requires .\n') - return 1 - } - if (!/^-?\d+$/.test(lineRaw)) { - process.stderr.write(`line must be an integer, got: ${lineRaw}\n`) - return 1 - } - const line = Number.parseInt(lineRaw, 10) - const chunk = resolveChunk(index.chunks, filePath, line) - if (chunk === undefined || chunk === null) { - process.stderr.write(`No chunk found at ${filePath}:${line}.\n`) - return 1 - } - const related = index.findRelated(chunk, { topK }) - const out = related.length === 0 - ? 
{ error: `No related chunks found for ${filePath}:${line}.` } - : formatResults(`Chunks related to ${filePath}:${line}`, related) - process.stdout.write(`${JSON.stringify(out)}\n`) - return 0 - } - - // Unreachable: CLI_DISPATCH_ARGS gate above filters unknown commands. - process.stderr.write(`Unknown command: ${command}\n`) - _printHelp() - return 1 - } - catch (err) { - const message = err instanceof Error ? err.message : String(err) - process.stderr.write(`${message}\n`) - return 1 - } -} - -function _coerceAgent(raw: string): Agent { - const candidates: Agent[] = [ - Agent.Antigravity, - Agent.Claude, - Agent.Commandcode, - Agent.Copilot, - Agent.Cursor, - Agent.Gemini, - Agent.Kiro, - Agent.Opencode, - Agent.Pi, - Agent.Reasonix, - ] - for (const a of candidates) { - if (a === raw) { - return a - } - } - throw new Error(`Invalid agent: ${raw}. Choices: ${candidates.join(', ')}`) -} - -async function main(): Promise { - const argv = process.argv.slice(2) - const code = await runCli(argv) - if (code !== 0) { - process.exit(code) - } -} - -// Run main only when invoked directly (not when imported as a module / under bun:test) -const invokedDirectly = (() => { - if (typeof process === 'undefined') { - return false - } - // process.argv[1] points at the entrypoint script — match against this module's URL - const entry = process.argv[1] - if (entry === undefined) { - return false - } - try { - const here = fileURLToPath(import.meta.url) - return entry === here || entry.endsWith('/cli.ts') || entry.endsWith('/cli.mjs') || entry.endsWith('/cli.js') - } - catch { - return false - } -})() - -if (invokedDirectly) { - void main() -} diff --git a/src/index.test.ts b/src/index.test.ts deleted file mode 100644 index 1356521..0000000 --- a/src/index.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -// Smoke tests for the public library barrel. -// -// These don't exercise behavior — Unit 12 (CspIndex) and Unit 1 (types) own -// their own deep tests. 
The point here is to lock down the *shape* of the -// public surface so we'd catch: -// * an accidental rename of `CspIndex` / `ContentType` / `version`, -// * a regression of `ContentType` to a type-only export (which would -// break `import { ContentType } from '@pleaseai/csp'` at runtime). -// -// The wildcard `import * as csp` is deliberate: it also verifies the module -// is *syntactically* a valid ESM barrel (no circular value-time imports). -import { describe, expect, it } from 'bun:test' - -import * as csp from './index.ts' - -describe('public barrel', () => { - it('imports without error and exposes the documented names', () => { - // Use a `Set` so the assertion message is order-independent — easier to - // diagnose than a positional array diff when a name is missing. - const exported = new Set(Object.keys(csp)) - for (const name of ['CspIndex', 'ContentType', 'version']) { - expect(exported.has(name)).toBe(true) - } - }) - - it('exposes `version` as a string', () => { - expect(typeof csp.version).toBe('string') - // Guard against an empty string sneaking in (e.g. failed build-time - // substitution); a real version is always non-empty. - expect(csp.version.length).toBeGreaterThan(0) - }) - - it('exposes `CspIndex` as a constructable value', () => { - // `typeof X === 'function'` covers both `class` and plain functions, - // which keeps the test resilient if Unit 12 chooses a factory-style - // implementation instead of a class. - expect(typeof csp.CspIndex).toBe('function') - }) - - it('exposes `ContentType` as a runtime enum object with `code | docs | config`', () => { - // The string values are part of the on-disk / CLI contract (`--content code`, - // persisted indices). They must NOT be tweaked without coordinating with - // the semble compatibility story documented in CLAUDE.md. 
- expect(csp.ContentType.CODE).toBe('code') - expect(csp.ContentType.DOCS).toBe('docs') - expect(csp.ContentType.CONFIG).toBe('config') - }) -}) diff --git a/src/index.ts b/src/index.ts deleted file mode 100644 index 5f7cc7b..0000000 --- a/src/index.ts +++ /dev/null @@ -1,22 +0,0 @@ -// Public library barrel — port of `src/semble/__init__.py`. -// -// External consumers `import { CspIndex, ContentType, ... } from '@pleaseai/csp'`, -// so this file's surface is load-bearing and matches the README. -// -// `ContentType` is intentionally re-exported as a *value* (not via -// `export type`) because Unit 1's port models it as a `const`-object enum: -// the identifier carries both a runtime value and a same-named type alias. -// With `verbatimModuleSyntax`, exporting it via `export {}` carries both -// forms; listing it under `export type {}` would erase the runtime side. - -export { CspIndex } from './indexing/index.ts' - -export type { - Chunk, - IndexStats, - SearchResult, -} from './types.ts' - -export { ContentType } from './types.ts' - -export { version } from './version.ts' diff --git a/src/indexing/.gitkeep b/src/indexing/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/indexing/cache.test.ts b/src/indexing/cache.test.ts deleted file mode 100644 index fa9e3b3..0000000 --- a/src/indexing/cache.test.ts +++ /dev/null @@ -1,328 +0,0 @@ -// Unit tests for the index cache module (T009): cache-dir resolution, -// content hashing, and 0700 directory hardening. -// T010 adds loadOrBuildIndex orchestration tests at the bottom. 
- -import { existsSync, mkdtempSync, rmSync, statSync, writeFileSync } from 'node:fs' -import { tmpdir } from 'node:os' -import { join, sep } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' -import { ContentType } from '../types.ts' -import { clearIndexCache, computeContentHash, ensureCacheDir, loadOrBuildIndex, resolveCacheDir, resolveIndexRoot } from './cache.ts' -import { CspIndex } from './index.ts' - -describe('resolveCacheDir', () => { - it('returns a path under /index/', () => { - const base = '/some/home/.csp' - const dir = resolveCacheDir('/repo', [ContentType.CODE], { baseDir: base }) - expect(dir.startsWith(`${base}${sep}index${sep}`)).toBe(true) - }) - - it('is deterministic for the same (source, content, ref)', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('/repo', [ContentType.CODE], opts) - const b = resolveCacheDir('/repo', [ContentType.CODE], opts) - expect(a).toBe(b) - }) - - it('is insensitive to content selection ordering', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('/repo', [ContentType.CODE, ContentType.DOCS], opts) - const b = resolveCacheDir('/repo', [ContentType.DOCS, ContentType.CODE], opts) - expect(a).toBe(b) - }) - - it('produces a different key for a different content selection', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('/repo', [ContentType.CODE], opts) - const b = resolveCacheDir('/repo', [ContentType.CODE, ContentType.DOCS], opts) - expect(a).not.toBe(b) - }) - - it('produces a different key for a different source', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('/repo-a', [ContentType.CODE], opts) - const b = resolveCacheDir('/repo-b', [ContentType.CODE], opts) - expect(a).not.toBe(b) - }) - - it('produces a different key for a different ref', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('https://x/r.git', [ContentType.CODE], { ...opts, ref: 
'main' }) - const b = resolveCacheDir('https://x/r.git', [ContentType.CODE], { ...opts, ref: 'dev' }) - expect(a).not.toBe(b) - }) - - it('treats an omitted ref distinctly from an empty ref consistently', () => { - const opts = { baseDir: '/h/.csp' } - const a = resolveCacheDir('https://x/r.git', [ContentType.CODE], opts) - const b = resolveCacheDir('https://x/r.git', [ContentType.CODE], opts) - expect(a).toBe(b) - }) -}) - -describe('computeContentHash', () => { - it('is order-independent across the file list', () => { - const a = computeContentHash([ - { path: 'a.ts', content: 'one' }, - { path: 'b.ts', content: 'two' }, - ]) - const b = computeContentHash([ - { path: 'b.ts', content: 'two' }, - { path: 'a.ts', content: 'one' }, - ]) - expect(a).toBe(b) - }) - - it('changes when any byte of content changes', () => { - const a = computeContentHash([{ path: 'a.ts', content: 'hello' }]) - const b = computeContentHash([{ path: 'a.ts', content: 'hellp' }]) - expect(a).not.toBe(b) - }) - - it('changes when a path changes', () => { - const a = computeContentHash([{ path: 'a.ts', content: 'x' }]) - const b = computeContentHash([{ path: 'b.ts', content: 'x' }]) - expect(a).not.toBe(b) - }) - - it('treats Uint8Array and equivalent string content identically', () => { - const a = computeContentHash([{ path: 'a.ts', content: 'abc' }]) - const b = computeContentHash([ - { path: 'a.ts', content: new Uint8Array([0x61, 0x62, 0x63]) }, - ]) - expect(a).toBe(b) - }) - - it('returns a stable hex sha256 string', () => { - const h = computeContentHash([{ path: 'a.ts', content: 'x' }]) - expect(h).toMatch(/^[0-9a-f]{64}$/) - }) -}) - -describe('ensureCacheDir', () => { - let tmpHome: string - - beforeEach(() => { - tmpHome = mkdtempSync(join(tmpdir(), 'csp-cache-test-')) - }) - - afterEach(() => { - rmSync(tmpHome, { recursive: true, force: true }) - }) - - it('creates the directory chain with mode 0700', () => { - const base = join(tmpHome, '.csp') - const leaf = 
resolveCacheDir('/repo', [ContentType.CODE], { baseDir: base }) - ensureCacheDir(leaf, { baseDir: base }) - - expect(statSync(leaf).mode & 0o777).toBe(0o700) - expect(statSync(join(base, 'index')).mode & 0o777).toBe(0o700) - expect(statSync(base).mode & 0o777).toBe(0o700) - }) - - it('tightens an already-existing directory to 0700', () => { - const base = join(tmpHome, '.csp') - const leaf = resolveCacheDir('/repo', [ContentType.CODE], { baseDir: base }) - // First call creates everything. - ensureCacheDir(leaf, { baseDir: base }) - // Loosen, then re-ensure should re-tighten. - const { chmodSync } = require('node:fs') as typeof import('node:fs') - chmodSync(base, 0o755) - chmodSync(join(base, 'index'), 0o755) - ensureCacheDir(leaf, { baseDir: base }) - - expect(statSync(base).mode & 0o777).toBe(0o700) - expect(statSync(join(base, 'index')).mode & 0o777).toBe(0o700) - expect(statSync(leaf).mode & 0o777).toBe(0o700) - }) - - it('does not touch the real home .csp directory', () => { - const base = join(tmpHome, '.csp') - const leaf = resolveCacheDir('/repo', [ContentType.CODE], { baseDir: base }) - ensureCacheDir(leaf, { baseDir: base }) - // The created tree must live under the injected base, never the real home. - expect(leaf.startsWith(tmpHome)).toBe(true) - }) -}) - -describe('loadOrBuildIndex', () => { - let tmpHome: string - let srcDir: string - let base: string - - beforeEach(() => { - tmpHome = mkdtempSync(join(tmpdir(), 'csp-lob-home-')) - srcDir = mkdtempSync(join(tmpdir(), 'csp-lob-src-')) - base = join(tmpHome, '.csp') - // A minimal indexable source: one code file. 
- writeFileSync(join(srcDir, 'a.ts'), 'export function alpha() { return 1 }\n') - }) - - afterEach(() => { - rmSync(tmpHome, { recursive: true, force: true }) - rmSync(srcDir, { recursive: true, force: true }) - }) - - it('cache miss: builds the index and writes a manifest to the cache dir', async () => { - const index = await loadOrBuildIndex(srcDir, { baseDir: base }) - - expect(index).toBeInstanceOf(CspIndex) - expect(index.chunks.length).toBeGreaterThan(0) - - const cacheDir = resolveCacheDir(srcDir, [ContentType.CODE], { baseDir: base }) - expect(existsSync(join(cacheDir, 'manifest.json'))).toBe(true) - }) - - it('cache hit: a second call reuses the cache without rebuilding', async () => { - await loadOrBuildIndex(srcDir, { baseDir: base }) - - // Spy on the build path: fromPath must NOT be called on the cache hit. - const original = CspIndex.fromPath - let buildCalls = 0 - CspIndex.fromPath = async (...args: Parameters) => { - buildCalls += 1 - return original.apply(CspIndex, args) - } - try { - const index = await loadOrBuildIndex(srcDir, { baseDir: base }) - expect(index).toBeInstanceOf(CspIndex) - expect(index.chunks.length).toBeGreaterThan(0) - expect(buildCalls).toBe(0) - } - finally { - CspIndex.fromPath = original - } - }) - - it('invalidation: a source change rebuilds and reflects new content', async () => { - const first = await loadOrBuildIndex(srcDir, { baseDir: base }) - const firstChunkCount = first.chunks.length - - // Mutate the source: add a second file so the content hash changes. - writeFileSync(join(srcDir, 'b.ts'), 'export function beta() { return 2 }\n') - - const original = CspIndex.fromPath - let buildCalls = 0 - CspIndex.fromPath = async (...args: Parameters) => { - buildCalls += 1 - return original.apply(CspIndex, args) - } - try { - const second = await loadOrBuildIndex(srcDir, { baseDir: base }) - // Stale cache → rebuild happened. - expect(buildCalls).toBe(1) - // New file's content is now indexed. 
- const paths = new Set(second.chunks.map(c => c.filePath)) - expect(paths.has('b.ts')).toBe(true) - expect(second.chunks.length).toBeGreaterThanOrEqual(firstChunkCount) - } - finally { - CspIndex.fromPath = original - } - }) -}) - -describe('resolveIndexRoot', () => { - it('returns /index for an explicit baseDir', () => { - const base = join('/h', '.csp') - expect(resolveIndexRoot({ baseDir: base })).toBe(join(base, 'index')) - }) - - it('shares the cache home with resolveCacheDir', () => { - const base = join('/h', '.csp') - const root = resolveIndexRoot({ baseDir: base }) - const leaf = resolveCacheDir('/repo', [ContentType.CODE], { baseDir: base }) - // Every cache leaf must live under the resolved index root. - expect(leaf.startsWith(`${root}${sep}`)).toBe(true) - }) -}) - -describe('clearIndexCache', () => { - let tmpHome: string - let base: string - - beforeEach(() => { - tmpHome = mkdtempSync(join(tmpdir(), 'csp-clear-test-')) - base = join(tmpHome, '.csp') - }) - - afterEach(() => { - rmSync(tmpHome, { recursive: true, force: true }) - }) - - it('deletes the index root and counts the removed entries', () => { - const indexRoot = resolveIndexRoot({ baseDir: base }) - const { mkdirSync, writeFileSync: write } = require('node:fs') as typeof import('node:fs') - mkdirSync(join(indexRoot, 'key-a'), { recursive: true }) - mkdirSync(join(indexRoot, 'key-b'), { recursive: true }) - write(join(indexRoot, 'key-a', 'manifest.json'), '{}') - - const result = clearIndexCache({ baseDir: base }) - - expect(result.cleared).toBe(true) - expect(result.entries).toBe(2) - expect(result.path).toBe(indexRoot) - expect(existsSync(indexRoot)).toBe(false) - }) - - it('preserves savings.jsonl alongside the index root', () => { - const indexRoot = resolveIndexRoot({ baseDir: base }) - const { mkdirSync, writeFileSync: write } = require('node:fs') as typeof import('node:fs') - mkdirSync(join(indexRoot, 'key-a'), { recursive: true }) - const savings = join(base, 'savings.jsonl') - 
write(savings, '{"call":"search"}\n') - - clearIndexCache({ baseDir: base }) - - // Index gone, savings + home untouched. - expect(existsSync(indexRoot)).toBe(false) - expect(existsSync(savings)).toBe(true) - expect(existsSync(base)).toBe(true) - }) - - it('reports no index cache when the root does not exist', () => { - const result = clearIndexCache({ baseDir: base }) - expect(result.cleared).toBe(false) - expect(result.entries).toBe(0) - expect(result.path).toBe(resolveIndexRoot({ baseDir: base })) - }) - - it('refuses to delete a path that is not an index root (safety guard)', () => { - // A baseDir whose index root resolves to the home itself would be unsafe. - // Guard: the deletion target must end with the `index` segment. - const indexRoot = resolveIndexRoot({ baseDir: base }) - expect(indexRoot.endsWith(`${sep}index`)).toBe(true) - expect(indexRoot).not.toBe(base) - }) - - it('refuses to follow a symlinked index root to an outside target', () => { - const { mkdirSync, writeFileSync: write, symlinkSync } = require('node:fs') as typeof import('node:fs') - // A victim directory outside the cache tree whose content must survive. - const victim = join(tmpHome, 'victim') - mkdirSync(victim, { recursive: true }) - write(join(victim, 'precious.txt'), 'do not delete') - // Make `~/.csp/index` a symlink pointing at the victim. - mkdirSync(base, { recursive: true }) - symlinkSync(victim, resolveIndexRoot({ baseDir: base })) - - // The guard resolves the symlink: realpath's basename is `victim`, not - // `index`, so it refuses — rmSync never follows the link. 
- expect(() => clearIndexCache({ baseDir: base })).toThrow(/Refusing to clear unsafe/) - expect(existsSync(join(victim, 'precious.txt'))).toBe(true) - }) - - it('refuses a symlinked index resolving to another `index` dir outside home', () => { - const { mkdirSync, writeFileSync: write, symlinkSync } = require('node:fs') as typeof import('node:fs') - // A directory literally named `index` but OUTSIDE the cache home — the - // basename check alone would pass, so the direct-child (parent === home) - // check must catch it. - const outsideIndex = join(tmpHome, 'elsewhere', 'index') - mkdirSync(outsideIndex, { recursive: true }) - write(join(outsideIndex, 'precious.txt'), 'do not delete') - mkdirSync(base, { recursive: true }) - symlinkSync(outsideIndex, resolveIndexRoot({ baseDir: base })) - - expect(() => clearIndexCache({ baseDir: base })).toThrow(/Refusing to clear unsafe/) - expect(existsSync(join(outsideIndex, 'precious.txt'))).toBe(true) - }) -}) diff --git a/src/indexing/cache.ts b/src/indexing/cache.ts deleted file mode 100644 index 1b0bca2..0000000 --- a/src/indexing/cache.ts +++ /dev/null @@ -1,394 +0,0 @@ -// Global on-disk index cache location + content hashing (T009). -// -// The cache lives under `~/.csp/index//`, sharing the `~/.csp/` home that -// `stats.ts` already uses for `savings.jsonl`. This module covers the *pure* -// pieces of the caching model: -// - `resolveCacheDir` — deterministic cache directory for a (source, -// content, ref) triple. -// - `computeContentHash`— order-independent hash of a file set's contents. -// - `ensureCacheDir` — create the `~/.csp` → `~/.csp/index` → leaf chain -// with 0700 permissions (NFR-003), tightening any -// pre-existing directory. -// -// The auto build/reuse orchestration (`loadOrBuildIndex`) lands in T010 and -// composes these primitives. 
- -import type { ContentType } from '../types.ts' -import type { CspIndexFromGitOptions } from './index.ts' -import { createHash } from 'node:crypto' -import { chmodSync, existsSync, mkdirSync, readdirSync, realpathSync, rmSync } from 'node:fs' -import { readFile, stat } from 'node:fs/promises' -import { homedir } from 'node:os' -import { basename, dirname, join, normalize, relative } from 'node:path' -import { getExtensions } from '../languages.ts' -import { isGitUrl } from '../utils.ts' -import { MAX_FILE_BYTES } from './create.ts' -import { walkFiles } from './file-walker.ts' -import { CspIndex, DEFAULT_CONTENT, parseManifest } from './index.ts' - -/** Directory permissions for every cache directory (owner-only). NFR-003. */ -const CACHE_DIR_MODE = 0o700 - -/** Length of the hex cache key kept from the full sha256 digest. */ -const KEY_LENGTH = 32 - -/** - * Options shared by the cache helpers. `baseDir` overrides the `~/.csp` home, - * which keeps tests from touching the real user home — production callers omit - * it and get `homedir()/.csp`. - */ -export interface CacheLocationOptions { - /** Override for the `~/.csp` home directory (defaults to `homedir()/.csp`). */ - baseDir?: string - /** Git ref (branch/tag/SHA) participating in the cache key, for `fromGit`. */ - ref?: string -} - -/** A single file's identity for content hashing: relative path + raw content. */ -export interface CacheFile { - path: string - content: string | Uint8Array -} - -/** Resolve the `~/.csp` home, honoring an explicit `baseDir` override. */ -function cacheHome(options: CacheLocationOptions): string { - return options.baseDir ?? join(homedir(), '.csp') -} - -/** - * Resolve the cache directory for an indexed source. - * - * The key is a sha256 over the source identity, the (order-normalized) content - * selection, and the optional git ref — so the same inputs always map to the - * same directory, and a change in source / content / ref maps elsewhere. 
Local - * paths are normalized so equivalent spellings collapse to one key; git URLs - * are used verbatim (plus ref). - * - * @returns an absolute path of the form `/index/`. - */ -export function resolveCacheDir( - source: string, - content: readonly ContentType[], - options: CacheLocationOptions = {}, -): string { - const sourceId = normalizeSource(source) - // Sort content so selection ordering does not change the key. - const contentKey = [...content].map(String).sort() - const ref = options.ref ?? null - - const digest = createHash('sha256') - .update(JSON.stringify({ sourceId, content: contentKey, ref })) - .digest('hex') - .slice(0, KEY_LENGTH) - - return join(cacheHome(options), 'index', digest) -} - -/** - * Resolve the root directory that holds every cached index, i.e. the parent of - * all {@link resolveCacheDir} leaves. Returns `/index`, reusing the same - * `~/.csp` home (and `baseDir` override) as the rest of the cache helpers. - * - * This is the *only* directory `csp clear index` may remove — never the - * `~/.csp` home itself (which also holds `savings.jsonl`). - */ -export function resolveIndexRoot(options: CacheLocationOptions = {}): string { - return join(cacheHome(options), 'index') -} - -/** - * Compute a deterministic, order-independent content hash for a file set. - * - * Files are sorted by path, then each path and its content are folded into a - * single sha256 in order. Equivalent string / `Uint8Array` content hashes - * identically. The same file set in any order yields the same digest; a change - * to any path or byte yields a different one. - */ -export function computeContentHash(files: readonly CacheFile[]): string { - const sorted = [...files].sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0)) - const hash = createHash('sha256') - for (const file of sorted) { - // Length-prefix the path so path/content boundaries are unambiguous. 
- hash.update(`${file.path.length}:${file.path}`) - hash.update(toBytes(file.content)) - } - return hash.digest('hex') -} - -/** - * Ensure the cache directory chain exists with 0700 permissions. - * - * Creates every directory from the `~/.csp` home down to `dir` (a leaf returned - * by {@link resolveCacheDir}). A recursive `mkdir` only applies the mode to - * directories it newly creates, so any pre-existing directory in the chain is - * separately tightened with `chmod 0700` (NFR-003). - */ -export function ensureCacheDir(dir: string, options: CacheLocationOptions = {}): void { - mkdirSync(dir, { recursive: true, mode: CACHE_DIR_MODE }) - for (const segment of chainTo(dir, cacheHome(options))) { - chmodSync(segment, CACHE_DIR_MODE) - } -} - -/** Outcome of {@link clearIndexCache}: the targeted path, whether it was removed, and the entry count. */ -export interface ClearIndexResult { - /** The index root that was targeted (`/index`). */ - path: string - /** True when an existing index root was removed; false when none existed. */ - cleared: boolean - /** Number of top-level cache entries removed (0 when nothing existed). */ - entries: number -} - -/** - * Remove the cached-index root (`/index`) and report how many entries it - * held. **Safety-critical (AC-015):** this deletes *only* the `index` directory - * — never the `~/.csp` home or its `savings.jsonl`. The target is asserted to - * end with the `index` segment and to differ from the home before any removal, - * so a misconfigured `baseDir` cannot escalate into a home-wide rmtree. - * - * Returns `{ cleared: false, entries: 0 }` when no index root exists (not an - * error — the CLI reports it as "No index cache found"). 
- */ -export function clearIndexCache(options: CacheLocationOptions = {}): ClearIndexResult { - const home = cacheHome(options) - const indexRoot = resolveIndexRoot(options) - - if (!existsSync(indexRoot)) { - return { path: indexRoot, cleared: false, entries: 0 } - } - - // Resolve symlinks before the guard so a symlinked `index` (or home) cannot - // redirect the delete outside the cache tree: rmSync follows the link and - // would otherwise wipe the target's contents. realpath needs the path to - // exist, which the existsSync above guarantees for indexRoot. - const realIndexRoot = realpathSync(indexRoot) - const realHome = existsSync(home) ? realpathSync(home) : normalize(home) - - // Guard: the (resolved) deletion target must be the **direct** `index` child - // of the resolved home. Checking the parent (not just `basename === 'index'`) - // also rejects a symlinked `index` that resolves to some *other* `.../index` - // directory outside the cache home. If the invariant fails we delete nothing. - if (basename(realIndexRoot) !== 'index' || normalize(dirname(realIndexRoot)) !== normalize(realHome)) { - throw new Error(`Refusing to clear unsafe index path: ${realIndexRoot}`) - } - - let entries = 0 - try { - entries = readdirSync(realIndexRoot).length - } - catch { - entries = 0 - } - - rmSync(realIndexRoot, { recursive: true, force: true }) - return { path: indexRoot, cleared: true, entries } -} - -/** - * Directories from the `~/.csp` home down to `leaf` (inclusive), ordered - * home-first. When `leaf` is not under `home`, only `leaf` itself is returned - * so we never chmod paths outside the cache tree. 
- */ -function chainTo(leaf: string, home: string): string[] { - const normalizedHome = normalize(home) - const segments: string[] = [] - let current = normalize(leaf) - while (true) { - segments.push(current) - if (current === normalizedHome) { - break - } - const parent = dirname(current) - if (parent === current || !current.startsWith(normalizedHome)) { - break - } - current = parent - } - return segments.reverse() -} - -/** Normalize a source identity: local paths are path-normalized, URLs kept verbatim. */ -function normalizeSource(source: string): string { - if (/^[a-z][a-z0-9+.-]*:\/\//i.test(source) || source.startsWith('git@')) { - return source - } - return normalize(source) -} - -/** Coerce string / `Uint8Array` content to bytes for hashing. */ -function toBytes(content: string | Uint8Array): Uint8Array { - return typeof content === 'string' ? new TextEncoder().encode(content) : content -} - -/** Options for {@link loadOrBuildIndex}. */ -export interface LoadOrBuildOptions extends CacheLocationOptions { - /** Content selection to index (defaults to {@link DEFAULT_CONTENT}). */ - content?: readonly ContentType[] - /** Embedding model identifier forwarded to the build path. */ - modelPath?: string -} - -/** - * Collect the source files {@link CspIndex.fromPath} would index, as - * {@link CacheFile} entries (relative path + raw content), for content hashing. - * - * Uses the same walk + extension resolution as `createIndexFromPath`: the - * configured content selection drives `getExtensions`, `walkFiles` applies the - * `.gitignore`/`.cspignore` + default-ignore rules, and over-large files are - * skipped (matching the index's own `MAX_FILE_BYTES` cutoff). Paths are made - * relative to `root` so the hash is stable across machines / mount points. 
- */ -async function collectSourceFiles( - root: string, - content: readonly ContentType[], -): Promise { - const extensions = getExtensions(content.map(c => c as `${ContentType}`), undefined) - const files: CacheFile[] = [] - for await (const filePath of walkFiles(root, extensions)) { - let size: number - try { - size = (await stat(filePath)).size - } - catch { - continue - } - if (size > MAX_FILE_BYTES) { - continue - } - let raw: string - try { - raw = await readFile(filePath, 'utf8') - } - catch { - continue - } - files.push({ path: relative(root, filePath), content: raw }) - } - return files -} - -/** - * Load a cached index for `source` if one exists and is still valid, otherwise - * build it, persist it to the cache, and return it. - * - * Local paths: the live source file set is hashed ({@link computeContentHash}) - * and compared against the cached manifest's `contentHash`. A match means the - * cache is fresh → reuse via {@link CspIndex.loadFromDisk}. A mismatch (the - * source changed) invalidates the cache → rebuild and overwrite. The source - * hash is injected into {@link CspIndex.save} so the manifest records a value - * recomputed the same way on the next call. - * - * Git URLs (T009 STOP fallback): re-hashing a remote without a clone is not - * possible, and a temp checkout's metadata makes a content hash - * non-deterministic — so git sources are keyed by URL + ref alone - * ({@link resolveCacheDir}). An existing cache for that key is reused; otherwise - * the index is cloned, built, and saved (with the build-time content hash - * recorded for transparency, not validation). - */ -export async function loadOrBuildIndex( - source: string, - options: LoadOrBuildOptions = {}, -): Promise { - const content = options.content ?? 
DEFAULT_CONTENT - const { baseDir, ref, modelPath } = options - const isGit = isGitUrl(source) - - const locationOptions: CacheLocationOptions = {} - if (baseDir !== undefined) { - locationOptions.baseDir = baseDir - } - if (ref !== undefined) { - locationOptions.ref = ref - } - - const cacheDir = resolveCacheDir(source, content, locationOptions) - ensureCacheDir(cacheDir, baseDir !== undefined ? { baseDir } : {}) - - // The source-file hash is the cache-validity oracle for local paths; git - // sources have no cheap live hash, so their key alone gates reuse. - const sourceHash = isGit ? null : computeContentHash(await collectSourceFiles(source, content)) - - const cached = await tryReuse(cacheDir, isGit, sourceHash) - if (cached !== null) { - return cached - } - - const buildOptions: { ref?: string, modelPath?: string } = {} - if (ref !== undefined) { - buildOptions.ref = ref - } - if (modelPath !== undefined) { - buildOptions.modelPath = modelPath - } - - const index = await buildIndex(source, isGit, content, buildOptions) - await index.save(cacheDir, sourceHash !== null ? { contentHash: sourceHash } : {}) - return index -} - -/** - * Reuse a cached index when present and valid, else `null`. For git sources a - * present manifest is enough (URL+ref keyed); for local paths the manifest's - * `contentHash` must equal the live `sourceHash`. - */ -async function tryReuse( - cacheDir: string, - isGit: boolean, - sourceHash: string | null, -): Promise { - const manifestPath = join(cacheDir, 'manifest.json') - if (!existsSync(manifestPath)) { - return null - } - - // For local sources, compare the content hash *before* the expensive full - // load (chunks + bm25 + dense vectors + model). On a cache miss this skips - // loading an index we are about to discard. Git sources are URL+ref keyed, - // so a present manifest is sufficient. 
- if (!isGit) { - let manifest - try { - manifest = parseManifest(JSON.parse(await readFile(manifestPath, 'utf8'))) - } - catch { - // Corrupt/partial manifest — treat as a miss and rebuild. - return null - } - if (manifest.contentHash !== sourceHash) { - return null - } - } - - try { - return await CspIndex.loadFromDisk(cacheDir) - } - catch { - // Corrupt/partial cache entry — treat as a miss and rebuild. - return null - } -} - -/** Build a fresh index from a local path or git URL. */ -async function buildIndex( - source: string, - isGit: boolean, - content: readonly ContentType[], - options: { ref?: string, modelPath?: string }, -): Promise { - if (isGit) { - const gitOptions: CspIndexFromGitOptions = { content } - if (options.ref !== undefined) { - gitOptions.ref = options.ref - } - if (options.modelPath !== undefined) { - gitOptions.modelPath = options.modelPath - } - return CspIndex.fromGit(source, gitOptions) - } - const fromPathOptions: { content: readonly ContentType[], modelPath?: string } = { content } - if (options.modelPath !== undefined) { - fromPathOptions.modelPath = options.modelPath - } - return CspIndex.fromPath(source, fromPathOptions) -} diff --git a/src/indexing/create.test.ts b/src/indexing/create.test.ts deleted file mode 100644 index 2cc4d78..0000000 --- a/src/indexing/create.test.ts +++ /dev/null @@ -1,79 +0,0 @@ -// Tests for src/indexing/create.ts - -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' -import { ContentType } from '../types.ts' -import { createIndexFromPath } from './create.ts' -import { makeStubModel } from './dense.ts' - -describe('createIndexFromPath', () => { - let dir: string - - beforeEach(() => { - dir = mkdtempSync(join(tmpdir(), 'csp-create-')) - }) - afterEach(() => { - rmSync(dir, { recursive: true, force: true }) - }) - - it('builds chunks/bm25/semantic 
indexes for a small TS file', async () => { - const src = join(dir, 'sample.ts') - writeFileSync( - src, - 'export function greet(name: string) {\n return `hi ${name}`\n}\n', - ) - const model = makeStubModel(4) - const result = await createIndexFromPath(dir, { model, displayRoot: dir }) - expect(result.chunks.length).toBeGreaterThan(0) - // Path is stored relative to displayRoot. - expect(result.chunks[0]!.filePath).toBe('sample.ts') - expect(result.semanticIndex.vectors.length).toBe(result.chunks.length) - expect(result.bm25Index.documents.length).toBe(result.chunks.length) - }) - - it('throws when no supported files are found', async () => { - // Only an unsupported binary extension present. - writeFileSync(join(dir, 'data.bin'), 'binary') - const model = makeStubModel(4) - await expect(createIndexFromPath(dir, { model })).rejects.toThrow( - /No supported files found/, - ) - }) - - it('respects an explicit extensions override', async () => { - writeFileSync(join(dir, 'a.txt'), 'hello world') - const model = makeStubModel(4) - const result = await createIndexFromPath(dir, { - model, - extensions: ['.txt'], - content: ContentType.DOCS, - displayRoot: dir, - }) - expect(result.chunks.length).toBe(1) - expect(result.chunks[0]!.filePath).toBe('a.txt') - }) - - it('skips files larger than MAX_FILE_BYTES', async () => { - // Write 2 MB of code-like content; should be skipped. 
- const big = 'a'.repeat(2_000_000) - writeFileSync(join(dir, 'big.ts'), big) - writeFileSync(join(dir, 'small.ts'), 'export const x = 1\n') - const model = makeStubModel(4) - const result = await createIndexFromPath(dir, { model, displayRoot: dir }) - const paths = result.chunks.map(c => c.filePath) - expect(paths).toContain('small.ts') - expect(paths).not.toContain('big.ts') - }) - - it('descends into subdirectories', async () => { - const sub = join(dir, 'sub') - mkdirSync(sub) - writeFileSync(join(sub, 'nested.ts'), 'const a = 1\n') - const model = makeStubModel(4) - const result = await createIndexFromPath(dir, { model, displayRoot: dir }) - const paths = result.chunks.map(c => c.filePath) - expect(paths.some(p => p.endsWith('nested.ts'))).toBe(true) - }) -}) diff --git a/src/indexing/create.ts b/src/indexing/create.ts deleted file mode 100644 index 446e75c..0000000 --- a/src/indexing/create.ts +++ /dev/null @@ -1,94 +0,0 @@ -// Port of src/semble/index/create.py - -import type { Chunk } from '../types.ts' -import type { Model } from './dense.ts' -import { readFileSync, statSync } from 'node:fs' -import { relative } from 'node:path' -import { chunkSource } from '../chunking/chunk-source.ts' -import { detectLanguage, getExtensions } from '../languages.ts' -import { tokenize } from '../tokens.ts' -import { ContentType } from '../types.ts' -import { embedChunks, SelectableBasicBackend } from './dense.ts' -import { walkFiles } from './file-walker.ts' -import { Bm25Index, enrichForBm25 } from './sparse.ts' - -/** 1 MB max file size to read and index. */ -export const MAX_FILE_BYTES = 1_000_000 - -export interface CreateIndexOptions { - model: Model - extensions?: readonly string[] - content?: ContentType | readonly ContentType[] - displayRoot?: string -} - -export interface CreateIndexResult { - bm25Index: Bm25Index - semanticIndex: SelectableBasicBackend - chunks: Chunk[] -} - -/** - * Create an index from a resolved directory. 
- * - * Walks files matching `extensions`, chunks them, enriches text for BM25, - * tokenizes it, embeds chunks, and returns the populated indexes. - * - * @throws if no chunks are produced. - */ -export async function createIndexFromPath( - path: string, - options: CreateIndexOptions, -): Promise { - const { model, extensions, content, displayRoot } = options - - const normalized: readonly ContentType[] = normalizeContent(content) - const resolvedExtensions = getExtensions(normalized, extensions) - - const chunks: Chunk[] = [] - for await (const filePath of walkFiles(path, resolvedExtensions)) { - const language = detectLanguage(filePath) - let size: number - try { - size = statSync(filePath).size - } - catch { - continue - } - if (size > MAX_FILE_BYTES) { - continue - } - let source: string - try { - source = readFileSync(filePath, 'utf8') - } - catch { - continue - } - const chunkPath = displayRoot !== undefined ? relative(displayRoot, filePath) : filePath - chunks.push(...(await chunkSource(source, chunkPath, language ?? null))) - } - - if (chunks.length === 0) { - throw new Error(`No supported files found under ${path}.`) - } - - const embeddings = embedChunks(model, chunks) - const bm25Index = Bm25Index.build(chunks.map(c => tokenize(enrichForBm25(c)))) - const semanticIndex = new SelectableBasicBackend(embeddings) - - return { bm25Index, semanticIndex, chunks } -} - -function normalizeContent( - content: ContentType | readonly ContentType[] | undefined, -): readonly ContentType[] { - if (content === undefined) { - // Default: code-only. Mirrors _DEFAULT_CONTENT in semble. 
- return [ContentType.CODE] - } - if (Array.isArray(content)) { - return content as readonly ContentType[] - } - return [content as ContentType] -} diff --git a/src/indexing/dense.test.ts b/src/indexing/dense.test.ts deleted file mode 100644 index 3b2c9be..0000000 --- a/src/indexing/dense.test.ts +++ /dev/null @@ -1,222 +0,0 @@ -// Port of src/semble/index/dense.py — unit tests - -import type { Chunk } from './dense' -import { Buffer } from 'node:buffer' -import { mkdtemp, rm, writeFile } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' -import { - - DEFAULT_MODEL_NAME, - embedChunks, - loadModel, - SelectableBasicBackend, -} from './dense' - -function chunk(content: string): Chunk { - return { - content, - filePath: 'a.ts', - startLine: 1, - endLine: 1, - language: 'typescript', - } -} - -describe('loadModel', () => { - it('resolves with a Model exposing a positive dim', async () => { - const { model, modelPath } = await loadModel() - expect(modelPath).toBe(DEFAULT_MODEL_NAME) - expect(model.dim).toBeGreaterThan(0) - }) - - it('caches models by path', async () => { - const a = await loadModel('test/path-A') - const b = await loadModel('test/path-A') - expect(a.model).toBe(b.model) - }) - - it('returns distinct entries for different paths', async () => { - const a = await loadModel('test/path-X') - const b = await loadModel('test/path-Y') - expect(a.modelPath).toBe('test/path-X') - expect(b.modelPath).toBe('test/path-Y') - }) -}) - -describe('embedChunks', () => { - it('returns [] for an empty input', async () => { - const { model } = await loadModel() - expect(embedChunks(model, [])).toEqual([]) - }) - - it('returns one vector per chunk with model.dim length', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('hello'), chunk('world')]) - expect(vectors).toHaveLength(2) - for (const v of vectors) { - 
expect(v).toBeInstanceOf(Float32Array) - expect(v.length).toBe(model.dim) - } - }) - - it('is deterministic: same content → same vector', async () => { - const { model } = await loadModel() - const [v1] = embedChunks(model, [chunk('def search()')]) - const [v2] = embedChunks(model, [chunk('def search()')]) - expect(v1).toBeDefined() - expect(v2).toBeDefined() - expect(Array.from(v1!)).toEqual(Array.from(v2!)) - }) - - it('produces different vectors for different content', async () => { - const { model } = await loadModel() - const [v1, v2] = embedChunks(model, [chunk('foo'), chunk('bar')]) - expect(v1).toBeDefined() - expect(v2).toBeDefined() - expect(Array.from(v1!)).not.toEqual(Array.from(v2!)) - }) -}) - -describe('SelectableBasicBackend.query', () => { - it('throws when k < 1', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b')]) - const backend = new SelectableBasicBackend(vectors) - expect(() => backend.query([vectors[0]!], 0)).toThrow() - }) - - it('throws when constructed with inconsistent vector dimensions', async () => { - const { model } = await loadModel() - const [v0] = embedChunks(model, [chunk('a')]) - const truncated = new Float32Array(v0!.length - 1) - expect(() => new SelectableBasicBackend([v0!, truncated])).toThrow( - /Inconsistent vector dimensions/, - ) - }) - - it('throws when a query vector dimension differs from the index dim', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b')]) - const backend = new SelectableBasicBackend(vectors) - const bad = new Float32Array(backend.dim - 1) - expect(() => backend.query([bad], 1)).toThrow(/Query vector dimension mismatch/) - }) - - it('throws when a selector index is out of bounds', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b')]) - const backend = new SelectableBasicBackend(vectors) - const selector = new 
Uint32Array([0, 5]) - expect(() => backend.query([vectors[0]!], 1, selector)).toThrow( - /Selector index out of bounds/, - ) - }) - - it('returns top-k (index, distance) pairs sorted by distance', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b'), chunk('c'), chunk('d')]) - const backend = new SelectableBasicBackend(vectors) - - const results = backend.query([vectors[0]!], 3) - expect(results).toHaveLength(1) - const hits = results[0]! - expect(hits).toHaveLength(3) - // Self should be the nearest with ~0 distance. - expect(hits[0]![0]).toBe(0) - expect(hits[0]![1]).toBeCloseTo(0, 5) - // Distances must be monotonically non-decreasing. - for (let i = 1; i < hits.length; i++) { - expect(hits[i]![1]).toBeGreaterThanOrEqual(hits[i - 1]![1]) - } - }) - - it('only returns indices from the selector pool', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b'), chunk('c'), chunk('d')]) - const backend = new SelectableBasicBackend(vectors) - - const selector = new Uint32Array([1, 2]) - const results = backend.query([vectors[0]!], 5, selector) - expect(results).toHaveLength(1) - const hits = results[0]! - // effective_k = min(5, 4, 2) = 2. 
- expect(hits).toHaveLength(2) - const indices = hits.map(h => h[0]) - for (const i of indices) { - expect([1, 2]).toContain(i) - } - }) - - it('handles multiple query vectors', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b'), chunk('c')]) - const backend = new SelectableBasicBackend(vectors) - - const results = backend.query([vectors[0]!, vectors[1]!], 2) - expect(results).toHaveLength(2) - expect(results[0]![0]![0]).toBe(0) - expect(results[1]![0]![0]).toBe(1) - }) - - it('caps effective_k at the number of stored vectors', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('a'), chunk('b')]) - const backend = new SelectableBasicBackend(vectors) - const results = backend.query([vectors[0]!], 10) - expect(results[0]!).toHaveLength(2) - }) -}) - -describe('SelectableBasicBackend save/load', () => { - let dir: string - beforeEach(async () => { - dir = await mkdtemp(join(tmpdir(), 'csp-dense-')) - }) - afterEach(async () => { - await rm(dir, { recursive: true, force: true }) - }) - - it('roundtrip preserves vectors and query results', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('alpha'), chunk('beta'), chunk('gamma')]) - const original = new SelectableBasicBackend(vectors) - await original.save(dir) - - const loaded = await SelectableBasicBackend.load(dir) - expect(loaded.vectors).toHaveLength(original.vectors.length) - expect(loaded.dim).toBe(original.dim) - - for (let i = 0; i < original.vectors.length; i++) { - const a = original.vectors[i]! - const b = loaded.vectors[i]! 
- expect(b.length).toBe(a.length) - for (let j = 0; j < a.length; j++) { - expect(b[j]!).toBeCloseTo(a[j]!, 6) - } - } - - const origResults = original.query([vectors[0]!], 2) - const loadedResults = loaded.query([vectors[0]!], 2) - expect(loadedResults[0]!.map(h => h[0])).toEqual(origResults[0]!.map(h => h[0])) - }) - - it('rejects a truncated vectors.bin during load', async () => { - const { model } = await loadModel() - const vectors = embedChunks(model, [chunk('alpha'), chunk('beta')]) - const original = new SelectableBasicBackend(vectors) - await original.save(dir) - - // Truncate vectors.bin to half its expected size. - const truncated = new Float32Array(original.dim) // one row instead of two - await writeFile( - join(dir, 'vectors.bin'), - Buffer.from(truncated.buffer, truncated.byteOffset, truncated.byteLength), - ) - - await expect(SelectableBasicBackend.load(dir)).rejects.toThrow( - /Vector file size mismatch/, - ) - }) -}) diff --git a/src/indexing/dense.ts b/src/indexing/dense.ts deleted file mode 100644 index 829b02f..0000000 --- a/src/indexing/dense.ts +++ /dev/null @@ -1,316 +0,0 @@ -// Port of src/semble/index/dense.py -// -// Loads a Model2Vec model, embeds chunks, and provides a vector -// backend with cosine distance + optional index-selector filtering. -// -// NOTE: This unit ships a STUB Model2Vec implementation. `loadModel` and -// `embedChunks` do not download or run a real Model2Vec model. Instead -// they produce deterministic, hash-seeded float vectors so that the API -// contract is exercised by tests without requiring network I/O. -// TODO(dense): integrate real Model2Vec model loading. - -import type { Chunk } from '../types.ts' -import { Buffer } from 'node:buffer' -import { mkdir, readFile, writeFile } from 'node:fs/promises' -import { join } from 'node:path' - -// Re-exported so existing importers (e.g. dense.test.ts) keep resolving -// `Chunk` from this module after the type was unified into ../types.ts. 
-export type { Chunk } - -/** - * Default Model2Vec model name (kept identical to semble for parity). - */ -export const DEFAULT_MODEL_NAME = 'minishlab/potion-code-16M' - -/** - * Default embedding dimension for the stub model. The real - * `potion-code-16M` model emits 256-dim vectors, but the stub is - * dimension-agnostic — pick something small enough for fast tests. - */ -const _DEFAULT_STUB_DIM = 256 - -/** - * Loaded Model2Vec model. The real model exposes `.encode(texts)`; - * the stub provides the same shape plus a `dim` accessor. - */ -export interface Model { - readonly dim: number - encode: (texts: string[]) => Float32Array[] -} - -const _MODEL_CACHE = new Map() - -/** - * Deterministic 32-bit hash (FNV-1a) for stub seeding. - */ -function fnv1a(s: string): number { - let h = 0x811C9DC5 - for (let i = 0; i < s.length; i++) { - h ^= s.charCodeAt(i) - h = Math.imul(h, 0x01000193) >>> 0 - } - return h >>> 0 -} - -/** - * Mulberry32 PRNG — fast, deterministic, good enough for stub vectors. - */ -function mulberry32(seed: number): () => number { - let a = seed >>> 0 - return () => { - a = (a + 0x6D2B79F5) >>> 0 - let t = a - t = Math.imul(t ^ (t >>> 15), t | 1) - t ^= t + Math.imul(t ^ (t >>> 7), t | 61) - return ((t ^ (t >>> 14)) >>> 0) / 4294967296 - } -} - -/** - * Build a deterministic unit-length vector from a string. Identical - * input strings always produce identical vectors, satisfying the - * "embedding is a pure function of content" contract. - */ -function stubEmbed(text: string, dim: number): Float32Array { - const rng = mulberry32(fnv1a(text)) - const v = new Float32Array(dim) - let norm = 0 - for (let i = 0; i < dim; i++) { - // Box-Muller-ish: cheap normal-ish distribution out of two uniforms. - const u1 = Math.max(rng(), 1e-12) - const u2 = rng() - const g = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2) - v[i] = g - norm += g * g - } - norm = Math.sqrt(norm) || 1 - for (let i = 0; i < dim; i++) { - v[i] = v[i]! 
/ norm - } - return v -} - -export function makeStubModel(dim: number): Model { - return { - dim, - encode(texts: string[]): Float32Array[] { - return texts.map(t => stubEmbed(t, dim)) - }, - } -} - -/** - * Load (and cache) a Model2Vec model. Always async, mirroring the - * eventual real implementation that performs an HF download. - * - * @param modelPath Optional model id; defaults to {@link DEFAULT_MODEL_NAME}. - */ -export async function loadModel( - modelPath?: string, -): Promise<{ model: Model, modelPath: string }> { - const resolved = modelPath ?? DEFAULT_MODEL_NAME - let model = _MODEL_CACHE.get(resolved) - if (!model) { - // TODO(dense): replace with real Model2Vec download + inference. - model = makeStubModel(_DEFAULT_STUB_DIM) - _MODEL_CACHE.set(resolved, model) - } - return Promise.resolve({ model, modelPath: resolved }) -} - -/** - * Embed chunks using the configured model. Returns one row per chunk; - * the empty list maps to an empty result (matching semble). - */ -export function embedChunks(model: Model, chunks: Chunk[]): Float32Array[] { - if (chunks.length === 0) { - return [] - } - return model.encode(chunks.map(c => c.content)) -} - -// --------------------------------------------------------------------------- -// SelectableBasicBackend -// --------------------------------------------------------------------------- - -export interface BasicArgs { - /** Distance metric — for parity we only support cosine. */ - metric?: 'cosine' -} - -/** - * Pre-normalise a vector in place (L2). Zero vectors stay zero. - */ -function normalizeInPlace(v: Float32Array): void { - let n = 0 - for (let i = 0; i < v.length; i++) { - n += v[i]! * v[i]! - } - n = Math.sqrt(n) - if (n === 0) { - return - } - for (let i = 0; i < v.length; i++) { - v[i] = v[i]! / n - } -} - -function dot(a: Float32Array, b: Float32Array): number { - let s = 0 - const n = a.length - for (let i = 0; i < n; i++) { - s += a[i]! * b[i]! 
- } - return s -} - -/** - * In-memory vector backend with cosine distance and optional - * candidate-selector filtering — TS port of - * `SelectableBasicBackend(CosineBasicBackend)` from semble. - */ -export class SelectableBasicBackend { - /** Pre-normalised row vectors. */ - readonly vectors: Float32Array[] - readonly arguments: BasicArgs - readonly dim: number - - constructor(vectors: Float32Array[], options: BasicArgs = {}) { - this.arguments = { metric: 'cosine', ...options } - this.dim = vectors[0]?.length ?? 0 - // Defensive copy + normalise so cosine distance reduces to (1 - dot). - this.vectors = vectors.map((v) => { - if (v.length !== this.dim) { - throw new Error( - `Inconsistent vector dimensions: expected ${this.dim}, got ${v.length}`, - ) - } - const copy = new Float32Array(v) - normalizeInPlace(copy) - return copy - }) - } - - /** - * Batched k-NN query. - * - * @param queryVectors One row per query (raw — will be normalised here). - * @param k Number of neighbours per query. - * @param selector Optional pool of candidate indices; results are - * guaranteed to come from this set. - * @returns For each query, an array of `[chunkIndex, cosineDistance]` - * sorted by ascending distance. - * @throws Error if `k < 1`. - */ - query( - queryVectors: Float32Array[], - k: number, - selector?: Uint32Array, - ): Array> { - if (k < 1) { - throw new Error(`k should be >= 1, is now ${k}`) - } - - const numVectors = this.vectors.length - let effectiveK = Math.min(k, numVectors) - if (selector !== undefined) { - // Bounds-check selector indices up front so we fail fast with a - // descriptive error instead of crashing during the dot-product loop. - for (let i = 0; i < selector.length; i++) { - const idx = selector[i]! 
- if (idx >= numVectors) { - throw new Error( - `Selector index out of bounds: ${idx} (total vectors: ${numVectors})`, - ) - } - } - effectiveK = Math.min(effectiveK, selector.length) - } - - const out: Array> = [] - if (effectiveK === 0) { - for (let i = 0; i < queryVectors.length; i++) { - out.push([]) - } - return out - } - - for (const raw of queryVectors) { - if (raw.length !== this.dim) { - throw new Error( - `Query vector dimension mismatch: expected ${this.dim}, got ${raw.length}`, - ) - } - const q = new Float32Array(raw) - normalizeInPlace(q) - - const candidatePool = selector ?? null - const poolSize = candidatePool ? candidatePool.length : numVectors - const distances = new Float64Array(poolSize) - for (let i = 0; i < poolSize; i++) { - const vecIdx = candidatePool ? candidatePool[i]! : i - const target = this.vectors[vecIdx]! - distances[i] = 1 - dot(q, target) - } - - // Build [poolIdx, dist] pairs and partial-sort by distance. - const pairs: Array<[number, number]> = Array.from( - { length: poolSize }, - (_, i) => [i, distances[i]!], - ) - pairs.sort((a, b) => a[1] - b[1]) - const top = pairs.slice(0, effectiveK) - - // Map pool-relative indices back to absolute chunk indices. - const mapped: Array<[number, number]> = top.map(([poolIdx, dist]) => [ - candidatePool ? candidatePool[poolIdx]! : poolIdx, - dist, - ]) - out.push(mapped) - } - - return out - } - - /** - * Persist vectors + args to `/vectors.bin` and `/args.json`. - * Format is local to csp — vicinity's own format is not preserved. 
- */ - async save(dir: string): Promise { - await mkdir(dir, { recursive: true }) - const rows = this.vectors.length - const dim = this.dim - const buf = new Float32Array(rows * dim) - for (let r = 0; r < rows; r++) { - buf.set(this.vectors[r]!, r * dim) - } - const meta = { rows, dim, arguments: this.arguments } - await writeFile(join(dir, 'vectors.bin'), Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength)) - await writeFile(join(dir, 'args.json'), JSON.stringify(meta)) - } - - /** - * Inverse of {@link SelectableBasicBackend.save}. - */ - static async load(dir: string): Promise { - const metaRaw = await readFile(join(dir, 'args.json'), 'utf8') - const meta = JSON.parse(metaRaw) as { rows: number, dim: number, arguments: BasicArgs } - const bytes = await readFile(join(dir, 'vectors.bin')) - const expectedBytes = meta.rows * meta.dim * 4 - if (bytes.byteLength !== expectedBytes) { - throw new Error( - `Vector file size mismatch: expected ${expectedBytes} bytes, got ${bytes.byteLength}`, - ) - } - // Copy into a fresh ArrayBuffer so alignment is guaranteed. 
- const ab = new ArrayBuffer(bytes.byteLength) - new Uint8Array(ab).set(bytes) - const flat = new Float32Array(ab) - const vectors: Float32Array[] = [] - for (let r = 0; r < meta.rows; r++) { - vectors.push(flat.slice(r * meta.dim, (r + 1) * meta.dim)) - } - return new SelectableBasicBackend(vectors, meta.arguments) - } -} diff --git a/src/indexing/file-walker.test.ts b/src/indexing/file-walker.test.ts deleted file mode 100644 index 49d92e2..0000000 --- a/src/indexing/file-walker.test.ts +++ /dev/null @@ -1,271 +0,0 @@ -import { mkdirSync, mkdtempSync, rmSync, symlinkSync, writeFileSync } from 'node:fs' -import os from 'node:os' -import path from 'node:path' -// Port of src/semble/index/file_walker.py — tests -import { afterEach, beforeEach, describe, expect, test } from 'bun:test' - -import { - _isIgnored, - _loadIgnoreForDir, - DEFAULT_IGNORED_DIRS, - walkFiles, -} from './file-walker.ts' - -let ignoreAvailable = true -try { - await import('ignore') -} -catch { - ignoreAvailable = false -} - -const describeWithIgnore = ignoreAvailable ? 
describe : describe.skip - -async function collect(iter: AsyncIterable): Promise { - const out: string[] = [] - for await (const item of iter) { - out.push(item) - } - return out -} - -describe('DEFAULT_IGNORED_DIRS', () => { - test('contains the csp cache dir instead of the semble one', () => { - expect(DEFAULT_IGNORED_DIRS.has('.csp/')).toBe(true) - expect(DEFAULT_IGNORED_DIRS.has('.semble/')).toBe(false) - }) - - test('contains canonical noisy directories', () => { - for (const d of ['.git/', 'node_modules/', 'dist/', 'build/', '.next/', '__pycache__/']) { - expect(DEFAULT_IGNORED_DIRS.has(d)).toBe(true) - } - }) -}) - -describeWithIgnore('walkFiles', () => { - let root: string - - beforeEach(() => { - root = mkdtempSync(path.join(os.tmpdir(), 'csp-walker-')) - }) - - afterEach(() => { - rmSync(root, { recursive: true, force: true }) - }) - - test('yields all .ts files under root recursively', async () => { - writeFileSync(path.join(root, 'a.ts'), 'a') - mkdirSync(path.join(root, 'sub')) - writeFileSync(path.join(root, 'sub', 'b.ts'), 'b') - writeFileSync(path.join(root, 'sub', 'c.md'), 'c') - mkdirSync(path.join(root, 'sub', 'nested')) - writeFileSync(path.join(root, 'sub', 'nested', 'd.ts'), 'd') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['a.ts', path.join('sub', 'b.ts'), path.join('sub', 'nested', 'd.ts')]) - }) - - test('skips symlinks', async () => { - writeFileSync(path.join(root, 'real.ts'), 'real') - try { - symlinkSync(path.join(root, 'real.ts'), path.join(root, 'link.ts')) - } - catch { - // Some sandboxes disallow symlinks — bail rather than fail. 
- return - } - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['real.ts']) - }) - - test('always ignores .git/ and node_modules/', async () => { - writeFileSync(path.join(root, 'keep.ts'), 'k') - mkdirSync(path.join(root, '.git')) - writeFileSync(path.join(root, '.git', 'hidden.ts'), 'h') - mkdirSync(path.join(root, 'node_modules')) - writeFileSync(path.join(root, 'node_modules', 'pkg.ts'), 'p') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['keep.ts']) - }) - - test('.gitignore excludes matching files', async () => { - writeFileSync(path.join(root, '.gitignore'), '*.log\n') - writeFileSync(path.join(root, 'foo.log'), 'foo') - writeFileSync(path.join(root, 'bar.txt'), 'bar') - - const results = await collect(walkFiles(root, ['.log', '.txt'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['bar.txt']) - }) - - test('.gitignore negation-with-extension bypasses extension filter (found)', async () => { - // `*.log` ignores everything ending in .log; `!special.log` un-ignores - // special.log AND should be yielded even though `.log` is not in the - // extension allowlist below. 
- writeFileSync(path.join(root, '.gitignore'), '*.log\n!special.log\n') - writeFileSync(path.join(root, 'foo.log'), 'foo') - writeFileSync(path.join(root, 'special.log'), 'special') - writeFileSync(path.join(root, 'keep.ts'), 'k') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['keep.ts', 'special.log']) - }) - - test('.cspignore is honoured in addition to .gitignore', async () => { - writeFileSync(path.join(root, '.gitignore'), 'gitignored.ts\n') - writeFileSync(path.join(root, '.cspignore'), 'cspignored.ts\n') - writeFileSync(path.join(root, 'keep.ts'), 'k') - writeFileSync(path.join(root, 'gitignored.ts'), 'g') - writeFileSync(path.join(root, 'cspignored.ts'), 'c') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['keep.ts']) - }) - - test('respects nested .gitignore from subdirectories', async () => { - writeFileSync(path.join(root, 'top.ts'), 't') - mkdirSync(path.join(root, 'sub')) - writeFileSync(path.join(root, 'sub', '.gitignore'), 'skip.ts\n') - writeFileSync(path.join(root, 'sub', 'skip.ts'), 's') - writeFileSync(path.join(root, 'sub', 'keep.ts'), 'k') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual([path.join('sub', 'keep.ts'), 'top.ts']) - }) - - test('honours the extra `ignore` arg', async () => { - writeFileSync(path.join(root, 'foo.ts'), 'f') - writeFileSync(path.join(root, 'bar.ts'), 'b') - - const results = await collect(walkFiles(root, ['.ts'], ['foo.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['bar.ts']) - }) - - test('filters by extension (case-insensitive)', async () => { - writeFileSync(path.join(root, 'a.TS'), 'a') - writeFileSync(path.join(root, 'b.ts'), 'b') - 
writeFileSync(path.join(root, 'c.md'), 'c') - - const results = await collect(walkFiles(root, ['.ts'])) - const relative = results.map(p => path.relative(root, p)).sort() - expect(relative).toEqual(['a.TS', 'b.ts']) - }) -}) - -describeWithIgnore('_loadIgnoreForDir', () => { - let root: string - - beforeEach(() => { - root = mkdtempSync(path.join(os.tmpdir(), 'csp-walker-load-')) - }) - - afterEach(() => { - rmSync(root, { recursive: true, force: true }) - }) - - test('returns null when neither ignore file exists', async () => { - const spec = await _loadIgnoreForDir(root) - expect(spec).toBeNull() - }) - - test('combines .gitignore and .cspignore lines', async () => { - writeFileSync(path.join(root, '.gitignore'), 'a.ts\n') - writeFileSync(path.join(root, '.cspignore'), 'b.ts\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - expect(spec!.patterns.length).toBe(2) - expect(spec!.patterns.map(p => p.pattern)).toEqual(['a.ts', 'b.ts']) - }) - - test('skips blank lines and comments', async () => { - writeFileSync(path.join(root, '.gitignore'), '# comment\n\n*.log\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - expect(spec!.patterns.length).toBe(1) - expect(spec!.patterns[0]!.pattern).toBe('*.log') - }) -}) - -describeWithIgnore('_isIgnored', () => { - let root: string - - beforeEach(() => { - root = mkdtempSync(path.join(os.tmpdir(), 'csp-walker-isig-')) - }) - - afterEach(() => { - rmSync(root, { recursive: true, force: true }) - }) - - test('returns found=true for negation patterns with file extensions', async () => { - writeFileSync(path.join(root, '.gitignore'), '*.log\n!special.log\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - const check = _isIgnored(path.join(root, 'special.log'), false, [spec!]) - expect(check.ignored).toBe(false) - expect(check.found).toBe(true) - }) - - test('returns found=false for negation patterns without file extensions', async () => { - 
writeFileSync(path.join(root, '.gitignore'), 'vendor/\n!vendor/keep/\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - // The negation pattern `!vendor/keep/` has no extension — should NOT set found. - const check = _isIgnored(path.join(root, 'vendor', 'keep'), true, [spec!]) - expect(check.found).toBe(false) - }) - - test('returns ignored=true when pattern matches', async () => { - writeFileSync(path.join(root, '.gitignore'), '*.log\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - const check = _isIgnored(path.join(root, 'foo.log'), false, [spec!]) - expect(check.ignored).toBe(true) - }) - - test('hasNegatedExtPattern is true when a negation pattern has an extension suffix', async () => { - writeFileSync(path.join(root, '.gitignore'), '*.log\n!special.log\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - expect(spec!.hasNegatedExtPattern).toBe(true) - }) - - test('hasNegatedExtPattern is false when negation patterns have no extension suffix', async () => { - writeFileSync(path.join(root, '.gitignore'), 'vendor/\n!vendor/keep/\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - expect(spec!.hasNegatedExtPattern).toBe(false) - }) - - test('hasNegatedExtPattern is false when there are no negation patterns', async () => { - writeFileSync(path.join(root, '.gitignore'), '*.log\n*.tmp\n') - const spec = await _loadIgnoreForDir(root) - expect(spec).not.toBeNull() - expect(spec!.hasNegatedExtPattern).toBe(false) - }) - - test('preserves outer ignored state across specs when current spec has no match', async () => { - // Outer spec ignores foo.log; inner spec has unrelated rules. 
- writeFileSync(path.join(root, '.gitignore'), '*.log\n') - const outerSpec = await _loadIgnoreForDir(root) - expect(outerSpec).not.toBeNull() - - const sub = path.join(root, 'sub') - mkdirSync(sub) - writeFileSync(path.join(sub, '.gitignore'), '*.tmp\n') - const innerSpec = await _loadIgnoreForDir(sub) - expect(innerSpec).not.toBeNull() - - // foo.log lives under sub/, matches outer's *.log, doesn't match inner. - const check = _isIgnored(path.join(sub, 'foo.log'), false, [outerSpec!, innerSpec!]) - expect(check.ignored).toBe(true) - }) -}) diff --git a/src/indexing/file-walker.ts b/src/indexing/file-walker.ts deleted file mode 100644 index dcd575e..0000000 --- a/src/indexing/file-walker.ts +++ /dev/null @@ -1,333 +0,0 @@ -// Port of src/semble/index/file_walker.py -import { promises as fs } from 'node:fs' -import path from 'node:path' - -// The `ignore` package provides gitignore-style pattern matching. -// We use it as a fast matcher, but we also keep a parallel list of -// `{ pattern, negated, hasExtSuffix }` entries to recreate the -// Python negation-with-extension bypass logic that the npm package -// does not expose. -// -// TODO(integration): use 'ignore' package once Unit 0 lands. Until then, -// the package is referenced via dynamic import below so the rest of the -// surface compiles even when the dep is missing from the lockfile. -// The `ignore` package is published as CommonJS with `export = ignore`, so -// `typeof import('ignore')` is the factory function itself (not a module object -// with a `.default`). Treat the imported type as the callable factory. -type IgnoreFactory = typeof import('ignore') -type IgnoreInstance = ReturnType - -interface ParsedPattern { - /** Pattern string as written in the gitignore file, without the leading "!" if any. */ - pattern: string - /** True when the original line started with "!" (a negation pattern). */ - negated: boolean - /** True when the pattern (with any trailing "/" stripped) has a file-extension suffix. 
*/ - hasExtSuffix: boolean - /** Per-pattern matcher (built from `ignore` package) used to test a single pattern. */ - matcher: IgnoreInstance -} - -export interface IgnoreSpec { - /** Base directory the patterns were sourced from. Paths are matched relative to this. */ - base: string - /** - * Aggregate ignore-package matcher containing every pattern in this spec. - * Used as a fast pre-check via `.test()` in `_isIgnored`; the per-pattern - * walk is only consulted when a negation pattern with an extension suffix - * could win, so the bypass-extension-filter (`found`) decision can be made. - */ - spec: IgnoreInstance - /** Parsed pattern list (in source order) used for the negation-bypass logic. */ - patterns: readonly ParsedPattern[] - /** - * Pre-computed flag: true when at least one pattern in this spec is both - * negated (`!`) and has a file-extension suffix. When false, `_isIgnored` - * can skip the per-pattern walk after consulting the aggregate matcher. - */ - hasNegatedExtPattern: boolean -} - -/** - * Default directories that are always ignored when walking. Trailing "/" matches - * directory semantics (gitignore-style). The Python original uses ".semble/" — - * for csp we replace it with ".csp/". - */ -export const DEFAULT_IGNORED_DIRS: ReadonlySet = new Set([ - '.git/', - '.hg/', - '.svn/', - '__pycache__/', - 'node_modules/', - '.venv/', - 'venv/', - '.tox/', - '.mypy_cache/', - '.pytest_cache/', - '.ruff_cache/', - '.cache/', - '.csp/', - '.next/', - 'dist/', - 'build/', - '.eggs/', -]) - -let cachedIgnoreFactory: IgnoreFactory | undefined - -/** - * Resolve the `ignore` package factory lazily so this file can be imported even - * when the dep is not yet installed in the worktree. - */ -async function getIgnoreFactory(): Promise { - if (cachedIgnoreFactory) { - return cachedIgnoreFactory - } - const mod = await import('ignore') - // The CJS package exports the factory as the default export under ESM interop. 
- const factory = ((mod as { default?: IgnoreFactory }).default - ?? mod) as unknown as IgnoreFactory - cachedIgnoreFactory = factory - return factory -} - -function hasExtensionSuffix(pattern: string): boolean { - const stripped = pattern.replace(/\/+$/, '') - return path.extname(stripped) !== '' -} - -async function buildSpec(base: string, lines: readonly string[]): Promise { - const factory = await getIgnoreFactory() - const aggregate = factory({ allowRelativePaths: true }) - const patterns: ParsedPattern[] = [] - - for (const rawLine of lines) { - const line = rawLine.replace(/\r$/, '') - const trimmed = line.trim() - if (trimmed === '' || trimmed.startsWith('#')) { - continue - } - - aggregate.add(line) - - const negated = trimmed.startsWith('!') - const pattern = negated ? trimmed.slice(1) : trimmed - if (pattern === '') { - continue - } - - const matcher = factory({ allowRelativePaths: true }).add(pattern) - patterns.push({ - pattern, - negated, - hasExtSuffix: hasExtensionSuffix(pattern), - matcher, - }) - } - - const hasNegatedExtPattern = patterns.some(p => p.negated && p.hasExtSuffix) - - return { base, spec: aggregate, patterns, hasNegatedExtPattern } -} - -/** - * Loads `.gitignore` and `.cspignore` from the given directory and merges them - * into a single IgnoreSpec, or returns `null` when neither file is present. - */ -export async function _loadIgnoreForDir(directory: string): Promise { - const gitignorePath = path.join(directory, '.gitignore') - const cspignorePath = path.join(directory, '.cspignore') - - const lines: string[] = [] - for (const file of [gitignorePath, cspignorePath]) { - try { - const stat = await fs.stat(file) - if (!stat.isFile()) { - continue - } - const text = await fs.readFile(file, 'utf8') - lines.push(...text.split(/\r?\n/)) - } - catch { - // missing file — fine - } - } - - if (lines.length === 0) { - return null - } - return buildSpec(directory, lines) -} - -/** - * Result of `_isIgnored`. 
`ignored` is the final gitignore decision; `found` - * signals that a negation pattern with a file-extension suffix matched, which - * lets the file bypass the extension-allowlist filter (mirrors semble). - */ -export interface IgnoreCheck { - ignored: boolean - found: boolean -} - -/** - * Check whether a path is ignored by any of the provided ignore specs. - * - * Port of `_is_ignored` in semble. Each spec's patterns are checked in source - * order; later matches override earlier ones (standard gitignore semantics). - * When the *winning* match is a negation pattern with a file-extension suffix - * (e.g. `!special.kjs`, `!*.py`), `found` becomes true so that the caller can - * include the file even if its extension is not in the allowlist. - * - * Hot-path optimization: the aggregate `ignore`-package matcher is consulted - * first via `.test()`. If no pattern in the spec matches at all, we carry the - * outer state forward. If a pattern matches and the spec contains no negated - * extension patterns, the answer is fully determined by the aggregate and the - * per-pattern walk is skipped. The per-pattern walk runs only when a negation - * could win AND the spec carries at least one negated extension pattern — - * i.e. when `found` could change to `true`. - */ -export function _isIgnored( - filePath: string, - isDir: boolean, - specs: readonly IgnoreSpec[], -): IgnoreCheck { - let ignored = false - let found = false - - for (const ignoreSpec of specs) { - const relative = path.relative(ignoreSpec.base, filePath) - if (relative === '' || relative.startsWith('..') || path.isAbsolute(relative)) { - // Not under this spec's base — skip. - continue - } - - const posixRelative = relative.split(path.sep).join('/') - const candidate = isDir ? `${posixRelative}/` : posixRelative - - let aggregateResult: { ignored: boolean, unignored: boolean } - try { - aggregateResult = ignoreSpec.spec.test(candidate) - } - catch { - // The `ignore` package rejects a few edge cases (e.g. 
paths outside - // the cwd when allowRelativePaths is off); treat as non-match. - aggregateResult = { ignored: false, unignored: false } - } - - const { ignored: isIgnoredBySpec, unignored: isUnignoredBySpec } = aggregateResult - - if (!isIgnoredBySpec && !isUnignoredBySpec) { - // No pattern in this spec matched — preserve outer state. - continue - } - - if (isIgnoredBySpec) { - // Winning pattern is a non-negated ignore. The original loop would set - // `ignored = true; found = false` here regardless of pattern suffix. - ignored = true - found = false - continue - } - - // isUnignoredBySpec: a negation pattern won in this spec. - if (!ignoreSpec.hasNegatedExtPattern) { - // No negation pattern in this spec has an extension suffix, so `found` - // cannot become true here. - ignored = false - found = false - continue - } - - // Fall back to the per-pattern walk to determine `found` accurately. - for (const pattern of ignoreSpec.patterns) { - let matched = false - try { - matched = pattern.matcher.ignores(candidate) - } - catch { - matched = false - } - - if (!matched) { - continue - } - - // Last winning pattern wins. - ignored = !pattern.negated - found = !ignored && pattern.hasExtSuffix - } - } - - return { ignored, found } -} - -/** - * Recursively walk `directory`, yielding files matching `extensions`. Hidden - * directories are not implicitly skipped — the caller controls this via the - * default-ignored set passed to `walkFiles`. - */ -export async function* _walk( - directory: string, - inheritedSpecs: readonly IgnoreSpec[], - extensions: ReadonlySet, -): AsyncIterable { - const dirSpec = await _loadIgnoreForDir(directory) - const specs: readonly IgnoreSpec[] = dirSpec - ? [...inheritedSpecs, dirSpec] - : inheritedSpecs - - let entries: import('node:fs').Dirent[] - try { - entries = await fs.readdir(directory, { withFileTypes: true }) - } - catch { - return - } - - entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 
1 : 0)) - - for (const entry of entries) { - if (entry.isSymbolicLink()) { - continue - } - const full = path.join(directory, entry.name) - const isDir = entry.isDirectory() - const { ignored, found } = _isIgnored(full, isDir, specs) - if (ignored) { - continue - } - - if (isDir) { - yield* _walk(full, specs, extensions) - } - else if (entry.isFile()) { - if (found || extensions.has(path.extname(entry.name).toLowerCase())) { - yield full - } - } - } -} - -/** - * Yield files under `root` whose extension is in `extensions`, skipping ignored - * paths. Default-ignored directories (see `DEFAULT_IGNORED_DIRS`) are always - * skipped, plus any extra patterns in `ignore`. `.gitignore` / `.cspignore` - * files encountered during traversal are honoured recursively. - * - * @param root Root directory to walk. - * @param extensions Allowed file extensions (lowercase, including the leading dot). - * @param ignore Additional gitignore-style patterns to ignore. - */ -export async function* walkFiles( - root: string, - extensions: readonly string[], - ignore?: readonly string[], -): AsyncIterable { - const extensionsSet: ReadonlySet = new Set(extensions.map(e => e.toLowerCase())) - const dirPatterns: string[] = [ - ...[...DEFAULT_IGNORED_DIRS].sort(), - ...(ignore ?? 
[]), - ] - const baseSpec = await buildSpec(root, dirPatterns) - yield* _walk(root, [baseSpec], extensionsSet) -} diff --git a/src/indexing/index.test.ts b/src/indexing/index.test.ts deleted file mode 100644 index 2ddb0a2..0000000 --- a/src/indexing/index.test.ts +++ /dev/null @@ -1,417 +0,0 @@ -// Tests for src/indexing/index.ts (CspIndex) - -import type { Chunk } from '../types.ts' -import { spawnSync } from 'node:child_process' -import { existsSync, mkdtempSync, readdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' -import { ContentType } from '../types.ts' -import { makeStubModel, SelectableBasicBackend } from './dense.ts' -import { CspIndex, DEFAULT_CONTENT } from './index.ts' -import { Bm25Index } from './sparse.ts' - -function makeChunk( - filePath: string, - startLine: number, - endLine: number, - language: string | null = 'typescript', - content?: string, -): Chunk { - return { - content: content ?? 
`// chunk for ${filePath}:${startLine}-${endLine}`, - filePath, - startLine, - endLine, - language, - } -} - -function buildIndex(chunks: Chunk[]): CspIndex { - const model = makeStubModel(4) - const vectors = chunks.map((_, i) => { - const v = new Float32Array(4) - v[0] = i + 1 - return v - }) - return new CspIndex({ - model, - bm25Index: Bm25Index.build(chunks.map(() => ['x'])), - semanticIndex: new SelectableBasicBackend(vectors), - chunks, - modelPath: 'test-model', - root: null, - content: DEFAULT_CONTENT, - }) -} - -describe('CspIndex.stats', () => { - it('returns zeros for an empty index', () => { - const idx = buildIndex([]) - expect(idx.stats).toEqual({ - indexedFiles: 0, - totalChunks: 0, - languages: {}, - }) - }) - - it('reflects chunk count, file count, and language distribution', () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript'), - makeChunk('a.ts', 11, 20, 'typescript'), - makeChunk('b.py', 1, 5, 'python'), - makeChunk('c.bin', 1, 1, null), - ] - const idx = buildIndex(chunks) - expect(idx.stats).toEqual({ - indexedFiles: 3, - totalChunks: 4, - languages: { typescript: 2, python: 1 }, - }) - }) -}) - -describe('CspIndex.search', () => { - it('returns [] on an empty query', () => { - const chunks = [makeChunk('a.ts', 1, 1)] - const idx = buildIndex(chunks) - expect(idx.search('')).toEqual([]) - expect(idx.search(' ')).toEqual([]) - }) - - it('returns [] when the index has no chunks', () => { - const idx = buildIndex([]) - expect(idx.search('anything')).toEqual([]) - }) - - it('returns [] when topK <= 0', () => { - const chunks = [makeChunk('a.ts', 1, 1)] - const idx = buildIndex(chunks) - expect(idx.search('anything', { topK: 0 })).toEqual([]) - expect(idx.search('anything', { topK: -1 })).toEqual([]) - }) - - it('returns [] when filters are set but match nothing (no fallback to unfiltered)', () => { - // Regression: previously an empty selector was treated as "no filter" - // which fell back to an unfiltered search — 
silently ignoring user intent. - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'alpha'), - makeChunk('b.py', 1, 10, 'python', 'beta'), - ] - const idx = buildIndex(chunks) - expect(idx.search('anything', { filterLanguages: ['nonexistent'] })).toEqual([]) - expect(idx.search('anything', { filterPaths: ['nope.ts'] })).toEqual([]) - }) -}) - -describe('CspIndex.findRelated', () => { - it('excludes the source chunk from results', () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'seed chunk'), - makeChunk('a.ts', 11, 20, 'typescript', 'companion 1'), - makeChunk('b.ts', 1, 5, 'typescript', 'companion 2'), - ] - const idx = buildIndex(chunks) - const seed = chunks[0]! - const results = idx.findRelated(seed, { topK: 5 }) - // Source chunk must not appear in the results. - expect(results.find(r => r.chunk === seed)).toBeUndefined() - expect(results.length).toBeLessThanOrEqual(5) - }) - - it('accepts a SearchResult as the seed', () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'seed'), - makeChunk('b.ts', 1, 5, 'typescript', 'other'), - ] - const idx = buildIndex(chunks) - const results = idx.findRelated({ chunk: chunks[0]!, score: 0.5 }) - expect(results.find(r => r.chunk === chunks[0]!)).toBeUndefined() - }) -}) - -describe('CspIndex save → loadFromDisk roundtrip', () => { - let dir: string - - beforeEach(() => { - dir = mkdtempSync(join(tmpdir(), 'csp-roundtrip-')) - }) - afterEach(() => { - rmSync(dir, { recursive: true, force: true }) - }) - - it('persists chunks, indexes, and metadata', async () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'A'), - makeChunk('b.ts', 1, 5, 'python', 'B'), - ] - const idx = buildIndex(chunks) - await idx.save(dir) - const loaded = await CspIndex.loadFromDisk(dir) - expect(loaded.chunks.length).toBe(2) - expect(loaded.chunks.map(c => c.filePath)).toEqual(['a.ts', 'b.ts']) - expect(loaded.stats.totalChunks).toBe(2) - 
expect(loaded.stats.languages).toEqual({ typescript: 1, python: 1 }) - }) - - it('loadFromDisk throws on a missing directory', async () => { - await expect(CspIndex.loadFromDisk(join(dir, 'nope'))).rejects.toThrow( - /Index not found/, - ) - }) - - it('loadFromDisk throws when a persisted artifact is missing', async () => { - // Dir exists but is empty. - await expect(CspIndex.loadFromDisk(dir)).rejects.toThrow(/Missing:/) - }) - - it('loadFromDisk throws on a schema version mismatch', async () => { - const idx = buildIndex([makeChunk('a.ts', 1, 10, 'typescript', 'A')]) - await idx.save(dir) - // Corrupt the manifest's schema version to simulate a future/older index. - const manifestPath = join(dir, 'manifest.json') - const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')) as Record - manifest.schemaVersion = 999 - writeFileSync(manifestPath, JSON.stringify(manifest)) - await expect(CspIndex.loadFromDisk(dir)).rejects.toThrow(/schema version/i) - }) - - it('loadFromDisk rejects a manifest with an invalid content field', async () => { - const idx = buildIndex([makeChunk('a.ts', 1, 10, 'typescript', 'A')]) - await idx.save(dir) - // Schema version stays valid; `content` is corrupted to a non-ContentType. - const manifestPath = join(dir, 'manifest.json') - const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')) as Record - manifest.content = ['not-a-content-type'] - writeFileSync(manifestPath, JSON.stringify(manifest)) - await expect(CspIndex.loadFromDisk(dir)).rejects.toThrow(/Invalid manifest/) - }) - - it('round-trips chunk content losslessly and yields stable search results', async () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'alpha beta'), - makeChunk('b.ts', 11, 20, 'typescript', 'gamma delta'), - makeChunk('c.py', 1, 5, 'python', 'epsilon'), - ] - const idx = buildIndex(chunks) - await idx.save(dir) - - // Chunk fields survive the round-trip intact (chunkToDict/chunkFromDict symmetry). 
- const loaded = await CspIndex.loadFromDisk(dir) - expect(loaded.chunks).toEqual(chunks) - expect(loaded.stats).toEqual(idx.stats) - - // Two independent loads of the same persisted index produce identical - // ranked results — the restored bm25/dense/model state is deterministic. - const loaded2 = await CspIndex.loadFromDisk(dir) - const a = loaded.search('alpha', { topK: 3 }).map(r => r.chunk.filePath) - const b = loaded2.search('alpha', { topK: 3 }).map(r => r.chunk.filePath) - expect(a).toEqual(b) - }) -}) - -describe('CspIndex.save', () => { - let dir: string - - beforeEach(() => { - dir = mkdtempSync(join(tmpdir(), 'csp-save-')) - }) - afterEach(() => { - rmSync(dir, { recursive: true, force: true }) - }) - - function readJson(name: string): unknown { - return JSON.parse(readFileSync(join(dir, name), 'utf8')) - } - - it('writes all index artifacts to the target directory', async () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'A'), - makeChunk('b.ts', 1, 5, 'python', 'B'), - ] - const idx = buildIndex(chunks) - await idx.save(dir) - - for (const name of ['manifest.json', 'chunks.json', 'bm25.json', 'vectors.bin', 'args.json']) { - expect(existsSync(join(dir, name))).toBe(true) - } - }) - - it('creates the target directory if it does not exist', async () => { - const nested = join(dir, 'a', 'b', 'idx') - const idx = buildIndex([makeChunk('a.ts', 1, 10)]) - await idx.save(nested) - expect(existsSync(join(nested, 'manifest.json'))).toBe(true) - }) - - it('writes a manifest with schema version, content, source id, and model id', async () => { - const chunks: Chunk[] = [makeChunk('a.ts', 1, 10, 'typescript', 'A')] - const idx = buildIndex(chunks) - await idx.save(dir) - - const manifest = readJson('manifest.json') as Record - expect(manifest.schemaVersion).toBe(1) - expect(manifest.content).toEqual([...DEFAULT_CONTENT]) - // buildIndex sets root: null → sourceId is null. 
- expect(manifest.sourceId).toBeNull() - expect(manifest.modelId).toBe('test-model') - // contentHash is deterministic and non-empty. - expect(typeof manifest.contentHash).toBe('string') - expect((manifest.contentHash as string).length).toBeGreaterThan(0) - }) - - it('serializes chunks in camelCase (chunkToDict) form, preserving order', async () => { - const chunks: Chunk[] = [ - makeChunk('a.ts', 1, 10, 'typescript', 'A'), - makeChunk('b.ts', 1, 5, 'python', 'B'), - ] - const idx = buildIndex(chunks) - await idx.save(dir) - - const serialized = readJson('chunks.json') as Array> - expect(serialized.length).toBe(2) - expect(serialized.map(c => c.filePath)).toEqual(['a.ts', 'b.ts']) - const first = serialized[0]! - expect(first.content).toBe('A') - expect(first.startLine).toBe(1) - expect(first.endLine).toBe(10) - expect(first.language).toBe('typescript') - expect(first.location).toBe('a.ts:1-10') - // snake_case wire keys must NOT leak into the round-trip format. - expect(first.file_path).toBeUndefined() - }) - - it('produces a deterministic contentHash for identical chunks', async () => { - const make = (): CspIndex => - buildIndex([makeChunk('a.ts', 1, 10, 'typescript', 'A')]) - - const dir2 = mkdtempSync(join(tmpdir(), 'csp-save-2-')) - try { - await make().save(dir) - await make().save(dir2) - const h1 = (JSON.parse(readFileSync(join(dir, 'manifest.json'), 'utf8')) as Record).contentHash - const h2 = (JSON.parse(readFileSync(join(dir2, 'manifest.json'), 'utf8')) as Record).contentHash - expect(h1).toBe(h2) - } - finally { - rmSync(dir2, { recursive: true, force: true }) - } - }) -}) - -describe('CspIndex.fromPath', () => { - let dir: string - - beforeEach(() => { - dir = mkdtempSync(join(tmpdir(), 'csp-from-path-')) - }) - afterEach(() => { - rmSync(dir, { recursive: true, force: true }) - }) - - it('throws when the path does not exist', async () => { - await expect(CspIndex.fromPath(join(dir, 'nope'))).rejects.toThrow( - /Path does not exist/, - ) - }) - - 
it('throws when the path exists but is a file', async () => { - const filePath = join(dir, 'a.ts') - writeFileSync(filePath, '// hello\n') - await expect(CspIndex.fromPath(filePath)).rejects.toThrow( - /not a directory/, - ) - }) - - it('builds a CspIndex from a real directory with a small TS file', async () => { - writeFileSync( - join(dir, 'sample.ts'), - 'export function greet(name: string) {\n return `hi ${name}`\n}\n', - ) - const idx = await CspIndex.fromPath(dir, { content: ContentType.CODE }) - expect(idx.stats.totalChunks).toBeGreaterThan(0) - expect(idx.stats.indexedFiles).toBe(1) - expect(idx.chunks[0]!.filePath).toBe('sample.ts') - }) -}) - -describe('CspIndex.fromGit', () => { - let workdir: string - let repoDir: string - - /** Run a git command in `cwd`, throwing with stderr on failure. */ - function git(cwd: string, ...args: string[]): void { - const res = spawnSync('git', args, { - cwd, - encoding: 'utf8', - env: { ...process.env, GIT_TERMINAL_PROMPT: '0' }, - }) - if (res.status !== 0) { - throw new Error(`git ${args.join(' ')} failed: ${res.stderr}`) - } - } - - /** Count leftover clone temp dirs so we can assert cleanup. */ - function cloneTempDirCount(): number { - return readdirSync(tmpdir()).filter(name => name.startsWith('csp-git-')).length - } - - beforeEach(() => { - workdir = mkdtempSync(join(tmpdir(), 'csp-git-src-')) - // A real, non-bare local repo with one committed TS file. `git clone` can - // shallow-clone this over a file:// URL with no network. 
- repoDir = join(workdir, 'repo') - spawnSync('git', ['init', repoDir], { encoding: 'utf8' }) - git(repoDir, 'config', 'user.email', 'test@example.com') - git(repoDir, 'config', 'user.name', 'Test') - git(repoDir, 'config', 'commit.gpgsign', 'false') - writeFileSync( - join(repoDir, 'sample.ts'), - 'export function greet(name: string) {\n return `hi ${name}`\n}\n', - ) - git(repoDir, 'add', '.') - git(repoDir, 'commit', '-m', 'initial') - }) - afterEach(() => { - rmSync(workdir, { recursive: true, force: true }) - }) - - it('shallow-clones the repo and builds a populated index', async () => { - const before = cloneTempDirCount() - const idx = await CspIndex.fromGit(`file://${repoDir}`, { - content: ContentType.CODE, - }) - expect(idx.stats.totalChunks).toBeGreaterThan(0) - expect(idx.stats.indexedFiles).toBe(1) - expect(idx.chunks[0]!.filePath).toBe('sample.ts') - // The index is rooted at the git URL, not the (deleted) temp checkout, so a - // persisted manifest records a stable sourceId. - expect(idx.root).toBe(`file://${repoDir}`) - // The temporary checkout must be cleaned up (no leak) after success. - expect(cloneTempDirCount()).toBe(before) - }) - - it('cleans up the temp checkout even when clone fails', async () => { - const before = cloneTempDirCount() - const bogus = join(workdir, 'does-not-exist.git') - expect(existsSync(bogus)).toBe(false) - await expect( - CspIndex.fromGit(`file://${bogus}`, { content: ContentType.CODE }), - ).rejects.toThrow(/clone/i) - // Failure path must not leak the temp checkout directory either. 
- expect(cloneTempDirCount()).toBe(before) - }) - - it('rejects a ref that would inject a git flag (leading dash)', async () => { - const before = cloneTempDirCount() - await expect( - CspIndex.fromGit(`file://${repoDir}`, { - content: ContentType.CODE, - ref: '--upload-pack=touch /tmp/pwned', - }), - ).rejects.toThrow(/Invalid git ref/) - // The guard throws inside the clone step; fromGit's `finally` still cleans - // up the temp checkout, so no dir leaks. - expect(cloneTempDirCount()).toBe(before) - }) -}) diff --git a/src/indexing/index.ts b/src/indexing/index.ts deleted file mode 100644 index 6dc601a..0000000 --- a/src/indexing/index.ts +++ /dev/null @@ -1,550 +0,0 @@ -// Port of src/semble/index/index.py -// -// CspIndex is the hybrid (dense + BM25) search orchestrator. It binds the -// indexing units (model loading + createIndexFromPath) into a single object -// that the CLI and MCP server drive. -// -// Construction: -// - fromPath: index a local directory. -// - fromGit: shallow-clone a git URL into a temp dir, index it via -// fromPath, then re-root the index at the URL. -// - loadFromDisk: reconstruct a saved index from its on-disk artifacts. -// Operations: -// - search / findRelated: delegate to search.ts over the in-memory index. -// - save: persist the index (manifest + chunks + bm25 + dense vectors). 
- -import type { Chunk, ContentType, IndexStats, SearchResult } from '../types.ts' -import type { Model } from './dense.ts' -import { execFile } from 'node:child_process' -import { createHash } from 'node:crypto' -import { chmodSync, existsSync, mkdtempSync, rmSync } from 'node:fs' -import { mkdir, readFile, stat, writeFile } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import process from 'node:process' -import { promisify } from 'node:util' -import { search as runSearch } from '../search.ts' -import { chunkFromDict, chunkToDict, ContentType as ContentTypeEnum } from '../types.ts' -import { createIndexFromPath } from './create.ts' -import { loadModel as loadDenseModel, makeStubModel, SelectableBasicBackend } from './dense.ts' -import { Bm25Index } from './sparse.ts' - -/** Promisified `git` runner — keeps the network-bound clone off the event loop. */ -const execFileAsync = promisify(execFile) - -/** - * On-disk index schema version. Bumped when the persisted artifact layout or - * format changes; {@link CspIndex.loadFromDisk} (T007) rejects mismatches. - */ -export const INDEX_SCHEMA_VERSION = 1 - -/** - * Persisted index manifest — the top-level metadata that ties the on-disk - * artifacts (chunks.json / bm25.json / vectors.bin / args.json) together and - * guards against loading an incompatible index. - */ -export interface IndexManifest { - schemaVersion: number - /** Hash of the chunk contents — deterministic identity of the indexed corpus. */ - contentHash: string - /** Source root the index was built from (absolute path / git URL), or null. */ - sourceId: string | null - /** Content types this index covers. */ - content: ContentType[] - /** Embedding model identifier, so a load can reject a model mismatch. */ - modelId: string -} - -/** Default content selection when the caller does not specify one (code-only). 
*/ -export const DEFAULT_CONTENT: readonly ContentType[] = [ContentTypeEnum.CODE] - -/** Default result count when the caller omits `topK` (matches the CLI `--top-k` default). */ -const DEFAULT_TOP_K = 5 - -/** - * Build a `SearchResult` for a related chunk, mirroring the `toDict` shape that - * `search.ts` produces so downstream formatters treat both uniformly. - */ -function makeRelatedResult(chunk: Chunk, score: number): SearchResult { - return { - chunk, - score, - toDict: () => ({ - chunk: { - content: chunk.content, - file_path: chunk.filePath, - start_line: chunk.startLine, - end_line: chunk.endLine, - language: chunk.language ?? null, - location: `${chunk.filePath}:${chunk.startLine}-${chunk.endLine}`, - }, - score, - }), - } -} - -export interface CspIndexLoadOptions { - modelPath?: string - content?: ContentType | readonly ContentType[] -} - -export interface CspIndexFromGitOptions extends CspIndexLoadOptions { - ref?: string -} - -/** Options for {@link CspIndex.save}. */ -export interface CspIndexSaveOptions { - /** - * Override for the manifest `contentHash`. {@link loadOrBuildIndex} injects a - * source-file hash here so cache validity can be checked before a rebuild. - * Omitted → defaults to the serialized-chunks hash (T006 behavior). - */ - contentHash?: string -} - -/** Constructor payload — the fully built index state. */ -export interface CspIndexState { - model: Model - bm25Index: Bm25Index - semanticIndex: SelectableBasicBackend - chunks: Chunk[] - modelPath: string - /** Source root the index was built from, or null (e.g. loaded from disk). */ - root: string | null - content: readonly ContentType[] -} - -/** - * Hybrid (dense + BM25) code search index. - * - * Build with {@link CspIndex.fromPath} / {@link CspIndex.fromGit}, query with - * {@link CspIndex.search} / {@link CspIndex.findRelated}, persist with - * {@link CspIndex.save} / {@link CspIndex.loadFromDisk}. 
- */ -export class CspIndex { - readonly model: Model - readonly bm25Index: Bm25Index - readonly semanticIndex: SelectableBasicBackend - readonly chunks: Chunk[] - readonly modelPath: string - readonly root: string | null - readonly content: readonly ContentType[] - - constructor(state: CspIndexState) { - this.model = state.model - this.bm25Index = state.bm25Index - this.semanticIndex = state.semanticIndex - this.chunks = state.chunks - this.modelPath = state.modelPath - this.root = state.root - this.content = state.content - } - - /** - * Build an index from a local directory. - * - * Loads the embedding model, walks + chunks + embeds the directory via - * {@link createIndexFromPath}, and returns a populated index. - * - * @throws if the path is missing, is not a directory, or has no supported files. - */ - static async fromPath( - path: string, - options: CspIndexLoadOptions = {}, - ): Promise { - let pathStats: Awaited> - try { - pathStats = await stat(path) - } - catch { - throw new Error(`Path does not exist: ${path}`) - } - if (!pathStats.isDirectory()) { - throw new Error(`Path is not a directory: ${path}`) - } - - const { model, modelPath } = await loadDenseModel(options.modelPath) - const content = normalizeContent(options.content) - - const { bm25Index, semanticIndex, chunks } = await createIndexFromPath(path, { - model, - content, - displayRoot: path, - }) - - return new CspIndex({ - model, - bm25Index, - semanticIndex, - chunks, - modelPath, - root: path, - content, - }) - } - - /** - * Build an index from a remote git URL. - * - * Shallow-clones `url` into a fresh `0700` temp directory (non-interactive — - * credential prompts are suppressed), then reuses the {@link CspIndex.fromPath} - * pipeline against the clone root so `.cspignore` / `.gitignore` rules at the - * checkout root are honored. The temp directory is always removed afterward, - * on both the success and failure paths. 
- * - * @throws if the clone fails (bad URL, auth required, git missing). - */ - static async fromGit( - url: string, - options: CspIndexFromGitOptions = {}, - ): Promise { - const dir = mkdtempSync(join(tmpdir(), 'csp-git-')) - chmodSync(dir, 0o700) - try { - await cloneShallow(url, dir, options.ref) - const { ref: _ref, ...fromPathOptions } = options - const index = await CspIndex.fromPath(dir, fromPathOptions) - // fromPath roots the index at the temp checkout, which we delete in the - // `finally` below — re-root at the git URL so a persisted manifest records - // a stable, meaningful sourceId (not a vanished temp path). - return new CspIndex({ - model: index.model, - bm25Index: index.bm25Index, - semanticIndex: index.semanticIndex, - chunks: index.chunks, - modelPath: index.modelPath, - root: url, - content: index.content, - }) - } - finally { - rmSync(dir, { recursive: true, force: true }) - } - } - - /** Aggregate index statistics: file count, chunk count, language histogram. */ - get stats(): IndexStats { - const files = new Set() - const languages: Record = {} - for (const chunk of this.chunks) { - files.add(chunk.filePath) - const lang = chunk.language - if (lang !== null && lang !== undefined) { - languages[lang] = (languages[lang] ?? 0) + 1 - } - } - return { - indexedFiles: files.size, - totalChunks: this.chunks.length, - languages, - } - } - - /** - * Hybrid (dense + BM25) search over the indexed chunks. - * - * Returns `[]` for blank queries, non-positive `topK`, an empty index, or - * when `filterLanguages`/`filterPaths` narrow the candidate pool to nothing - * (no silent fallback to an unfiltered search). Otherwise delegates to the - * shared ranking pipeline in {@link search.ts} — kept synchronous so the MCP - * server can call it without `await`. - */ - search(query: string, options: SearchOptions = {}): SearchResult[] { - const topK = options.topK ?? 
DEFAULT_TOP_K - if (query.trim().length === 0 || topK <= 0 || this.chunks.length === 0) { - return [] - } - - const selector = this.buildSelector(options) - if (selector !== undefined && selector.length === 0) { - return [] - } - - return runSearch( - query, - this.model, - this.semanticIndex, - this.bm25Index, - this.chunks, - topK, - selector === undefined ? {} : { selector }, - ) - } - - /** - * Find chunks similar to a seed chunk, by re-embedding the seed's content - * and querying the semantic backend. The seed itself is excluded from the - * results (semble parity). - */ - findRelated( - // Seed needs only the chunk; accept a bare Chunk or anything carrying one - // (e.g. a SearchResult) without forcing the caller to supply `toDict`. - seed: Chunk | { chunk: Chunk, score?: number }, - options: SearchOptions = {}, - ): SearchResult[] { - const seedChunk = 'chunk' in seed ? seed.chunk : seed - const topK = options.topK ?? DEFAULT_TOP_K - if (topK <= 0 || this.chunks.length === 0) { - return [] - } - - // Over-fetch by one so we can drop the seed and still return up to topK. - const queryEmbedding = this.model.encode([seedChunk.content]) - const batch = this.semanticIndex.query(queryEmbedding, topK + 1) - const first = batch[0] - if (first === undefined) { - return [] - } - - const results: SearchResult[] = [] - for (const [index, distance] of first) { - const chunk = this.chunks[index] - if (chunk === undefined || chunk === seedChunk) { - continue - } - results.push(makeRelatedResult(chunk, 1.0 - distance)) - if (results.length >= topK) { - break - } - } - return results - } - - /** - * Build a candidate-index selector from language/path filters, or `undefined` - * when no filter is set. An empty `Uint32Array` (filters matched nothing) is - * returned as-is so the caller can short-circuit to `[]`. 
- */ - private buildSelector(options: SearchOptions): Uint32Array | undefined { - const { filterLanguages, filterPaths } = options - const hasLangFilter = filterLanguages !== undefined && filterLanguages.length > 0 - const hasPathFilter = filterPaths !== undefined && filterPaths.length > 0 - if (!hasLangFilter && !hasPathFilter) { - return undefined - } - - const indices: number[] = [] - for (let i = 0; i < this.chunks.length; i++) { - const chunk = this.chunks[i]! - if (hasLangFilter && !filterLanguages.includes(chunk.language ?? '')) { - continue - } - if (hasPathFilter && !filterPaths.some(p => chunk.filePath.includes(p))) { - continue - } - indices.push(i) - } - return Uint32Array.from(indices) - } - - /** - * Persist the index to `dir`, writing five artifacts: - * - `chunks.json` — chunks in camelCase round-trip form ({@link chunkToDict}). - * - `bm25.json` — sparse index ({@link Bm25Index.save}). - * - `vectors.bin` + `args.json` — dense index ({@link SelectableBasicBackend.save}). - * - `manifest.json` — schema version, content hash, source id, content, model id. - * - * The directory is created if absent. The five file names are mutually - * distinct, so the backends do not clobber one another. The dense backend - * writes already-normalized vectors and re-normalizes on load idempotently, - * so the round-trip is bit-stable (verified — no float drift, NFR-002). - * - * `options.contentHash` overrides the manifest's `contentHash`. The auto-cache - * orchestrator ({@link loadOrBuildIndex}) injects a *source-file* hash here so - * the manifest records a value it can recompute and compare against the live - * source before a build. When omitted, `contentHash` defaults to the hash of - * the serialized chunks (T006 behavior — backward compatible). 
- */ - async save(dir: string, options: CspIndexSaveOptions = {}): Promise { - await mkdir(dir, { recursive: true }) - - const serializedChunks = this.chunks.map(chunkToDict) - await writeFile(join(dir, 'chunks.json'), JSON.stringify(serializedChunks)) - - await this.bm25Index.save(dir) - await this.semanticIndex.save(dir) - - const manifest: IndexManifest = { - schemaVersion: INDEX_SCHEMA_VERSION, - contentHash: options.contentHash ?? hashChunks(serializedChunks), - sourceId: this.root, - content: [...this.content], - modelId: this.modelPath, - } - await writeFile(join(dir, 'manifest.json'), JSON.stringify(manifest)) - } - - /** - * Load an index previously persisted with {@link CspIndex.save}. - * - * Validates the directory and all five artifacts exist, checks the manifest - * schema version matches {@link INDEX_SCHEMA_VERSION}, then restores chunks - * ({@link chunkFromDict}), the BM25 index ({@link Bm25Index.load}), the dense - * backend ({@link SelectableBasicBackend.load}), and reloads the embedding - * model identified by the manifest. The chunk round-trip is lossless - * (camelCase symmetry with {@link CspIndex.save}). - * - * @throws if the directory is missing, an artifact is missing, or the - * manifest schema version does not match. - */ - static async loadFromDisk(dir: string): Promise { - if (!existsSync(dir)) { - throw new Error(`Index not found: ${dir}`) - } - - const artifacts = ['manifest.json', 'chunks.json', 'bm25.json', 'vectors.bin', 'args.json'] - for (const name of artifacts) { - if (!existsSync(join(dir, name))) { - throw new Error(`Missing: ${join(dir, name)}`) - } - } - - const rawManifest = JSON.parse(await readFile(join(dir, 'manifest.json'), 'utf8')) as unknown - // Version check first so a stale index gets the precise mismatch error even - // if its (older) shape would fail full validation below. 
- const rawVersion = (rawManifest as { schemaVersion?: unknown } | null)?.schemaVersion - if (rawVersion !== INDEX_SCHEMA_VERSION) { - throw new Error( - `Index schema version mismatch: expected ${INDEX_SCHEMA_VERSION}, got ${String(rawVersion)}`, - ) - } - const manifest = parseManifest(rawManifest) - - const serializedChunks = JSON.parse(await readFile(join(dir, 'chunks.json'), 'utf8')) as unknown[] - const chunks = serializedChunks.map(c => chunkFromDict(c as Parameters[0])) - - const bm25Index = await Bm25Index.load(dir) - const semanticIndex = await SelectableBasicBackend.load(dir) - - const { model, modelPath } = await loadDenseModel(manifest.modelId) - // Keep the query model's dimension aligned with the persisted vectors so - // re-embedded queries are comparable to the stored backend. (The stub model - // is dimension-agnostic; the real model's dim is fixed by its weights.) - const alignedModel = model.dim === semanticIndex.dim - ? model - : makeStubModel(semanticIndex.dim) - - return new CspIndex({ - model: alignedModel, - bm25Index, - semanticIndex, - chunks, - modelPath, - root: manifest.sourceId, - content: manifest.content, - }) - } -} - -export interface SearchOptions { - topK?: number - filterLanguages?: string[] - filterPaths?: string[] -} - -/** - * Lazy loader for the embedding model. Returns `[model, modelPath]` so callers - * that only need the cached path can destructure `[, modelPath]` (mcp server). - */ -export async function loadModel(modelPath?: string): Promise<[Model, string]> { - const { model, modelPath: resolved } = await loadDenseModel(modelPath) - return [model, resolved] -} - -/** - * Shallow-clone `url` into `dir` (already created, empty). Runs git - * non-interactively so a missing-credential prompt fails fast instead of - * hanging. Throws a clear error (including git's stderr) when the clone fails. 
- */ -async function cloneShallow(url: string, dir: string, ref?: string): Promise { - // A ref beginning with `-` would be parsed by git as a flag (e.g. - // `--upload-pack=…`, `--config=…`) rather than a branch name — the `--` - // separator below only shields the trailing url/dir, not `--branch `. - // Reject it so a hostile ref can't inject git options (CWE-88). - if (ref !== undefined && ref.startsWith('-')) { - throw new Error(`Invalid git ref (must not start with '-'): ${ref}`) - } - - const args = ['clone', '--depth', '1'] - if (ref !== undefined) { - args.push('--branch', ref) - } - args.push('--', url, dir) - - // Async clone: a network-bound git clone must not block the event loop (an - // MCP server may be serving other requests concurrently). - try { - await execFileAsync('git', args, { - env: { ...process.env, GIT_TERMINAL_PROMPT: '0' }, - }) - } - catch (err) { - const e = err as { stderr?: string, message?: string } - const stderr = (e.stderr ?? '').trim() - const detail = stderr !== '' ? stderr : (e.message ?? 'unknown error') - throw new Error(`git clone failed for ${url}: ${detail}`) - } -} - -/** - * Deterministic content hash of the serialized chunks. T006 only needs a stable - * identity for the indexed corpus; the precise repo-content hash used for cache - * invalidation lands in T009 (cache.ts). Uses sha256 over the chunks JSON so - * identical chunk sets always produce the same digest. - */ -function hashChunks(serializedChunks: unknown[]): string { - return createHash('sha256').update(JSON.stringify(serializedChunks)).digest('hex') -} - -function isContentType(value: unknown): value is ContentType { - return typeof value === 'string' - && (Object.values(ContentTypeEnum) as string[]).includes(value) -} - -/** - * Parse and validate a persisted `manifest.json`. 
The manifest is an on-disk - * trust boundary, so every field is checked at runtime (mirroring - * {@link chunkFromDict}) — a corrupt or hand-edited manifest fails loudly here - * instead of producing a `CspIndex` whose typed fields (`content`, `sourceId`, - * `modelId`) silently lie about the persisted data. - */ -export function parseManifest(raw: unknown): IndexManifest { - if (raw === null || typeof raw !== 'object') { - throw new Error('Invalid manifest: not an object') - } - const m = raw as Record - if (typeof m.schemaVersion !== 'number') { - throw new TypeError('Invalid manifest: schemaVersion must be a number') - } - if (typeof m.contentHash !== 'string') { - throw new TypeError('Invalid manifest: contentHash must be a string') - } - if (!(m.sourceId === null || typeof m.sourceId === 'string')) { - throw new Error('Invalid manifest: sourceId must be a string or null') - } - if (typeof m.modelId !== 'string') { - throw new TypeError('Invalid manifest: modelId must be a string') - } - if (!Array.isArray(m.content) || !m.content.every(isContentType)) { - throw new Error('Invalid manifest: content must be an array of ContentType') - } - return { - schemaVersion: m.schemaVersion, - contentHash: m.contentHash, - sourceId: m.sourceId, - content: m.content, - modelId: m.modelId, - } -} - -function normalizeContent( - content: ContentType | readonly ContentType[] | undefined, -): readonly ContentType[] { - if (content === undefined) { - return DEFAULT_CONTENT - } - if (Array.isArray(content)) { - return content as readonly ContentType[] - } - return [content as ContentType] -} diff --git a/src/indexing/sparse.test.ts b/src/indexing/sparse.test.ts deleted file mode 100644 index 875a8ac..0000000 --- a/src/indexing/sparse.test.ts +++ /dev/null @@ -1,163 +0,0 @@ -import type { Chunk } from './sparse.ts' -import { mkdtemp, rm } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import path from 'node:path' - -import { afterEach, beforeEach, describe, expect, 
test } from 'bun:test' -import { Bm25Index, enrichForBm25, selectorToMask } from './sparse.ts' - -function makeChunk(overrides: Partial & { filePath: string, content?: string }): Chunk { - return { - content: overrides.content ?? '', - filePath: overrides.filePath, - startLine: overrides.startLine ?? 1, - endLine: overrides.endLine ?? 1, - language: overrides.language ?? null, - } -} - -describe('enrichForBm25', () => { - test('appends repeated stem and last 3 dir parts (2-part dir)', () => { - // Mirrors upstream Python: Path('src/utils/format.ts').parent.parts == ('src', 'utils'), - // so last-3 is the full ['src', 'utils']. - const out = enrichForBm25(makeChunk({ filePath: 'src/utils/format.ts', content: 'hello world' })) - expect(out).toBe('hello world format format src utils') - }) - - test('trims to the last 3 dir parts (4-part dir)', () => { - const out = enrichForBm25(makeChunk({ filePath: 'a/b/c/d/foo.py', content: 'x' })) - expect(out).toBe('x foo foo b c d') - }) - - test('handles a top-level file with no directory components', () => { - const out = enrichForBm25(makeChunk({ filePath: 'foo.py', content: 'x' })) - expect(out).toBe('x foo foo ') - }) - - test('drops "." pseudo-segments from relative paths', () => { - const out = enrichForBm25(makeChunk({ filePath: './a/b/foo.ts', content: 'x' })) - expect(out).toBe('x foo foo a b') - }) - - test('normalizes backslashes for cross-platform consistency', () => { - // Repo-relative paths must produce the same enrichment regardless of - // host OS — Windows hosts may surface back-slashes if a caller forgets - // to normalize before passing the chunk through. 
- const out = enrichForBm25(makeChunk({ filePath: 'src\\utils\\format.ts', content: 'hello world' })) - expect(out).toBe('hello world format format src utils') - }) -}) - -describe('selectorToMask', () => { - test('builds a 0/1 mask the same length as `size`', () => { - const mask = selectorToMask(new Uint32Array([0, 2, 5]), 6) - expect(mask).not.toBeNull() - expect(Array.from(mask!)).toEqual([1, 0, 1, 0, 0, 1]) - }) - - test('returns null for a null selector', () => { - expect(selectorToMask(null, 6)).toBeNull() - }) - - test('returns null for an undefined selector', () => { - expect(selectorToMask(undefined, 6)).toBeNull() - }) - - test('ignores indices outside the mask bounds', () => { - // Out-of-bounds indices are silently dropped rather than crashing — - // upstream relies on the selector being well-formed but we want to be - // defensive in the TS port. - const mask = selectorToMask(new Uint32Array([0, 10]), 3) - expect(Array.from(mask!)).toEqual([1, 0, 0]) - }) -}) - -describe('Bm25Index.build / getScores', () => { - test('ranks documents containing the query term higher', () => { - const index = Bm25Index.build([ - ['hello', 'world'], - ['hello'], - ['world'], - ]) - const scores = index.getScores(['hello']) - expect(scores).toHaveLength(3) - expect(scores[0]).toBeGreaterThan(0) - expect(scores[1]).toBeGreaterThan(0) - expect(scores[2]).toBe(0) - }) - - test('returns zero scores for unknown query tokens', () => { - const index = Bm25Index.build([['hello'], ['world']]) - const scores = index.getScores(['unknown']) - expect(Array.from(scores)).toEqual([0, 0]) - }) - - test('returns an empty-array-equivalent for an empty corpus', () => { - const index = Bm25Index.build([]) - const scores = index.getScores(['anything']) - expect(scores).toHaveLength(0) - }) - - test('returns zero scores when query tokens are empty', () => { - const index = Bm25Index.build([['hello'], ['world']]) - const scores = index.getScores([]) - expect(Array.from(scores)).toEqual([0, 0]) 
- }) - - test('weightMask zeros out masked-out documents', () => { - const index = Bm25Index.build([ - ['hello', 'world'], - ['hello'], - ['world'], - ]) - // Mask in docs 0 and 2 only. - const mask = new Uint8Array([1, 0, 1]) - const scores = index.getScores(['hello'], mask) - expect(scores[0]).toBeGreaterThan(0) - expect(scores[1]).toBe(0) - expect(scores[2]).toBe(0) // doc 2 doesn't contain 'hello' - }) - - test('weightMask only suppresses scores; matched-in docs are unchanged', () => { - const index = Bm25Index.build([ - ['hello', 'world'], - ['hello'], - ['world'], - ]) - const baseline = index.getScores(['hello']) - const masked = index.getScores(['hello'], new Uint8Array([1, 1, 1])) - expect(Array.from(masked)).toEqual(Array.from(baseline)) - }) - - test('repeated query tokens do not compound scores', () => { - const index = Bm25Index.build([['hello']]) - const single = index.getScores(['hello']) - const repeated = index.getScores(['hello', 'hello', 'hello']) - expect(Array.from(repeated)).toEqual(Array.from(single)) - }) -}) - -describe('Bm25Index.save / load', () => { - let tmp: string - - beforeEach(async () => { - tmp = await mkdtemp(path.join(tmpdir(), 'csp-bm25-')) - }) - - afterEach(async () => { - await rm(tmp, { recursive: true, force: true }) - }) - - test('round-trips an index and preserves scores', async () => { - const index = Bm25Index.build([ - ['alpha', 'beta'], - ['alpha'], - ['beta', 'gamma'], - ]) - await index.save(tmp) - const loaded = await Bm25Index.load(tmp) - const original = index.getScores(['alpha']) - const restored = loaded.getScores(['alpha']) - expect(Array.from(restored)).toEqual(Array.from(original)) - }) -}) diff --git a/src/indexing/sparse.ts b/src/indexing/sparse.ts deleted file mode 100644 index d5e1b73..0000000 --- a/src/indexing/sparse.ts +++ /dev/null @@ -1,228 +0,0 @@ -// Port of src/semble/index/sparse.py -// -// Implements the two helpers from the upstream module plus a minimal BM25 -// index (Bm25Index) that stands 
in for Python's `bm25s` library. -// -// BM25 backend choice (see PR body for full discussion): -// Option B (inline minimal BM25+ with k1=1.5, b=0.75) was chosen over a -// third-party npm such as wink-bm25-text-search because: -// - The dependency tree stays self-contained while the project is still -// a scaffold (no other indexing deps are pinned yet). -// - The required surface is tiny (build / getScores / save / load) and -// getScores must respect a weight_mask that maps cleanly to BM25's -// per-document scoring loop. -// - Replacing this backend later is a localized change because all -// callers go through the Bm25Index class. - -import type { Chunk } from '../types.ts' -import { mkdir, readFile, writeFile } from 'node:fs/promises' -import path from 'node:path' - -// Re-exported so existing importers (e.g. sparse.test.ts) keep resolving -// `Chunk` from this module after the type was unified into ../types.ts. -export type { Chunk } - -/** - * Append file path components to BM25 content to boost path-based queries. - * - * Assumes `chunk.filePath` is already repo-relative (set during indexing) so - * machine-specific directory components are never indexed. The stem is - * repeated twice to up-weight file-path matches in BM25. - * - * Repo-relative paths are normalized to POSIX (forward slashes) before - * parsing so Windows-host indexes produce the same enriched text as POSIX - * hosts. Without this, `path.parse` on Windows would split on `\\` while - * the indexer stores forward-slash paths, leading to inconsistent BM25 - * tokenization across platforms. 
- */ -export function enrichForBm25(chunk: Chunk): string { - const normalized = chunk.filePath.replace(/\\/g, '/') - const parsed = path.posix.parse(normalized) - const stem = parsed.name - const dirParts = parsed.dir - .split('/') - .filter(part => part !== '' && part !== '.') - const dirText = dirParts.slice(-3).join(' ') - return `${chunk.content} ${stem} ${stem} ${dirText}` -} - -/** - * Convert a selector array of indices into a boolean mask of length `size`. - * - * Returns `null` when `selector` is null/undefined so callers can skip mask - * application entirely (matching the upstream semantics). - */ -export function selectorToMask( - selector: Uint32Array | null | undefined, - size: number, -): Uint8Array | null { - if (selector === null || selector === undefined) { - return null - } - const mask = new Uint8Array(size) - for (const idx of selector) { - if (idx < size) { - mask[idx] = 1 - } - } - return mask -} - -// --------------------------------------------------------------------------- -// Minimal BM25 index -// --------------------------------------------------------------------------- - -// Standard Okapi BM25 hyperparameters used by bm25s' default Lucene scorer. -const K1 = 1.5 -const B = 0.75 - -interface Bm25State { - // Number of documents indexed. - numDocs: number - // Document length (token count) per document, in doc order. - docLengths: Float32Array - // Average document length across the corpus. - avgDocLength: number - // Term -> array of [docId, termFreq] entries (postings list). - postings: Map> - // Term -> document frequency (count of docs containing the term). - docFreq: Map -} - -/** - * Minimal BM25 index supporting build / getScores / save / load. - * - * Documents are passed pre-tokenized (callers use `tokenize(enrichForBm25(...))`). - * `getScores` returns a Float32Array of per-document scores in doc order, - * matching the bm25s.BM25.get_scores contract used by upstream. 
- */ -export class Bm25Index { - // Exposed only for save() — kept private to consumers. - readonly #state: Bm25State - - private constructor(state: Bm25State) { - this.#state = state - } - - /** - * Per-document token counts in document order — one entry per indexed - * document. Exposed read-only so callers can assert the corpus size - * (`documents.length === numDocs`) without reaching into private state. - */ - get documents(): readonly number[] { - return Array.from(this.#state.docLengths) - } - - /** Build an index from an array of pre-tokenized documents. */ - static build(documents: string[][]): Bm25Index { - const numDocs = documents.length - const docLengths = new Float32Array(numDocs) - const postings = new Map>() - const docFreq = new Map() - - let totalLen = 0 - for (let docId = 0; docId < numDocs; docId++) { - const tokens = documents[docId] ?? [] - docLengths[docId] = tokens.length - totalLen += tokens.length - - // Term frequencies for this document. - const tf = new Map() - for (const token of tokens) { - tf.set(token, (tf.get(token) ?? 0) + 1) - } - - for (const [term, freq] of tf) { - let list = postings.get(term) - if (list === undefined) { - list = [] - postings.set(term, list) - } - list.push([docId, freq]) - docFreq.set(term, (docFreq.get(term) ?? 0) + 1) - } - } - - const avgDocLength = numDocs > 0 ? totalLen / numDocs : 0 - - return new Bm25Index({ numDocs, docLengths, avgDocLength, postings, docFreq }) - } - - /** - * Compute BM25 scores for the given query tokens. - * - * Returns a Float32Array of length numDocs, in document order. When - * `weightMask` is provided, documents with mask[i] === 0 receive a score - * of 0 (matching bm25s.BM25.get_scores(..., weight_mask=mask) semantics). 
- */ - getScores(queryTokens: string[], weightMask?: Uint8Array | null): Float32Array { - const { numDocs, docLengths, avgDocLength, postings, docFreq } = this.#state - const scores = new Float32Array(numDocs) - if (queryTokens.length === 0 || numDocs === 0) { - return scores - } - - // De-duplicate query tokens — repeated terms shouldn't compound BM25 scores. - const uniqueTerms = new Set(queryTokens) - - for (const term of uniqueTerms) { - const list = postings.get(term) - if (list === undefined) { - continue - } - const df = docFreq.get(term) ?? 0 - // Lucene/Robertson IDF: log(1 + (N - df + 0.5) / (df + 0.5)). - const idf = Math.log(1 + (numDocs - df + 0.5) / (df + 0.5)) - - for (const [docId, freq] of list) { - // Skip masked-out documents inside the posting-list iteration so we - // avoid the work entirely; Float32Array entries default to 0 so the - // final scores match the post-loop zeroing approach. - if (weightMask && (weightMask[docId] ?? 0) === 0) { - continue - } - const dl = docLengths[docId] ?? 0 - const denom = freq + K1 * (1 - B + (B * dl) / (avgDocLength || 1)) - const contrib = (idf * (freq * (K1 + 1))) / (denom || 1) - scores[docId] = (scores[docId] ?? 0) + contrib - } - } - - return scores - } - - /** Persist the index to `dir`. Creates the directory if it doesn't exist. */ - async save(dir: string): Promise { - await mkdir(dir, { recursive: true }) - const { numDocs, docLengths, avgDocLength, postings, docFreq } = this.#state - const serialized = { - version: 1, - numDocs, - avgDocLength, - docLengths: Array.from(docLengths), - postings: Array.from(postings.entries()), - docFreq: Array.from(docFreq.entries()), - } - await writeFile(path.join(dir, 'bm25.json'), JSON.stringify(serialized)) - } - - /** Load an index previously persisted with `save`. 
*/ - static async load(dir: string): Promise { - const raw = await readFile(path.join(dir, 'bm25.json'), 'utf8') - const parsed = JSON.parse(raw) as { - version: number - numDocs: number - avgDocLength: number - docLengths: number[] - postings: Array<[string, Array<[number, number]>]> - docFreq: Array<[string, number]> - } - return new Bm25Index({ - numDocs: parsed.numDocs, - docLengths: Float32Array.from(parsed.docLengths), - avgDocLength: parsed.avgDocLength, - postings: new Map(parsed.postings), - docFreq: new Map(parsed.docFreq), - }) - } -} diff --git a/src/indexing/types.test.ts b/src/indexing/types.test.ts deleted file mode 100644 index ab71014..0000000 --- a/src/indexing/types.test.ts +++ /dev/null @@ -1,40 +0,0 @@ -// Tests for src/indexing/types.ts - -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' -import { PersistencePath } from './types.ts' - -describe('PersistencePath', () => { - let dir: string - - beforeEach(() => { - dir = mkdtempSync(join(tmpdir(), 'csp-pp-')) - }) - afterEach(() => { - rmSync(dir, { recursive: true, force: true }) - }) - - it('fromPath produces the expected layout', () => { - const p = PersistencePath.fromPath(dir) - expect(p.chunks).toBe(join(dir, 'chunks.json')) - expect(p.bm25Index).toBe(join(dir, 'bm25_index')) - expect(p.semanticIndex).toBe(join(dir, 'semantic_index')) - expect(p.metadata).toBe(join(dir, 'metadata.json')) - }) - - it('nonExisting returns every path when the dir is empty', () => { - const p = PersistencePath.fromPath(dir) - expect(p.nonExisting().sort()).toEqual( - [p.chunks, p.bm25Index, p.semanticIndex, p.metadata].sort(), - ) - }) - - it('nonExisting returns only the truly missing paths', () => { - const p = PersistencePath.fromPath(dir) - writeFileSync(p.chunks, '[]') - mkdirSync(p.bm25Index, { recursive: true }) - 
expect(p.nonExisting().sort()).toEqual([p.semanticIndex, p.metadata].sort()) - }) -}) diff --git a/src/indexing/types.ts b/src/indexing/types.ts deleted file mode 100644 index 78a2c0e..0000000 --- a/src/indexing/types.ts +++ /dev/null @@ -1,44 +0,0 @@ -// Port of src/semble/index/types.py - -import { existsSync } from 'node:fs' -import { join } from 'node:path' - -/** - * Resolved on-disk paths used by the index save/load roundtrip. - * - * Mirrors `semble.index.types.PersistencePath`. - */ -export class PersistencePath { - readonly chunks: string - readonly bm25Index: string - readonly semanticIndex: string - readonly metadata: string - - constructor(opts: { - chunks: string - bm25Index: string - semanticIndex: string - metadata: string - }) { - this.chunks = opts.chunks - this.bm25Index = opts.bm25Index - this.semanticIndex = opts.semanticIndex - this.metadata = opts.metadata - } - - /** Return absolute paths that don't currently exist on disk. */ - nonExisting(): string[] { - return [this.chunks, this.bm25Index, this.semanticIndex, this.metadata] - .filter(p => !existsSync(p)) - } - - /** Build a PersistencePath rooted at `base`. 
*/ - static fromPath(base: string): PersistencePath { - return new PersistencePath({ - chunks: join(base, 'chunks.json'), - bm25Index: join(base, 'bm25_index'), - semanticIndex: join(base, 'semantic_index'), - metadata: join(base, 'metadata.json'), - }) - } -} diff --git a/src/languages.test.ts b/src/languages.test.ts deleted file mode 100644 index 5a8c69c..0000000 --- a/src/languages.test.ts +++ /dev/null @@ -1,135 +0,0 @@ -import { describe, expect, it } from 'bun:test' -import { - ALL_LANGUAGES, - CONFIG_LANGUAGES, - DATA_LANGUAGES, - detectLanguage, - DOC_LANGUAGES, - EXTENSION_TO_LANGUAGE, - getExtensions, -} from './languages.ts' - -describe('detectLanguage', () => { - it('detects typescript from .ts', () => { - expect(detectLanguage('foo.ts')).toBe('typescript') - }) - - it('detects tsx from .tsx', () => { - expect(detectLanguage('foo.tsx')).toBe('tsx') - }) - - it('detects python from .py', () => { - expect(detectLanguage('foo.py')).toBe('python') - }) - - it('detects markdown from .md', () => { - expect(detectLanguage('foo.md')).toBe('markdown') - }) - - it('returns undefined for unknown extensions', () => { - expect(detectLanguage('foo.unknown')).toBeUndefined() - }) - - it('is case-insensitive on the suffix', () => { - expect(detectLanguage('Foo.TS')).toBe('typescript') - }) - - it('returns undefined for files without an extension', () => { - expect(detectLanguage('Makefile')).toBeUndefined() - }) - - it('returns undefined for dotfiles like .gitignore', () => { - // Mirrors Python's Path('.gitignore').suffix === '' - expect(detectLanguage('.gitignore')).toBeUndefined() - expect(detectLanguage('dir/.gitignore')).toBeUndefined() - expect(detectLanguage('dir\\.gitignore')).toBeUndefined() - }) - - it('matches the final suffix for files with multiple dots', () => { - expect(detectLanguage('foo.bar.ts')).toBe('typescript') - }) - - it('handles paths with directory separators', () => { - expect(detectLanguage('src/indexing/files.ts')).toBe('typescript') - }) - 
- it('handles Windows-style path separators', () => { - // Mirrors pathlib.Path on Windows where '\\' is also a separator. - expect(detectLanguage('src\\indexing\\files.ts')).toBe('typescript') - expect(detectLanguage('C:\\Users\\me\\foo.py')).toBe('python') - }) -}) - -describe('getExtensions', () => { - it('includes common code extensions when content type is code', () => { - const exts = getExtensions(['code'], undefined) - expect(exts).toContain('.ts') - expect(exts).toContain('.py') - expect(exts).toContain('.go') - }) - - it('includes doc extensions but not code extensions when content type is docs', () => { - const exts = getExtensions(['docs'], undefined) - expect(exts).toContain('.md') - expect(exts).toContain('.rst') - expect(exts).not.toContain('.ts') - }) - - it('includes config extensions when content type is config', () => { - const exts = getExtensions(['config'], undefined) - expect(exts).toContain('.toml') - expect(exts).toContain('.yaml') - }) - - it('appends user-provided extensions', () => { - const exts = getExtensions(['code'], ['.foo']) - expect(exts).toContain('.foo') - }) - - it('returns a sorted list with no duplicates', () => { - const exts = getExtensions(['code', 'docs'], ['.ts', '.foo']) - const sorted = [...exts].sort() - expect(exts).toEqual(sorted) - expect(new Set(exts).size).toBe(exts.length) - }) - - it('unions multiple content types', () => { - const code = new Set(getExtensions(['code'], undefined)) - const docs = new Set(getExtensions(['docs'], undefined)) - const both = new Set(getExtensions(['code', 'docs'], undefined)) - for (const ext of code) { - expect(both.has(ext)).toBe(true) - } - for (const ext of docs) { - expect(both.has(ext)).toBe(true) - } - }) -}) - -describe('language sets', () => { - it('EXTENSION_TO_LANGUAGE is non-empty', () => { - expect(Object.keys(EXTENSION_TO_LANGUAGE).length).toBeGreaterThan(0) - }) - - it('ALL_LANGUAGES is non-empty', () => { - expect(ALL_LANGUAGES.size).toBeGreaterThan(0) - }) - - 
it('DOC_LANGUAGES is non-empty', () => { - expect(DOC_LANGUAGES.size).toBeGreaterThan(0) - }) - - it('CONFIG_LANGUAGES is non-empty', () => { - expect(CONFIG_LANGUAGES.size).toBeGreaterThan(0) - }) - - it('DATA_LANGUAGES is non-empty', () => { - expect(DATA_LANGUAGES.size).toBeGreaterThan(0) - }) - - it('ALL_LANGUAGES contains every value in EXTENSION_TO_LANGUAGE', () => { - for (const lang of Object.values(EXTENSION_TO_LANGUAGE)) { - expect(ALL_LANGUAGES.has(lang)).toBe(true) - } - }) -}) diff --git a/src/languages.ts b/src/languages.ts deleted file mode 100644 index ca13165..0000000 --- a/src/languages.ts +++ /dev/null @@ -1,526 +0,0 @@ -// Port of src/semble/index/files.py - -export type ContentType = 'code' | 'docs' | 'config' - -export const EXTENSION_TO_LANGUAGE: Record = { - '.4th': 'forth', - '.ada': 'ada', - '.adb': 'ada', - '.adoc': 'asciidoc', - '.ads': 'ada', - '.agda': 'agda', - '.al': 'al', - '.as': 'actionscript', - '.asciidoc': 'asciidoc', - '.asm': 'asm', - '.astro': 'astro', - '.awk': 'awk', - '.axi': 'netlinx', - '.axs': 'netlinx', - '.bash': 'bash', - '.bat': 'batch', - '.bb': 'bitbake', - '.bbappend': 'bitbake', - '.bbclass': 'bitbake', - '.beancount': 'beancount', - '.bib': 'bibtex', - '.bicep': 'bicep', - '.blade': 'blade', - '.bq': 'sql_bigquery', - '.brs': 'brightscript', - '.bsl': 'bsl', - '.bzl': 'starlark', - '.c': 'c', - '.c3': 'c3', - '.c3i': 'c3', - '.c3t': 'c3', - '.caddyfile': 'caddy', - '.cairo': 'cairo', - '.capnp': 'capnp', - '.cbl': 'cobol', - '.cc': 'cpp', - '.cedar': 'cedar', - '.cedarschema': 'cedarschema', - '.cel': 'cel', - '.cfc': 'cfml', - '.cfg': 'ini', - '.chatito': 'chatito', - '.circom': 'circom', - '.cjs': 'javascript', - '.ck': 'chuck', - '.cl': 'commonlisp', - '.clar': 'clarity', - '.clj': 'clojure', - '.cljc': 'clojure', - '.cljs': 'clojure', - '.cls': 'abl', - '.cmake': 'cmake', - '.cmd': 'batch', - '.cob': 'cobol', - '.cobol': 'cobol', - '.conf': 'nginx', - '.cook': 'cooklang', - '.corn': 'corn', - '.cpon': 
'cpon', - '.cpp': 'cpp', - '.cr': 'crystal', - '.cs': 'csharp', - '.cshtml': 'razor', - '.css': 'css', - '.cst': 'cst', - '.csv': 'csv', - '.cts': 'typescript', - '.cu': 'cuda', - '.cuda': 'cuda', - '.cue': 'cue', - '.cxx': 'cpp', - '.cylc': 'cylc', - '.d': 'd', - '.dart': 'dart', - '.desktop': 'desktop', - '.dhall': 'dhall', - '.diff': 'diff', - '.dj': 'djot', - '.dl': 'souffle', - '.dockerfile': 'dockerfile', - '.dot': 'dot', - '.dsp': 'faust', - '.dtd': 'dtd', - '.dts': 'devicetree', - '.dtsi': 'devicetree', - '.ebnf': 'ebnf', - '.eds': 'eds', - '.eex': 'eex', - '.el': 'elisp', - '.elm': 'elm', - '.elv': 'elvish', - '.enforce': 'enforce', - '.eps': 'postscript', - '.erb': 'embeddedtemplate', - '.erl': 'erlang', - '.ex': 'elixir', - '.exs': 'elixir', - '.f': 'fortran', - '.f03': 'fortran', - '.f08': 'fortran', - '.f90': 'fortran', - '.f95': 'fortran', - '.fc': 'func', - '.fidl': 'fidl', - '.filter': 'poe_filter', - '.fir': 'firrtl', - '.fish': 'fish', - '.fnl': 'fennel', - '.fs': 'fsharp', - '.fsd': 'facility', - '.fsi': 'fsharp_signature', - '.fsx': 'fsharp', - '.fth': 'forth', - '.fun': 'sml', - '.g': 'gap', - '.gd': 'gdscript', - '.gdshader': 'gdshader', - '.gi': 'gap', - '.gitattributes': 'gitattributes', - '.gitignore': 'gitignore', - '.gleam': 'gleam', - '.glsl': 'glsl', - '.gn': 'gn', - '.gni': 'gn', - '.gnuplot': 'gnuplot', - '.go': 'go', - '.gotmpl': 'gotmpl', - '.gp': 'gnuplot', - '.gql': 'graphql', - '.gradle': 'groovy', - '.graphql': 'graphql', - '.gren': 'gren', - '.groovy': 'groovy', - '.gv': 'dot', - '.h': 'c', - '.hack': 'hack', - '.hare': 'hare', - '.hbs': 'glimmer', - '.hcl': 'hcl', - '.heex': 'heex', - '.hjson': 'hjson', - '.hlsl': 'hlsl', - '.hocon': 'hocon', - '.hoon': 'hoon', - '.hpp': 'cpp', - '.hrl': 'erlang', - '.hs': 'haskell', - '.htm': 'html', - '.html': 'html', - '.http': 'http', - '.hurl': 'hurl', - '.hx': 'haxe', - '.hxx': 'cpp', - '.idr': 'idris', - '.inc': 'sourcepawn', - '.ini': 'ini', - '.ino': 'arduino', - '.ispc': 'ispc', - 
'.j2': 'jinja2', - '.jai': 'jai', - '.janet': 'janet', - '.java': 'java', - '.jinja2': 'jinja2', - '.jl': 'julia', - '.journal': 'ledger', - '.jq': 'jq', - '.js': 'javascript', - '.json': 'json', - '.json5': 'json5', - '.jsonnet': 'jsonnet', - '.jsx': 'javascript', - '.just': 'just', - '.k': 'kcl', - '.kdl': 'kdl', - '.kt': 'kotlin', - '.kts': 'kotlin', - '.lc': 'elsa', - '.ldg': 'ledger', - '.lds': 'linkerscript', - '.lean': 'lean', - '.ledger': 'ledger', - '.leex': 'eex', - '.less': 'less', - '.libsonnet': 'jsonnet', - '.liquid': 'liquid', - '.lisp': 'commonlisp', - '.ll': 'llvm', - '.lua': 'lua', - '.luau': 'luau', - '.m': 'objc', - '.magik': 'magik', - '.makefile': 'make', - '.markdown': 'markdown', - '.matlab': 'matlab', - '.md': 'markdown', - '.mermaid': 'mermaid', - '.meson': 'meson', - '.mjs': 'javascript', - '.mk': 'make', - '.ml': 'ocaml', - '.mli': 'ocaml_interface', - '.mlir': 'mlir', - '.mll': 'ocamllex', - '.mmd': 'mermaid', - '.mod': 'gomod', - '.mojo': 'mojo', - '.move': 'move', - '.mts': 'typescript', - '.nasm': 'nasm', - '.ncl': 'nickel', - '.nginx': 'nginx', - '.nim': 'nim', - '.nims': 'nim', - '.ninja': 'ninja', - '.nix': 'nix', - '.norg': 'norg', - '.nqc': 'nqc', - '.nu': 'nushell', - '.nut': 'squirrel', - '.odin': 'odin', - '.org': 'org', - '.p': 'abl', - '.pas': 'pascal', - '.patch': 'diff', - '.pbtxt': 'textproto', - '.pem': 'pem', - '.pgn': 'pgn', - '.php': 'php', - '.pkl': 'pkl', - '.pl': 'perl', - '.plt': 'gnuplot', - '.pm': 'perl', - '.po': 'po', - '.pony': 'pony', - '.pot': 'po', - '.pp': 'puppet', - '.prisma': 'prisma', - '.pro': 'prolog', - '.promql': 'promql', - '.properties': 'properties', - '.proto': 'proto', - '.prql': 'prql', - '.ps': 'postscript', - '.ps1': 'powershell', - '.psd1': 'powershell', - '.psm1': 'powershell', - '.psv': 'psv', - '.pug': 'pug', - '.purs': 'purescript', - '.py': 'python', - '.pyi': 'python', - '.pyw': 'python', - '.ql': 'ql', - '.qml': 'qmljs', - '.r': 'r', - '.rasi': 'rasi', - '.razor': 'razor', - 
'.rb': 'ruby', - '.rbs': 'rbs', - '.re': 're2c', - '.rego': 'rego', - '.res': 'rescript', - '.resi': 'rescript', - '.rkt': 'racket', - '.robot': 'robot', - '.roc': 'roc', - '.ron': 'ron', - '.rs': 'rust', - '.rst': 'rst', - '.rtf': 'rtf', - '.s': 'asm', - '.scad': 'openscad', - '.scala': 'scala', - '.scm': 'scheme', - '.scss': 'scss', - '.sh': 'bash', - '.shtml': 'superhtml', - '.sig': 'sml', - '.slang': 'slang', - '.smali': 'smali', - '.smithy': 'smithy', - '.smk': 'snakemake', - '.sml': 'sml', - '.sol': 'solidity', - '.sp': 'sourcepawn', - '.sparql': 'sparql', - '.sql': 'sql', - '.squirrel': 'squirrel', - '.st': 'smalltalk', - '.stan': 'stan', - '.star': 'starlark', - '.sv': 'systemverilog', - '.svelte': 'svelte', - '.svh': 'systemverilog', - '.sw': 'sway', - '.swift': 'swift', - '.tact': 'tact', - '.tal': 'uxntal', - '.tape': 'vhs', - '.tcl': 'tcl', - '.td': 'tablegen', - '.templ': 'templ', - '.tera': 'tera', - '.tex': 'latex', - '.textproto': 'textproto', - '.tf': 'terraform', - '.tfvars': 'terraform', - '.thrift': 'thrift', - '.tl': 'teal', - '.tla': 'tlaplus', - '.todotxt': 'todotxt', - '.toml': 'toml', - '.tres': 'godot_resource', - '.trigger': 'apex', - '.ts': 'typescript', - '.tscn': 'godot_resource', - '.tsconfig': 'typoscript', - '.tsp': 'typespec', - '.tsv': 'tsv', - '.tsx': 'tsx', - '.ttl': 'turtle', - '.twig': 'twig', - // Overly broad - // '.txt': 'vimdoc', - '.typoscript': 'typoscript', - '.typst': 'typst', - '.v': 'v', - '.vb': 'vb', - '.verilog': 'verilog', - '.vhd': 'vhdl', - '.vhdl': 'vhdl', - '.vim': 'vim', - '.vrl': 'vrl', - '.vue': 'vue', - '.w': 'abl', - '.wast': 'wast', - '.wat': 'wat', - '.wgsl': 'wgsl', - '.wit': 'wit', - '.wl': 'wolfram', - '.xml': 'xml', - '.xsl': 'xml', - '.xslt': 'xml', - '.yaml': 'yaml', - '.yml': 'yaml', - '.yuck': 'yuck', - '.zig': 'zig', - '.ziggy': 'ziggy', - '.zsh': 'zsh', -} - -export const DOC_LANGUAGES: ReadonlySet = new Set([ - 'asciidoc', - 'bibtex', - 'djot', - 'doxygen', - 'html', - 'javadoc', - 'jsdoc', 
- 'latex', - 'luadoc', - 'markdown', - 'markdown_inline', - 'mermaid', - 'norg', - 'norg_meta', - 'org', - 'phpdoc', - 'po', - 'rst', - 'rtf', - 'vimdoc', -]) - -export const CONFIG_LANGUAGES: ReadonlySet = new Set([ - 'beancount', - 'capnp', - 'cedarschema', - 'comment', - 'cooklang', - 'cpon', - 'desktop', - 'devicetree', - 'diff', - 'dtd', - 'editorconfig', - 'ebnf', - 'git_config', - 'gitattributes', - 'gitcommit', - 'gitignore', - 'godot_resource', - 'gomod', - 'gosum', - 'gowork', - 'gpg', - 'hjson', - 'hocon', - 'ini', - 'kdl', - 'ledger', - 'pem', - 'pgn', - 'properties', - 'proto', - 'requirements', - 'ron', - 'smithy', - 'ssh_config', - 'textproto', - 'thrift', - 'todotxt', - 'toml', - 'turtle', - 'typespec', - 'wit', - 'xcompose', - 'xml', - 'yaml', - 'ziggy_schema', -]) - -export const DATA_LANGUAGES: ReadonlySet = new Set([ - 'csv', - 'json', - 'json5', - 'psv', - 'tsv', -]) - -export const ALL_LANGUAGES: ReadonlySet = new Set( - Object.values(EXTENSION_TO_LANGUAGE), -) - -// Code languages = ALL - DOC - CONFIG - DATA -const CODE_LANGUAGES: ReadonlySet = (() => { - const set = new Set(ALL_LANGUAGES) - for (const l of DOC_LANGUAGES) { - set.delete(l) - } - for (const l of CONFIG_LANGUAGES) { - set.delete(l) - } - for (const l of DATA_LANGUAGES) { - set.delete(l) - } - return set -})() - -// Invert EXTENSION_TO_LANGUAGE, collecting duplicates per language. -const LANGUAGE_TO_EXTENSIONS: ReadonlyMap = (() => { - const inv = new Map() - for (const [ext, lang] of Object.entries(EXTENSION_TO_LANGUAGE)) { - const list = inv.get(lang) - if (list === undefined) { - inv.set(lang, [ext]) - } - else { list.push(ext) } - } - return inv -})() - -const CONTENT_TYPE_LANGUAGES: Record> = { - code: CODE_LANGUAGES, - docs: DOC_LANGUAGES, - config: CONFIG_LANGUAGES, -} - -/** - * Detect the language of a file by its extension. - * - * Matching is case-insensitive on the final `.suffix` (mirroring Python's - * `Path(...).suffix.lower()` lookup). 
- */ -export function detectLanguage(fileName: string): string | undefined { - // Mirror Python's Path(fileName).suffix.lower(): take the substring after - // the last path separator, then return the part from the final '.' onward — - // but only if that '.' is not at the very start of the basename (so - // '.gitignore' resolves to '' just like Python). Both POSIX ('/') and - // Windows ('\\') separators are handled, matching pathlib.Path on Windows. - const lastSep = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\')) - const base = fileName.slice(lastSep + 1) - const dot = base.lastIndexOf('.') - if (dot <= 0) { - return undefined - } - return EXTENSION_TO_LANGUAGE[base.slice(dot).toLowerCase()] -} - -/** - * Resolve a set of content types to the union of file extensions associated - * with their languages. Optional `extensions` are added verbatim. The result - * is sorted lexicographically (mirroring Python's `sorted(set)`). - */ -export function getExtensions( - types: readonly ContentType[], - extensions: readonly string[] | undefined, -): string[] { - const languages = new Set() - for (const type of types) { - for (const lang of CONTENT_TYPE_LANGUAGES[type]) { - languages.add(lang) - } - } - const out = new Set() - for (const lang of languages) { - const exts = LANGUAGE_TO_EXTENSIONS.get(lang) - if (exts === undefined) { - continue - } - for (const ext of exts) { - out.add(ext) - } - } - if (extensions !== undefined) { - for (const ext of extensions) { - out.add(ext) - } - } - return [...out].sort() -} diff --git a/src/mcp/.gitkeep b/src/mcp/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/mcp/server.test.ts b/src/mcp/server.test.ts deleted file mode 100644 index 8b53e78..0000000 --- a/src/mcp/server.test.ts +++ /dev/null @@ -1,343 +0,0 @@ -import { afterAll, beforeEach, describe, expect, it } from 'bun:test' -import { makeStubModel, SelectableBasicBackend } from '../indexing/dense.ts' -import * as indexing from 
'../indexing/index.ts' -import { CspIndex } from '../indexing/index.ts' -import { Bm25Index } from '../indexing/sparse.ts' -import { ContentType } from '../types.ts' -import { _internal, createServer, IndexCache } from './server.ts' - -// We intercept CspIndex.fromPath/fromGit by reassigning the static methods on -// the *real* class object (the same reference server.ts imports) rather than -// `mock.module`. Bun's `mock.module` mutates the process-wide module registry -// irreversibly — it would leak the stub into sibling test files (notably -// ../indexing/index.test.ts) that exercise the genuine CspIndex. Static-method -// reassignment is plain property mutation, so `afterAll` can restore it. -let fromPathCalls = 0 -let fromGitCalls = 0 -let fromPathImpl: () => Promise = async () => makeIndex() -let fromGitImpl: () => Promise = async () => makeIndex() - -// A real, empty CspIndex instance: `instanceof CspIndex` holds and `search` -// returns [] for an empty index, matching what these tests assert. -function makeIndex(chunks: CspIndex['chunks'] = []): CspIndex { - const vectors = chunks.map(() => new Float32Array(4)) - return new CspIndex({ - model: makeStubModel(4), - bm25Index: Bm25Index.build(chunks.map(() => ['x'])), - semanticIndex: new SelectableBasicBackend(vectors), - chunks, - modelPath: '/tmp/fake-model', - root: null, - content: [ContentType.CODE], - }) -} - -// IndexCache now routes every in-memory miss through a `loadOrBuild` seam -// (the shared `~/.csp` disk cache in production). These tests don't want to -// touch the real ~/.csp home or the network, so they inject a seam that -// delegates to the static-mocked CspIndex.fromGit/fromPath — preserving the -// fromGitCalls/fromPathCalls counters the existing assertions rely on while -// proving the IndexCache → loadOrBuild → (git vs path) routing. 
-async function stubLoadOrBuild(source: string, _opts: { content: ContentType[], ref?: string | undefined, modelPath?: string | undefined }): Promise { - return source.startsWith('http://') || source.startsWith('https://') - ? CspIndex.fromGit(source, {}) - : CspIndex.fromPath(source, { content: [ContentType.CODE] }) -} - -const realFromPath = CspIndex.fromPath -const realFromGit = CspIndex.fromGit - -CspIndex.fromPath = async (..._args: Parameters): Promise => { - fromPathCalls++ - return fromPathImpl() -} -CspIndex.fromGit = async (..._args: Parameters): Promise => { - fromGitCalls++ - return fromGitImpl() -} - -afterAll(() => { - // Restore the genuine static methods so later test files see real behavior. - CspIndex.fromPath = realFromPath - CspIndex.fromGit = realFromGit -}) - -beforeEach(() => { - fromPathCalls = 0 - fromGitCalls = 0 - fromPathImpl = async () => makeIndex() - fromGitImpl = async () => makeIndex() -}) - -describe('IndexCache', () => { - it('caches results — second call returns the cached value', async () => { - const cache = new IndexCache({ content: [ContentType.CODE], loadOrBuild: stubLoadOrBuild }) - const first = await cache.get('/tmp/some-repo') - const second = await cache.get('/tmp/some-repo') - expect(second).toBe(first) - expect(fromPathCalls).toBe(1) - }) - - it('deduplicates concurrent get() for the same source', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const [a, b] = await Promise.all([ - cache.get('/tmp/dedup-repo'), - cache.get('/tmp/dedup-repo'), - ]) - expect(a).toBe(b) - expect(fromPathCalls).toBe(1) - }) - - it('evict() removes the cached entry so the next get() rebuilds', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await cache.get('/tmp/repo-to-evict') - expect(fromPathCalls).toBe(1) - - await cache.evict('/tmp/repo-to-evict') - - await cache.get('/tmp/repo-to-evict') - expect(fromPathCalls).toBe(2) - }) - - it('LRU: the 11th distinct source evicts 
the oldest', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - for (let i = 0; i < 10; i++) { - await cache.get(`/tmp/lru-${i}`) - } - expect(cache.size).toBe(10) - - await cache.get('/tmp/lru-10') - expect(cache.size).toBe(10) - - // /tmp/lru-0 was the oldest and should have been evicted — refetch triggers rebuild. - const before = fromPathCalls - await cache.get('/tmp/lru-0') - expect(fromPathCalls).toBe(before + 1) - }) - - it('treats git URLs differently from local paths', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await cache.get('https://github.com/org/repo') - expect(fromGitCalls).toBe(1) - expect(fromPathCalls).toBe(0) - - await cache.get('/tmp/local-path') - expect(fromPathCalls).toBe(1) - }) - - it('evict() awaitably blocks until the cache entry is gone', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await cache.get('/tmp/await-evict') - expect(cache.size).toBe(1) - await cache.evict('/tmp/await-evict') - expect(cache.size).toBe(0) - }) - - it('failed get() does not poison the cache entry', async () => { - fromPathImpl = async () => { - throw new Error('boom') - } - - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect(cache.get('/tmp/will-fail')).rejects.toThrow('boom') - - // After failure, the next call retries. - fromPathImpl = async () => makeIndex() - await expect(cache.get('/tmp/will-fail')).resolves.toBeInstanceOf(indexing.CspIndex) - }) -}) - -describe('IndexCache ↔ disk cache (loadOrBuildIndex routing)', () => { - // A spy seam standing in for loadOrBuildIndex so these tests assert routing - // without touching the real ~/.csp home or the network. Mirrors the cli DI - // seam contract: (source, { content, ref? }) → Promise. 
- interface LoadOrBuildCall { - source: string - content: ContentType[] - ref: string | undefined - } - - function makeLoadOrBuildSpy(): { - seam: (source: string, opts: { content: ContentType[], ref?: string | undefined }) => Promise - calls: LoadOrBuildCall[] - } { - const calls: LoadOrBuildCall[] = [] - const seam = async ( - source: string, - opts: { content: ContentType[], ref?: string | undefined }, - ): Promise => { - calls.push({ source, content: opts.content, ref: opts.ref }) - return makeIndex() - } - return { seam, calls } - } - - it('get() miss routes the build through the injected loadOrBuild seam', async () => { - const { seam, calls } = makeLoadOrBuildSpy() - const cache = new IndexCache({ content: [ContentType.CODE], loadOrBuild: seam }) - - await cache.get('/tmp/disk-cache-repo') - - // Build went through the disk-cache seam, not the raw fromPath/fromGit path. - expect(calls.length).toBe(1) - expect(calls[0]!.source).toBe('/tmp/disk-cache-repo') - expect(calls[0]!.content).toEqual([ContentType.CODE]) - expect(fromPathCalls).toBe(0) - }) - - it('omits ref when absent and forwards it when present (matches cli key contract)', async () => { - const { seam, calls } = makeLoadOrBuildSpy() - const cache = new IndexCache({ loadOrBuild: seam }) - - await cache.get('https://github.com/org/repo') - expect(calls[0]!.ref).toBeUndefined() - - await cache.get('https://github.com/org/repo', 'v1.2.3') - expect(calls[1]!.ref).toBe('v1.2.3') - }) - - it('cache hit reuses the in-memory entry — seam called once for two gets', async () => { - const { seam, calls } = makeLoadOrBuildSpy() - const cache = new IndexCache({ loadOrBuild: seam }) - - const first = await cache.get('/tmp/hot-repo') - const second = await cache.get('/tmp/hot-repo') - - expect(second).toBe(first) - // In-memory LRU absorbs the second get; the disk seam is not re-consulted. 
- expect(calls.length).toBe(1) - }) - - it('watcher-style evict invalidates in-memory only — re-get re-routes through seam, no disk deletion', async () => { - const { seam, calls } = makeLoadOrBuildSpy() - const cache = new IndexCache({ loadOrBuild: seam }) - - await cache.get('/tmp/watched-repo') - expect(calls.length).toBe(1) - - // The watcher's job is in-memory eviction only. evict() must NOT delete the - // disk cache entry — content-hash invalidation inside loadOrBuildIndex owns - // that. Proving evict touches only the in-memory slot guards against the - // double-rebuild the STOP condition warns about. - await cache.evict('/tmp/watched-repo') - expect(cache.size).toBe(0) - - await cache.get('/tmp/watched-repo') - // Re-get re-consults the disk seam exactly once; loadOrBuildIndex's own - // content-hash check decides reuse-vs-rebuild on disk (single rebuild). - expect(calls.length).toBe(2) - }) -}) - -describe('getIndex (safety layer)', () => { - it('rejects ssh:// git URLs', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect( - _internal.getIndex('ssh://git@github.com/org/repo.git', undefined, cache), - ).rejects.toThrow(/Only https:\/\/, http:\/\//) - }) - - it('rejects git:// git URLs', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect( - _internal.getIndex('git://github.com/org/repo.git', undefined, cache), - ).rejects.toThrow(/Only https:\/\/, http:\/\//) - }) - - it('rejects file:// pseudo-URLs', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect( - _internal.getIndex('file:///tmp/whatever', undefined, cache), - ).rejects.toThrow(/Only https:\/\/, http:\/\//) - }) - - it('rejects when repo and defaultSource are both undefined', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect(_internal.getIndex(undefined, undefined, cache)).rejects.toThrow( - /No repo specified/, - ) - }) - - 
it('falls back to defaultSource when repo is undefined', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const result = await _internal.getIndex(undefined, '/tmp/default-repo', cache) - expect(result).toBeInstanceOf(indexing.CspIndex) - expect(fromPathCalls).toBe(1) - }) - - it('accepts https:// git URLs', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const result = await _internal.getIndex( - 'https://github.com/org/repo', - undefined, - cache, - ) - expect(result).toBeInstanceOf(indexing.CspIndex) - expect(fromGitCalls).toBe(1) - }) - - it('wraps underlying index errors in a descriptive message', async () => { - fromPathImpl = async () => { - throw new Error('disk full') - } - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - await expect(_internal.getIndex('/tmp/bad', undefined, cache)).rejects.toThrow( - /Failed to index .*disk full/, - ) - }) -}) - -describe('createServer', () => { - it('returns a server object exposing `search` and `find_related` tools', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const server = await createServer(cache, '/tmp/default') - - expect(server.tools.has('search')).toBe(true) - expect(server.tools.has('find_related')).toBe(true) - - const searchTool = server.tools.get('search')! - expect(searchTool.title).toBe( - 'Search a codebase with a natural-language or code query.', - ) - - const findRelatedTool = server.tools.get('find_related')! - expect(findRelatedTool.title).toBe( - 'Find code chunks semantically similar to a specific location in a file.', - ) - }) - - it('`search` handler returns "No results" JSON when the index yields nothing', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const server = await createServer(cache, '/tmp/default') - const searchTool = server.tools.get('search')! 
- const out = await searchTool.handler({ query: 'foo' }) - expect(JSON.parse(out)).toEqual({ error: 'No results found.' }) - }) - - it('`search` handler surfaces safety errors as plain strings', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const server = await createServer(cache) // no defaultSource - const searchTool = server.tools.get('search')! - const out = await searchTool.handler({ query: 'foo' }) // no repo either - expect(out).toMatch(/No repo specified/) - }) - - it('`search` handler rejects ssh:// git URLs as a plain-string error', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const server = await createServer(cache) - const searchTool = server.tools.get('search')! - const out = await searchTool.handler({ - query: 'foo', - repo: 'ssh://git@github.com/org/repo', - }) - expect(out).toMatch(/Only https:\/\/, http:\/\//) - }) - - it('`find_related` handler returns a helpful message when the chunk is missing', async () => { - const cache = new IndexCache({ loadOrBuild: stubLoadOrBuild }) - const server = await createServer(cache, '/tmp/default') - const tool = server.tools.get('find_related')! - const out = await tool.handler({ file_path: 'nope.ts', line: 42 }) - expect(out).toMatch(/No chunk found at nope.ts:42/) - }) -}) diff --git a/src/mcp/server.ts b/src/mcp/server.ts deleted file mode 100644 index 30e617e..0000000 --- a/src/mcp/server.ts +++ /dev/null @@ -1,696 +0,0 @@ -// Port of src/semble/mcp.py - -import type { CspIndex } from '../indexing/index.ts' -import * as fs from 'node:fs/promises' - -import * as path from 'node:path' -import process from 'node:process' -import { loadOrBuildIndex } from '../indexing/cache.ts' -import { loadModel } from '../indexing/index.ts' -import { ContentType } from '../types.ts' -import { formatResults, isGitUrl, resolveChunk } from '../utils.ts' -import { version } from '../version.ts' - -const REPO_DESCRIPTION - = 'https:// or http:// git URL (e.g. 
https://github.com/org/repo) or local directory path to index and search. ' - + 'Required when no default index was configured at startup. ' - + 'The index is cached after the first call, so repeat queries are fast.' - -const CACHE_MAX_SIZE = 10 // Max number of cached indexes to keep in memory. - -const SERVER_INSTRUCTIONS - = 'Instant code search for any local or remote git repository. ' - + 'Call `search` to find relevant code; call `find_related` on a result to discover similar code elsewhere. ' - + 'When working in a local project, pass the project root as `repo`. ' - + 'For remote repos, pass an explicit https:// URL. Never guess or infer URLs. ' - + 'Prefer these tools over Grep, Glob, or Read for any question about how code works.' - -/** - * A deferred Promise — exposes its resolve/reject for use as a one-shot - * readiness signal (the model-load latch in IndexCache). - */ -interface Deferred { - promise: Promise - resolve: (value: T) => void - reject: (reason?: unknown) => void -} - -function createDeferred(): Deferred { - let resolve!: (value: T) => void - let reject!: (reason?: unknown) => void - const promise = new Promise((res, rej) => { - resolve = res - reject = rej - }) - return { promise, resolve, reject } -} - -/** Resolve a local filesystem path to its canonical absolute form. */ -async function resolvePath(p: string): Promise { - try { - return await fs.realpath(p) - } - catch { - return path.resolve(p) - } -} - -/** - * Disk-cache seam: routes an in-memory cache miss through the shared - * `~/.csp/index/` disk cache. Mirrors the cli DI seam contract so cli and - * mcp compute the same cache key for the same (source, content, ref) — see - * `cli.ts`'s `_defaultLoadOrBuild`. Tests inject a stub to stay off the real - * `~/.csp` home and the network. 
- */ -export type LoadOrBuildSeam = ( - source: string, - opts: { content: ContentType[], ref?: string | undefined, modelPath?: string | undefined }, -) => Promise - -export interface IndexCacheOptions { - content?: ContentType[] - /** - * Override the disk-cache build path (defaults to {@link loadOrBuildIndex}). - * Injected by tests to assert routing without touching `~/.csp` / network. - */ - loadOrBuild?: LoadOrBuildSeam -} - -/** - * Default disk-cache seam: forward to {@link loadOrBuildIndex}, re-narrowing - * `ref` so an absent ref is omitted rather than passed as explicit `undefined` - * (required under `exactOptionalPropertyTypes`). Identical to cli's - * `_defaultLoadOrBuild` so both layers key the cache the same way. - */ -async function defaultLoadOrBuild( - source: string, - opts: { content: ContentType[], ref?: string | undefined, modelPath?: string | undefined }, -): Promise { - return loadOrBuildIndex(source, { - content: opts.content, - ...(opts.ref !== undefined ? { ref: opts.ref } : {}), - ...(opts.modelPath !== undefined ? { modelPath: opts.modelPath } : {}), - }) -} - -/** - * Cache of indexed repos and local paths for the lifetime of the MCP server - * process. LRU-bounded (10 entries) and deduplicates concurrent requests via - * Promise caching. - */ -export class IndexCache { - // Use a Map for insertion-order semantics (LRU via re-insert). - private readonly tasks = new Map>() - private readonly content: ContentType[] - private readonly loadOrBuild: LoadOrBuildSeam - private readonly modelReady: Deferred - private modelPath: string | null = null - private modelError: unknown = null - private modelLoadStarted = false - private watcherClose: (() => Promise) | null = null - - constructor(options: IndexCacheOptions = {}) { - this.content = options.content ?? [ContentType.CODE] - this.loadOrBuild = options.loadOrBuild ?? 
defaultLoadOrBuild - this.modelReady = createDeferred() - // Prevent unhandled promise rejection warnings if the model fails to load - // before any caller awaits the promise. Callers of awaitModel() still - // observe the rejection because they await the same promise themselves. - this.modelReady.promise.catch(() => {}) - } - - /** - * Begin loading the embedding model (idempotent). Call from `serve` to - * run model load in parallel with starting the server. If never called - * explicitly, the first `get()` will trigger it. - */ - ensureModelLoading(): void { - if (this.modelLoadStarted) { - return - } - this.modelLoadStarted = true - void (async () => { - try { - const [, modelPath] = await loadModel() - this.modelPath = modelPath - this.modelReady.resolve(modelPath) - } - catch (err) { - this.modelError = err - this.modelReady.reject(err) - } - })() - } - - private async awaitModel(): Promise { - this.ensureModelLoading() - if (this.modelError !== null) { - throw this.modelError - } - return this.modelReady.promise - } - - private async computeCacheKey(source: string, ref?: string): Promise { - if (isGitUrl(source)) { - return ref !== undefined && ref !== '' ? `${source}@${ref}` : source - } - return resolvePath(source) - } - - /** - * Return an index for the requested source, building and caching it on - * first access. Concurrent calls for the same key share a single Promise. - */ - async get(source: string, ref?: string): Promise { - const cacheKey = await this.computeCacheKey(source, ref) - - const existing = this.tasks.get(cacheKey) - if (existing !== undefined) { - // Touch for LRU (move to most-recent end). - this.tasks.delete(cacheKey) - this.tasks.set(cacheKey, existing) - try { - return await existing - } - catch (err) { - // Only evict if this task hasn't already been replaced. 
- if (this.tasks.get(cacheKey) === existing) { - this.tasks.delete(cacheKey) - } - throw err - } - } - - const modelPath = await this.awaitModel() - - // Re-check after the await: another caller may have populated the entry. - const racedExisting = this.tasks.get(cacheKey) - if (racedExisting !== undefined) { - this.tasks.delete(cacheKey) - this.tasks.set(cacheKey, racedExisting) - return racedExisting - } - - // LRU eviction: drop oldest entry (first inserted). - if (this.tasks.size >= CACHE_MAX_SIZE) { - const oldestKey = this.tasks.keys().next().value - if (oldestKey !== undefined) { - this.tasks.delete(oldestKey) - } - } - - // Route the in-memory miss through the shared disk cache. The seam owns the - // `isGitUrl` branch and the `~/.csp/index/` content-hash reuse/rebuild; - // we only hand it the (source, content, ref) and the pre-warmed modelPath. - // `ref` / `modelPath` are omitted when absent to satisfy - // `exactOptionalPropertyTypes` and to match cli's cache-key contract. - const buildPromise: Promise = this.loadOrBuild(source, { - content: this.content, - ...(ref !== undefined ? { ref } : {}), - ...(modelPath !== undefined ? { modelPath } : {}), - }) - - this.tasks.set(cacheKey, buildPromise) - - try { - return await buildPromise - } - catch (err) { - // Only evict if this task hasn't already been replaced. - if (this.tasks.get(cacheKey) === buildPromise) { - this.tasks.delete(cacheKey) - } - throw err - } - } - - /** - * Remove the cached entry for `source`. Awaitable so callers (notably the - * file watcher) can guarantee the deletion lands before the next `get()`. - */ - async evict(source: string): Promise { - const cacheKey = await this.computeCacheKey(source) - this.tasks.delete(cacheKey) - } - - /** Number of currently cached entries (for tests / introspection). */ - get size(): number { - return this.tasks.size - } - - /** - * Start a background watcher that evicts + re-gets the index whenever - * files at `path` change. 
Uses chokidar (debounced). - * - * Calling this more than once stops the previous watcher first to avoid - * leaking file handles. - */ - async startWatcher(watchPath: string): Promise { - // Stop any existing watcher before installing a new one. - await this.stopWatcher() - - interface ChokidarWatcher { - on: (event: string, cb: () => void) => void - close: () => Promise - } - interface ChokidarModule { - watch: ( - watchPath: string, - opts: { ignoreInitial: boolean, persistent: boolean }, - ) => ChokidarWatcher - } - let chokidar: ChokidarModule - try { - // Resolve lazily so the module loads even when chokidar is absent. - const mod = (await import('chokidar')) as { default?: ChokidarModule } & ChokidarModule - chokidar = mod.default ?? mod - } - catch { - // chokidar not installed — silently no-op so callers that don't need - // watching still work. - return - } - - // Match semble: watch everything. Upstream relies on the underlying - // walker's .gitignore handling to filter what actually ends up in the - // index; the watcher itself doesn't filter, so projects rooted inside a - // dotfile directory (e.g. ~/.config/proj) still re-index correctly. - const watcher = chokidar.watch(watchPath, { - ignoreInitial: true, - persistent: true, - }) - - let debounce: ReturnType | null = null - const onChange = (): void => { - if (debounce !== null) { - clearTimeout(debounce) - } - debounce = setTimeout(() => { - debounce = null - // Await evict before get so the rebuild sees a fresh cache slot. - void (async () => { - try { - await this.evict(watchPath) - await this.get(watchPath) - } - catch { - // Swallow rebuild errors; the next explicit get() will surface them. 
- } - })() - }, 250) - } - - watcher.on('add', onChange) - watcher.on('change', onChange) - watcher.on('unlink', onChange) - watcher.on('addDir', onChange) - watcher.on('unlinkDir', onChange) - - this.watcherClose = async () => { - if (debounce !== null) { - clearTimeout(debounce) - } - await watcher.close() - } - } - - /** Stop the file watcher, if any. */ - async stopWatcher(): Promise { - if (this.watcherClose !== null) { - const close = this.watcherClose - this.watcherClose = null - await close() - } - } -} - -/** - * Return a cached index for a repo, rejecting unsafe git transport schemes - * and missing-source cases with descriptive errors. - */ -async function getIndex( - repo: string | undefined, - defaultSource: string | undefined, - cache: IndexCache, -): Promise { - if ( - repo !== undefined - && isGitUrl(repo) - && !repo.startsWith('https://') - && !repo.startsWith('http://') - ) { - throw new Error( - `Only https://, http://, or local directory paths are accepted as \`repo\`. Got: ${JSON.stringify(repo)}`, - ) - } - const source = repo ?? defaultSource - if (source === undefined || source === '') { - throw new Error( - 'No repo specified and no default index. ' - + 'Pass an https:// or http:// git URL or local directory path as `repo`.', - ) - } - try { - return await cache.get(source) - } - catch (exc) { - const msg = exc instanceof Error ? exc.message : String(exc) - throw new Error(`Failed to index ${JSON.stringify(source)}: ${msg}`) - } -} - -// Exported for tests so they can exercise the safety branches without the SDK. -export const _internal = { getIndex } - -/** Configured MCP server (typed loosely so we don't depend on the SDK at compile time). */ -export interface CspMcpServer { - /** Tool registry — exposed for test/introspection. */ - readonly tools: ReadonlyMap - /** True when the real `@modelcontextprotocol/sdk` server backs this object. */ - readonly isPlaceholder: boolean - /** Connect to a transport (no-op for the placeholder). 
*/ - connect: (transport: unknown) => Promise - /** Underlying SDK server, if any. */ - readonly underlying: unknown -} - -interface ToolDef { - title: string - description: string - handler: (args: Record) => Promise -} - -/** - * Build and return a configured MCP server backed by the given cache. - * - * If `@modelcontextprotocol/sdk` is installed, this registers `search` and - * `find_related` tools on a real `McpServer`. If it isn't (yet), a - * placeholder is returned so the rest of the module remains usable and - * testable. - */ -export async function createServer( - cache: IndexCache, - defaultSource?: string, -): Promise { - const searchTool: ToolDef = { - title: 'Search a codebase with a natural-language or code query.', - description: - 'Pass a git URL or local path as `repo` to index it on demand; indexes are cached for the session. ' - + 'Use this to find where something is implemented, understand a library, or locate related code.', - handler: async (args) => { - try { - const query = String(args.query ?? '') - const repo = args.repo === undefined ? undefined : String(args.repo) - const topK - = typeof args.top_k === 'number' - ? args.top_k - : typeof args.topK === 'number' - ? args.topK - : 5 - - const index = await getIndex(repo, defaultSource, cache) - const results = index.search(query, { topK }) - if (results.length === 0) { - return JSON.stringify({ error: 'No results found.' }) - } - return JSON.stringify(formatResults(query, results)) - } - catch (err) { - return err instanceof Error ? err.message : String(err) - } - }, - } - - const findRelatedTool: ToolDef = { - title: 'Find code chunks semantically similar to a specific location in a file.', - description: - 'Use after `search` to explore related implementations or callers. ' - + 'Pass file_path and line from a prior search result.', - handler: async (args) => { - try { - const filePath = String(args.file_path ?? args.filePath ?? '') - const line = Number(args.line ?? 
0) - const repo = args.repo === undefined ? undefined : String(args.repo) - const topK - = typeof args.top_k === 'number' - ? args.top_k - : typeof args.topK === 'number' - ? args.topK - : 5 - - const index = await getIndex(repo, defaultSource, cache) - const chunk = resolveChunk(index.chunks, filePath, line) - if (chunk === null) { - return ( - `No chunk found at ${filePath}:${line}. ` - + 'Make sure the file is indexed and the line number is within a known chunk.' - ) - } - const results = index.findRelated(chunk, { topK }) - if (results.length === 0) { - return JSON.stringify({ - error: `No related chunks found for ${filePath}:${line}.`, - }) - } - return JSON.stringify( - formatResults(`Chunks related to ${filePath}:${line}`, results), - ) - } - catch (err) { - return err instanceof Error ? err.message : String(err) - } - }, - } - - const tools = new Map([ - ['search', searchTool], - ['find_related', findRelatedTool], - ]) - - // Try to wire up the real MCP SDK; fall back to a placeholder if it's not - // installed (per the unit spec — Unit 0 may not be merged yet). 
- type McpServerCtor = new ( - info: { name: string, version?: string }, - options?: { instructions?: string }, - ) => McpServerInstance - interface McpServerInstance { - registerTool: ( - name: string, - config: { title: string, description: string, inputSchema?: unknown }, - handler: (args: Record) => Promise, - ) => void - connect: (transport: unknown) => Promise - } - let McpServer: McpServerCtor | null = null - try { - const mod = (await import('@modelcontextprotocol/sdk/server/mcp.js')) as { - McpServer: McpServerCtor - } - McpServer = mod.McpServer - } - catch { - McpServer = null - } - - if (McpServer === null) { - return { - tools, - isPlaceholder: true, - connect: async () => { - throw new Error( - '@modelcontextprotocol/sdk is not installed; createServer returned a placeholder.', - ) - }, - underlying: null, - } - } - - const underlying = new McpServer( - { name: 'csp', version }, - { instructions: SERVER_INSTRUCTIONS }, - ) - - // The MCP SDK's `registerTool` `inputSchema` expects a Zod raw shape - // (`Record`), not raw JSON Schema. zod is a transitive - // dependency of @modelcontextprotocol/sdk, so if the SDK loaded we should - // be able to load zod too. If it isn't reachable for any reason, fall back - // to registering the tool without an input schema so it's still callable. - interface ZodLikeSchema { - optional: () => ZodLikeSchema - describe: (desc: string) => ZodLikeSchema - default: (value: unknown) => ZodLikeSchema - } - interface ZodLikeModule { - string: () => ZodLikeSchema - number: () => ZodLikeSchema - } - let z: ZodLikeModule | null = null - try { - const zmod = (await import('zod')) as { z?: ZodLikeModule } & ZodLikeModule - z = zmod.z ?? zmod - } - catch { - z = null - } - - const searchSchema = z === null - ? 
undefined - : { - query: z.string().describe('Natural language or code query.'), - repo: z.string().describe(REPO_DESCRIPTION).optional(), - top_k: z.number().describe('Number of results to return.').default(5), - } - - const findRelatedSchema = z === null - ? undefined - : { - file_path: z - .string() - .describe( - 'Path to the file as stored in the index (use file_path from a search result).', - ), - line: z.number().describe('Line number (1-indexed).'), - repo: z.string().describe(REPO_DESCRIPTION).optional(), - top_k: z.number().describe('Number of similar chunks to return.').default(5), - } - - underlying.registerTool( - 'search', - { - title: searchTool.title, - description: searchTool.description, - ...(searchSchema !== undefined ? { inputSchema: searchSchema } : {}), - }, - async args => ({ - content: [{ type: 'text', text: await searchTool.handler(args) }], - }), - ) - - underlying.registerTool( - 'find_related', - { - title: findRelatedTool.title, - description: findRelatedTool.description, - ...(findRelatedSchema !== undefined ? { inputSchema: findRelatedSchema } : {}), - }, - async args => ({ - content: [ - { type: 'text', text: await findRelatedTool.handler(args) }, - ], - }), - ) - - return { - tools, - isPlaceholder: false, - connect: async transport => underlying.connect(transport), - underlying, - } -} - -export interface ServeOptions { - ref?: string | undefined - content?: ContentType[] -} - -/** - * Start an MCP stdio server, optionally pre-indexing a default source. - * - * Pre-warms the embedding model in parallel with starting the server and - * starts a file watcher for local paths. - */ -export async function serve(path?: string, options: ServeOptions = {}): Promise { - const cache = new IndexCache({ content: options.content ?? [ContentType.CODE] }) - - // Kick off model load + optional pre-index in parallel with server startup. 
- const prewarm = (async (): Promise => { - try { - cache.ensureModelLoading() - // Wait for the model load to settle before pre-indexing. - // awaitModel is private; ensure the model is ready by triggering and - // catching get() — which itself awaits the model. - if (path !== undefined && path !== '') { - try { - await cache.get(path, options.ref) - } - catch { - // Pre-indexing failure shouldn't crash the server. - } - if (!isGitUrl(path)) { - try { - await cache.startWatcher(path) - } - catch { - // Watcher failure is non-fatal. - } - } - } - } - catch { - // Already logged via modelError; the server can still report errors per-call. - } - })() - - const server = await createServer(cache, path) - - // Attempt to attach stdio transport from the SDK; if not available, log and exit cleanly. - let StdioTransportCtor: - | (new () => { close?: () => Promise | void }) - | null = null - try { - const mod = (await import('@modelcontextprotocol/sdk/server/stdio.js')) as { - StdioServerTransport: new () => { close?: () => Promise | void } - } - StdioTransportCtor = mod.StdioServerTransport - } - catch { - StdioTransportCtor = null - } - - if (StdioTransportCtor === null || server.isPlaceholder) { - // No SDK — nothing to serve. Await pre-warm so callers can inspect the - // cache, then tear down the watcher so this path doesn't leak file - // handles (the prewarm above may have started one). - try { - await prewarm - } - finally { - await cache.stopWatcher() - } - return - } - - // Hook into stdin EOF so we can return once the client disconnects, mirroring - // semble's `run_stdio_async()` blocking semantics. Both listeners share a - // single cleanup so whichever event fires first removes the other — - // otherwise repeated `serve()` calls (tests, restarts) accumulate listeners - // on `process.stdin` and trip MaxListenersExceededWarning. 
- const stdinClosed = new Promise((resolve) => { - const cleanup = (): void => { - process.stdin.removeListener('end', cleanup) - process.stdin.removeListener('close', cleanup) - resolve() - } - process.stdin.on('end', cleanup) - process.stdin.on('close', cleanup) - }) - - const transport = new StdioTransportCtor() - try { - // connect() must be inside the try so a failure here still runs the - // transport/watcher cleanup below. - await server.connect(transport) - // Block on stdin close — connect() returns immediately after handshake, - // and we MUST NOT close the transport until the client disconnects. - await stdinClosed - // After the client disconnects, drain any pre-warm work that's still in - // flight so we don't orphan promises. - await prewarm - } - finally { - if (typeof transport.close === 'function') { - await transport.close() - } - await cache.stopWatcher() - } -} diff --git a/src/ranking/.gitkeep b/src/ranking/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/ranking/boosting.test.ts b/src/ranking/boosting.test.ts deleted file mode 100644 index cf01723..0000000 --- a/src/ranking/boosting.test.ts +++ /dev/null @@ -1,297 +0,0 @@ -import type { Chunk } from '../types.ts' -import { describe, expect, test } from 'bun:test' -import { - _chunkDefinesSymbol, - _countKeywordMatches, - _extractSymbolName, - _stemMatches, - applyQueryBoost, - boostMultiChunkFiles, - - DEFINITION_BOOST_MULTIPLIER, - EMBEDDED_SYMBOL_BOOST_SCALE, - FILE_COHERENCE_BOOST_FRAC, - isSymbolQuery, -} from './boosting.ts' - -function mkChunk(content: string, filePath: string, startLine = 1, endLine = 10): Chunk { - return { content, filePath, startLine, endLine } -} - -describe('isSymbolQuery', () => { - test('PascalCase identifiers are symbol queries', () => { - expect(isSymbolQuery('HandlerStack')).toBe(true) - expect(isSymbolQuery('Client')).toBe(true) - }) - - test('namespace-qualified identifiers are symbol queries', () => { - 
expect(isSymbolQuery('Sinatra::Base')).toBe(true) - expect(isSymbolQuery('Phoenix.Router')).toBe(true) - expect(isSymbolQuery('foo->bar')).toBe(true) - expect(isSymbolQuery('A\\B\\C')).toBe(true) - }) - - test('leading-underscore identifiers are symbol queries', () => { - expect(isSymbolQuery('_private')).toBe(true) - expect(isSymbolQuery('_')).toBe(true) - }) - - test('snake_case identifiers are symbol queries', () => { - expect(isSymbolQuery('my_func')).toBe(true) - }) - - test('plain lowercase words are NL', () => { - expect(isSymbolQuery('session')).toBe(false) - expect(isSymbolQuery('foo')).toBe(false) - }) - - test('NL phrases are NL', () => { - expect(isSymbolQuery('how does this work')).toBe(false) - expect(isSymbolQuery('find the cache layer')).toBe(false) - }) - - test('trims whitespace', () => { - expect(isSymbolQuery(' HandlerStack ')).toBe(true) - }) -}) - -describe('_extractSymbolName', () => { - test('extracts trailing name after :: separator', () => { - expect(_extractSymbolName('Sinatra::Base')).toBe('Base') - }) - - test('extracts trailing name after .', () => { - expect(_extractSymbolName('Phoenix.Router')).toBe('Router') - }) - - test('extracts trailing name after ->', () => { - expect(_extractSymbolName('foo->bar')).toBe('bar') - }) - - test('returns the original (trimmed) when no separator', () => { - expect(_extractSymbolName('Client')).toBe('Client') - expect(_extractSymbolName(' Client ')).toBe('Client') - }) -}) - -describe('_stemMatches', () => { - test('exact match', () => { - expect(_stemMatches('client', 'client')).toBe(true) - }) - - test('snake-stripped match', () => { - expect(_stemMatches('handler_stack', 'handlerstack')).toBe(true) - }) - - test('plural-stripped match', () => { - expect(_stemMatches('clients', 'client')).toBe(true) - expect(_stemMatches('handler_stacks', 'handlerstack')).toBe(true) - }) - - test('no match', () => { - expect(_stemMatches('foo', 'bar')).toBe(false) - }) -}) - -describe('_chunkDefinesSymbol', () => { 
- test('matches class definition', () => { - const chunk = mkChunk('class HandlerStack:\n pass\n', 'a.py') - expect(_chunkDefinesSymbol(chunk, 'HandlerStack')).toBe(true) - }) - - test('matches def function', () => { - const chunk = mkChunk('def my_func(x):\n return x\n', 'a.py') - expect(_chunkDefinesSymbol(chunk, 'my_func')).toBe(true) - }) - - test('matches namespace-qualified defmodule for trailing name', () => { - const chunk = mkChunk('defmodule Phoenix.Router do\nend\n', 'a.ex') - expect(_chunkDefinesSymbol(chunk, 'Router')).toBe(true) - }) - - test('case-sensitive: does not match "Module" as keyword', () => { - const chunk = mkChunk('Module Foo', 'a.txt') - expect(_chunkDefinesSymbol(chunk, 'Foo')).toBe(false) - }) - - test('case-insensitive for SQL DDL', () => { - const chunk = mkChunk('create table users (id int);', 'a.sql') - expect(_chunkDefinesSymbol(chunk, 'users')).toBe(true) - const chunk2 = mkChunk('CREATE TABLE users (id int);', 'a.sql') - expect(_chunkDefinesSymbol(chunk2, 'users')).toBe(true) - }) - - test('does not match in the middle of a word', () => { - const chunk = mkChunk('# subclass Foo\n', 'a.py') - expect(_chunkDefinesSymbol(chunk, 'Foo')).toBe(false) - }) -}) - -describe('_countKeywordMatches', () => { - test('all exact matches', () => { - expect(_countKeywordMatches(new Set(['foo', 'bar']), new Set(['foo', 'bar', 'baz']))).toBe(2) - }) - - test('prefix overlap (min 3 chars)', () => { - // "dep" matches "dependency" (keyword shorter than part) - expect(_countKeywordMatches(new Set(['dep']), new Set(['dependency']))).toBe(1) - // "depend" matches "dependencies" (both ≥3, longer.startsWith(shorter)) - expect(_countKeywordMatches(new Set(['depend']), new Set(['dependencies']))).toBe(1) - // Part shorter than keyword also works (shorter is part) - expect(_countKeywordMatches(new Set(['dependency']), new Set(['dep']))).toBe(1) - }) - - test('skips < 3 chars', () => { - expect(_countKeywordMatches(new Set(['de']), new 
Set(['dependency']))).toBe(0) - }) -}) - -describe('boostMultiChunkFiles', () => { - test('top chunk receives boost_unit * fileSum / maxFileSum', () => { - const c1 = mkChunk('x', 'a.ts', 1, 10) - const c2 = mkChunk('y', 'a.ts', 11, 20) - const c3 = mkChunk('z', 'a.ts', 21, 30) - const cOther = mkChunk('q', 'b.ts') - - const scores = new Map([ - [c1, 0.5], - [c2, 0.4], - [c3, 0.3], - [cOther, 0.2], - ]) - - boostMultiChunkFiles(scores) - - // Top chunk in a.ts is c1 (0.5). file_sum["a.ts"] = 1.2, file_sum["b.ts"] = 0.2. - // max_score = 0.5, boost_unit = 0.5 * 0.2 = 0.1, max_file_sum = 1.2. - // c1 gets: 0.5 + 0.1 * 1.2 / 1.2 = 0.6 - // cOther gets: 0.2 + 0.1 * 0.2 / 1.2 ≈ 0.21666... - expect(scores.get(c1)).toBeCloseTo(0.6, 10) - expect(scores.get(c2)).toBe(0.4) - expect(scores.get(c3)).toBe(0.3) - expect(scores.get(cOther)).toBeCloseTo(0.2 + 0.1 * 0.2 / 1.2, 10) - }) - - test('no-op on empty map', () => { - const scores = new Map() - boostMultiChunkFiles(scores) - expect(scores.size).toBe(0) - }) - - test('no-op when max score is zero', () => { - const c = mkChunk('x', 'a.ts') - const scores = new Map([[c, 0]]) - boostMultiChunkFiles(scores) - expect(scores.get(c)).toBe(0) - }) - - test('no NaN/Infinity when fileSums cancel to zero', () => { - // Positive and negative scores within each file sum to zero → maxFileSum == 0. - // Without the guard, the boost formula would divide by zero and corrupt the scores map. - const c1 = mkChunk('x', 'a.ts', 1, 10) - const c2 = mkChunk('y', 'a.ts', 11, 20) - const scores = new Map([ - [c1, 1.0], - [c2, -1.0], - ]) - boostMultiChunkFiles(scores) - const v1 = scores.get(c1) - const v2 = scores.get(c2) - expect(Number.isFinite(v1 ?? Number.NaN)).toBe(true) - expect(Number.isFinite(v2 ?? Number.NaN)).toBe(true) - // No mutation expected when maxFileSum <= 0. 
- expect(v1).toBe(1.0) - expect(v2).toBe(-1.0) - }) - - test('uses FILE_COHERENCE_BOOST_FRAC = 0.2', () => { - // Single chunk, single file → fileSum == maxFileSum, so boost = boost_unit. - const c = mkChunk('x', 'a.ts') - const scores = new Map([[c, 1.0]]) - boostMultiChunkFiles(scores) - expect(scores.get(c)).toBeCloseTo(1.0 + 1.0 * FILE_COHERENCE_BOOST_FRAC, 10) - }) -}) - -describe('applyQueryBoost', () => { - test('symbol query with definition keyword boosts chunk by DEFINITION_BOOST_MULTIPLIER * maxScore (1.0× when stem does not match)', () => { - // File stem is "other", not "handlerstack" → 1.0× tier. - const defChunk = mkChunk('class HandlerStack:\n pass\n', 'other.py') - const otherChunk = mkChunk('print("hi")', 'b.py') - - const scores = new Map([ - [defChunk, 0.5], - [otherChunk, 1.0], - ]) - const boosted = applyQueryBoost(scores, 'HandlerStack', [defChunk, otherChunk]) - - // maxScore = 1.0, boostUnit = 1.0 * 3.0 = 3.0; defChunk picks up 3.0 (1.0× tier). - expect(boosted.get(defChunk)).toBeCloseTo(0.5 + 1.0 * DEFINITION_BOOST_MULTIPLIER, 10) - expect(boosted.get(otherChunk)).toBe(1.0) - }) - - test('symbol query with matching file stem gets 1.5× tier boost', () => { - // Stem "handler_stack" matches "handlerstack" after snake-stripping. - const defChunk = mkChunk('class HandlerStack:\n pass\n', 'handler_stack.py') - const scores = new Map([[defChunk, 0.5]]) - const boosted = applyQueryBoost(scores, 'HandlerStack', [defChunk]) - // boostUnit = 0.5 * 3.0 = 1.5; tier = 1.5 * 1.5 = 2.25; new score = 0.5 + 2.25 = 2.75. 
- expect(boosted.get(defChunk)).toBeCloseTo(2.75, 10) - }) - - test('symbol query promotes non-candidate stem-matching chunks', () => { - const candidate = mkChunk('print("hi")', 'b.py') - const nonCandidate = mkChunk('class HandlerStack:\n pass\n', 'handler_stack.py') - const scores = new Map([[candidate, 1.0]]) - const boosted = applyQueryBoost(scores, 'HandlerStack', [candidate, nonCandidate]) - // Non-candidate appears with score = boostUnit * 1.5 = 1.0 * 3.0 * 1.5 = 4.5. - expect(boosted.get(nonCandidate)).toBeCloseTo(4.5, 10) - }) - - test('NL query with embedded PascalCase triggers half-strength embedded boost', () => { - const defChunk = mkChunk('class StateManager:\n pass\n', 'state_manager.py') - const scores = new Map([[defChunk, 1.0]]) - const boosted = applyQueryBoost( - scores, - 'where does the StateManager initialize state', - [defChunk], - ) - // Embedded boost: tier-with-stem-match = boostUnit * 1.5 - // boostUnit_embedded = 1.0 * DEFINITION_BOOST_MULTIPLIER * EMBEDDED_SYMBOL_BOOST_SCALE = 1.5 - // tier = 1.5 * 1.5 = 2.25 → new score = 1.0 + 2.25 = 3.25 - // Plus possible stem-match boost from `_boostStemMatches`. To avoid that ambiguity, - // assert lower bound. - const expectedEmbedded = DEFINITION_BOOST_MULTIPLIER * EMBEDDED_SYMBOL_BOOST_SCALE * 1.5 - const result = boosted.get(defChunk) ?? 
0 - expect(result).toBeGreaterThanOrEqual(1.0 + expectedEmbedded - 1e-9) - }) - - test('returns a new map and does not mutate input', () => { - const c = mkChunk('class Foo:\n pass\n', 'foo.py') - const original = new Map([[c, 1.0]]) - const boosted = applyQueryBoost(original, 'Foo', [c]) - expect(original.get(c)).toBe(1.0) - expect(boosted).not.toBe(original) - expect(boosted.get(c)).toBeGreaterThan(1.0) - }) - - test('empty input returns a fresh map (no aliasing of caller state)', () => { - const empty = new Map() - const out = applyQueryBoost(empty, 'foo', []) - expect(out.size).toBe(0) - // Result must not alias the caller's map: mutating the result must not affect the input. - expect(out).not.toBe(empty) - out.set(mkChunk('x', 'a.ts'), 1) - expect(empty.size).toBe(0) - }) - - test('NL query boosts via stem matches when file path words match', () => { - const c = mkChunk('print("hi")', 'cache_layer.py') - const scores = new Map([[c, 1.0]]) - const boosted = applyQueryBoost(scores, 'find the cache layer', [c]) - // Keywords: {find, the→stopword, cache, layer} → {find, cache, layer}. - // Parts from "cache_layer" split → cache_layer, cache, layer - // Matches: cache, layer → n=2, ratio=2/3, boost = 1.0 * 1.0 * 2/3 - expect(boosted.get(c)).toBeCloseTo(1.0 + 2 / 3, 10) - }) -}) diff --git a/src/ranking/boosting.ts b/src/ranking/boosting.ts deleted file mode 100644 index f524d25..0000000 --- a/src/ranking/boosting.ts +++ /dev/null @@ -1,459 +0,0 @@ -// Port of src/semble/ranking/boosting.py - -import type { Chunk } from '../types.ts' -import { splitIdentifier } from '../tokens.ts' - -// Symbol-lookup queries: namespace-qualified, leading-underscore, or containing -// uppercase/underscore. Plain lowercase words (e.g. "session") are NL, not symbols. 
-export const SYMBOL_QUERY_RE = /^(?:[A-Z_a-z]\w*(?:(?:::|\\|->|\.)[A-Z_a-z]\w*)+|_\w*|[A-Za-z][\da-z]*[A-Z_]\w*|[A-Z][A-Za-z0-9]*)$/ - -// CamelCase/camelCase identifiers embedded in a NL query; excludes plain words and pure acronyms. -export const EMBEDDED_SYMBOL_RE = /\b(?:[A-Z][a-z][\da-z]*[A-Z][\dA-Za-z]*|[a-z][\da-z]*[A-Z][\dA-Za-z]+)\b/g - -// Minimum stem length for prefix-based non-candidate scan (avoids over-broad matches). -export const EMBEDDED_STEM_MIN_LEN = 4 - -// Half-strength: the symbol may be incidental to the NL query. -export const EMBEDDED_SYMBOL_BOOST_SCALE = 0.5 - -// Case-sensitive: IGNORECASE produces false positives like "Module" in Python docs -// or "Class" method calls in Ruby. -export const DEFINITION_KEYWORDS = [ - 'class', - 'module', - 'defmodule', // Elixir - 'def', - 'interface', - 'struct', - 'enum', - 'trait', - 'type', - 'func', - 'function', - 'object', - 'abstract class', - 'data class', - 'fn', - 'fun', // Kotlin - 'package', - 'namespace', - 'protocol', // Swift - 'record', // C# 9+, Java 16+ - 'typedef', // C/C++/Dart -] as const - -// SQL DDL is conventionally all-caps or all-lowercase; match both via IGNORECASE. -export const SQL_DEFINITION_KEYWORDS = [ - 'CREATE TABLE', - 'CREATE VIEW', - 'CREATE PROCEDURE', - 'CREATE FUNCTION', -] as const - -// Additive boost multiplier for chunks that define a queried symbol. -export const DEFINITION_BOOST_MULTIPLIER = 3.0 - -// Additive boost multiplier for NL queries when file stems match query words. -export const STEM_BOOST_MULTIPLIER = 1.0 - -// Fraction of max_score added to each file's top chunk, scaled by its aggregate candidate score. -export const FILE_COHERENCE_BOOST_FRAC = 0.2 - -// Common English stopwords excluded from file-stem matching for NL queries. 
-export const STOPWORDS: ReadonlySet = new Set( - ('a an and are as at be by do does for from has have how if in is it not of on or the to was' - + ' what when where which who why with').split(' '), -) - -function escapeRegex(s: string): string { - return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') -} - -/** Find the max numeric value in an iterable without spreading (avoids argument-count limits). */ -function maxValue(values: Iterable): number { - let m = Number.NEGATIVE_INFINITY - for (const v of values) { - if (v > m) { - m = v - } - } - return m -} - -const KEYWORD_PREFIX = '(?:^|(?<=\\s))(?:' -const DEFINITION_KEYWORD_BODY = DEFINITION_KEYWORDS.map(escapeRegex).join('|') -const SQL_KEYWORD_BODY = SQL_DEFINITION_KEYWORDS.map(escapeRegex).join('|') - -/** Return True if the query looks like a bare symbol or namespace-qualified identifier. */ -export function isSymbolQuery(query: string): boolean { - return SYMBOL_QUERY_RE.test(query.trim()) -} - -/** Apply query-type boosts to candidate scores. Returns a new Map. */ -export function applyQueryBoost( - combinedScores: Map, - query: string, - allChunks: Chunk[], -): Map { - if (combinedScores.size === 0) { - // Always return a fresh Map to honor the non-mutating contract; do not alias caller state. - return new Map() - } - - const maxScore = maxValue(combinedScores.values()) - const boosted = new Map(combinedScores) - - if (isSymbolQuery(query)) { - _boostSymbolDefinitions(boosted, query, maxScore, allChunks) - } - else { - _boostStemMatches(boosted, query, maxScore) - _boostEmbeddedSymbols(boosted, query, maxScore, allChunks) - } - - return boosted -} - -/** Promote files with multiple high-scoring chunks by boosting their top chunk (in-place). 
*/ -export function boostMultiChunkFiles(scores: Map): void { - if (scores.size === 0) { - return - } - - const maxScore = maxValue(scores.values()) - if (maxScore === 0.0) { - return - } - - const fileSum = new Map() - const bestChunk = new Map() - for (const [chunk, score] of scores) { - const filePath = chunk.filePath - fileSum.set(filePath, (fileSum.get(filePath) ?? 0.0) + score) - const existingBest = bestChunk.get(filePath) - if (existingBest === undefined || score > (scores.get(existingBest) ?? -Infinity)) { - bestChunk.set(filePath, chunk) - } - } - - const maxFileSum = maxValue(fileSum.values()) - // Guard against zero/negative maxFileSum to avoid NaN / Infinity from the division below - // (e.g. when positive and negative chunk scores cancel out within every file). - if (maxFileSum <= 0) { - return - } - const boostUnit = maxScore * FILE_COHERENCE_BOOST_FRAC - for (const [filePath, chunk] of bestChunk) { - const sum = fileSum.get(filePath) ?? 0.0 - scores.set(chunk, (scores.get(chunk) ?? 0.0) + boostUnit * sum / maxFileSum) - } -} - -/** - * Extract the final identifier from a possibly namespace-qualified query. - * - * Examples: "Sinatra::Base" → "Base", "Client" → "Client". - */ -export function _extractSymbolName(query: string): string { - for (const separator of ['::', '\\', '->', '.']) { - const idx = query.lastIndexOf(separator) - if (idx !== -1) { - return query.slice(idx + separator.length) - } - } - return query.trim() -} - -// LRU-ish cache for compiled definition patterns; simple FIFO eviction at 256 entries. 
-const DEFINITION_PATTERN_CACHE_MAX = 256 -const _definitionPatternCache = new Map() - -export function _definitionPattern(symbolName: string): [RegExp, RegExp] { - const cached = _definitionPatternCache.get(symbolName) - if (cached !== undefined) { - return cached - } - - const escaped = escapeRegex(symbolName) - const nsPrefix = '(?:[A-Z_a-z]\\w*(?:\\.|::))*' - const suffix = `)\\s+${nsPrefix}${escaped}(?:\\s|[<({:\\[;]|$)` - const general = new RegExp(KEYWORD_PREFIX + DEFINITION_KEYWORD_BODY + suffix, 'm') - const sql = new RegExp(KEYWORD_PREFIX + SQL_KEYWORD_BODY + suffix, 'im') - const entry: [RegExp, RegExp] = [general, sql] - - if (_definitionPatternCache.size >= DEFINITION_PATTERN_CACHE_MAX) { - // FIFO eviction: drop the oldest entry. - const firstKey = _definitionPatternCache.keys().next().value - if (firstKey !== undefined) { - _definitionPatternCache.delete(firstKey) - } - } - _definitionPatternCache.set(symbolName, entry) - return entry -} - -/** - * Return True if the chunk contains a definition of *symbolName*. - * - * Case-sensitive for general keywords, case-insensitive for SQL DDL. - * Also matches namespace-qualified forms (e.g. `defmodule Phoenix.Router` for `Router`). - */ -export function _chunkDefinesSymbol(chunk: Chunk, symbolName: string): boolean { - const [general, sql] = _definitionPattern(symbolName) - return general.test(chunk.content) || sql.test(chunk.content) -} - -// Mirror Python's `str.rstrip("s")`: strip all trailing 's' characters. -function stripTrailingS(s: string): string { - return s.endsWith('s') ? s.replace(/s+$/, '') : s -} - -/** Return True if *stem* matches *name* (exact, snake_case-normalised, or plural). 
*/ -export function _stemMatches(stem: string, name: string): boolean { - const stemNorm = stem.replace(/_/g, '') - return stem === name - || stemNorm === name - || stripTrailingS(stem) === name - || stripTrailingS(stemNorm) === name -} - -function pathStemOriginal(filePath: string): string { - // Match Python's pathlib.Path.stem: filename without suffix; handles both / and \. - // Path.stem leaves leading-dot files untouched (".gitignore" → ".gitignore"). - const sepIdx = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) - const base = sepIdx === -1 ? filePath : filePath.slice(sepIdx + 1) - const dotIdx = base.lastIndexOf('.') - return dotIdx <= 0 ? base : base.slice(0, dotIdx) -} - -function pathStemLower(filePath: string): string { - return pathStemOriginal(filePath).toLowerCase() -} - -function pathParentName(filePath: string): string { - // Strip trailing separators, then take the segment before the basename. - const cleaned = filePath.replace(/[/\\]+$/, '') - const sepIdx = Math.max(cleaned.lastIndexOf('/'), cleaned.lastIndexOf('\\')) - if (sepIdx === -1) { - return '' - } - const parent = cleaned.slice(0, sepIdx) - const parentSepIdx = Math.max(parent.lastIndexOf('/'), parent.lastIndexOf('\\')) - return parentSepIdx === -1 ? parent : parent.slice(parentSepIdx + 1) -} - -/** Return the boost amount for a chunk that defines one of *names* (0.0 if none match). */ -export function _definitionTier(chunk: Chunk, names: Set, boostUnit: number): number { - let matches = false - for (const name of names) { - if (_chunkDefinesSymbol(chunk, name)) { - matches = true - break - } - } - if (!matches) { - return 0.0 - } - const stem = pathStemLower(chunk.filePath) - for (const name of names) { - if (_stemMatches(stem, name.toLowerCase())) { - return boostUnit * 1.5 - } - } - return boostUnit * 1.0 -} - -/** Boost non-candidate chunks whose lowercased file stem satisfies stemOk (in-place). 
*/ -export function _scanNonCandidates( - boosted: Map, - names: Set, - boostUnit: number, - allChunks: Chunk[], - stemOk: (stem: string) => boolean, -): void { - for (const chunk of allChunks) { - if (boosted.has(chunk)) { - continue - } - if (!stemOk(pathStemLower(chunk.filePath))) { - continue - } - const tier = _definitionTier(chunk, names, boostUnit) - if (tier !== 0.0) { - boosted.set(chunk, tier) - } - } -} - -/** Boost chunks that define the queried symbol, scanning candidates and stem-matched non-candidates (in-place). */ -export function _boostSymbolDefinitions( - boosted: Map, - query: string, - maxScore: number, - allChunks: Chunk[], -): void { - const symbolName = _extractSymbolName(query) - const names = new Set([symbolName]) - const trimmed = query.trim() - if (symbolName !== trimmed) { - names.add(trimmed) - } - - const boostUnit = maxScore * DEFINITION_BOOST_MULTIPLIER - - // Iterate keys() directly: we only update existing entries, never add/delete during iteration. - for (const chunk of boosted.keys()) { - const tier = _definitionTier(chunk, names, boostUnit) - if (tier !== 0.0) { - boosted.set(chunk, (boosted.get(chunk) ?? 0.0) + tier) - } - } - - const symbolLower = symbolName.toLowerCase() - _scanNonCandidates( - boosted, - names, - boostUnit, - allChunks, - stem => _stemMatches(stem, symbolLower), - ) -} - -/** - * Boost chunks defining CamelCase/camelCase symbols embedded in NL queries (in-place). - * - * Half-strength vs pure symbol queries. Non-candidate scan uses stem-prefix match - * so e.g. `state.ts` is found for symbol `StateManager`. - */ -export function _boostEmbeddedSymbols( - boosted: Map, - query: string, - maxScore: number, - allChunks: Chunk[], -): void { - const names = new Set(query.match(EMBEDDED_SYMBOL_RE) ?? 
[]) - if (names.size === 0) { - return - } - - const boostUnit = maxScore * DEFINITION_BOOST_MULTIPLIER * EMBEDDED_SYMBOL_BOOST_SCALE - - // Iterate keys() directly: we only update existing entries, never add/delete during iteration. - for (const chunk of boosted.keys()) { - const tier = _definitionTier(chunk, names, boostUnit) - if (tier !== 0.0) { - boosted.set(chunk, (boosted.get(chunk) ?? 0.0) + tier) - } - } - - const symbolsLower: string[] = Array.from(names, s => s.toLowerCase()) - for (const chunk of allChunks) { - if (boosted.has(chunk)) { - continue - } - const stem = pathStemLower(chunk.filePath) - const stemNorm = stem.replace(/_/g, '') - let matches = false - for (const symbolLower of symbolsLower) { - if ( - stem === symbolLower - || stemNorm === symbolLower - || (stem.length >= EMBEDDED_STEM_MIN_LEN && symbolLower.startsWith(stem)) - || (stemNorm.length >= EMBEDDED_STEM_MIN_LEN && symbolLower.startsWith(stemNorm)) - ) { - matches = true - break - } - } - if (!matches) { - continue - } - const tier = _definitionTier(chunk, names, boostUnit) - if (tier !== 0.0) { - boosted.set(chunk, tier) - } - } -} - -/** Count query keywords that match path parts, allowing prefix overlap (min 3 chars). */ -export function _countKeywordMatches(keywords: Set, parts: Set): number { - let exactCount = 0 - const exact = new Set() - for (const k of keywords) { - if (parts.has(k)) { - exact.add(k) - exactCount++ - } - } - if (exactCount === keywords.size) { - return exactCount - } - let nMatches = exactCount - for (const keyword of keywords) { - if (exact.has(keyword)) { - continue - } - for (const part of parts) { - // Avoid array allocation + destructuring on every iteration; pick shorter/longer directly. - const shorter = keyword.length <= part.length ? keyword : part - const longer = keyword.length <= part.length ? 
part : keyword - if (shorter.length >= 3 && longer.startsWith(shorter)) { - nMatches++ - break - } - } - } - return nMatches -} - -const QUERY_WORD_RE = /[A-Z_]\w*/gi - -/** - * Boost chunks whose file paths match NL query keywords (in-place). - * - * Uses prefix matching for morphological variants (e.g. "dependency" matches - * "dependencies"). Matches file stems and the immediate parent directory name. - */ -export function _boostStemMatches( - boosted: Map, - query: string, - maxScore: number, -): void { - const keywords = new Set() - for (const word of query.match(QUERY_WORD_RE) ?? []) { - if (word.length > 2) { - const lower = word.toLowerCase() - if (!STOPWORDS.has(lower)) { - keywords.add(lower) - } - } - } - if (keywords.size === 0) { - return - } - - const boost = maxScore * STEM_BOOST_MULTIPLIER - const pathCache = new Map>() - // Iterate keys() directly: we only update existing entries, never add/delete during iteration. - for (const chunk of boosted.keys()) { - let parts = pathCache.get(chunk.filePath) - if (parts === undefined) { - // Use original-case stem so splitIdentifier sees camelCase boundaries. - parts = new Set(splitIdentifier(pathStemOriginal(chunk.filePath))) - const parentName = pathParentName(chunk.filePath) - if (parentName !== '' && parentName !== '.' && parentName !== '/' && parentName !== '..') { - for (const p of splitIdentifier(parentName)) { - parts.add(p) - } - } - pathCache.set(chunk.filePath, parts) - } - const nMatches = _countKeywordMatches(keywords, parts) - if (nMatches > 0) { - const matchRatio = nMatches / keywords.size - if (matchRatio >= 0.10) { - boosted.set(chunk, (boosted.get(chunk) ?? 0.0) + boost * matchRatio) - } - } - } -} diff --git a/src/ranking/penalties.test.ts b/src/ranking/penalties.test.ts deleted file mode 100644 index 52bbcf3..0000000 --- a/src/ranking/penalties.test.ts +++ /dev/null @@ -1,200 +0,0 @@ -// Tests for src/ranking/penalties.ts — parity checked against the Python source. 
-import { describe, expect, it } from 'bun:test' - -import { - _filePathPenalty, - FILE_SATURATION_DECAY, - MILD_PENALTY, - MODERATE_PENALTY, - rerankTopK, - STRONG_PENALTY, -} from './penalties.ts' - -interface Chunk { - content: string - filePath: string - startLine: number - endLine: number - language?: string -} - -function makeChunk(filePath: string, idx = 0): Chunk { - return { - content: `chunk ${idx}`, - filePath, - startLine: idx, - endLine: idx + 1, - } -} - -describe('_filePathPenalty', () => { - it('penalises JS/TS test files with STRONG_PENALTY', () => { - expect(_filePathPenalty('src/foo.test.ts')).toBe(STRONG_PENALTY) - }) - - it('penalises .spec.tsx files with STRONG_PENALTY', () => { - expect(_filePathPenalty('src/foo.spec.tsx')).toBe(STRONG_PENALTY) - }) - - it('penalises __init__.py with MODERATE_PENALTY (re-export barrel)', () => { - expect(_filePathPenalty('src/__init__.py')).toBe(MODERATE_PENALTY) - }) - - it('penalises .d.ts type stubs with MILD_PENALTY', () => { - expect(_filePathPenalty('src/foo.d.ts')).toBe(MILD_PENALTY) - }) - - it('penalises files under tests/ — TEST_DIR + TEST_FILE share one STRONG branch', () => { - // Python parity: only one STRONG_PENALTY multiplication regardless of how - // many of {TEST_FILE_RE, TEST_DIR_RE} match (they are OR'd in one branch). 
- expect(_filePathPenalty('tests/test_foo.py')).toBeCloseTo(STRONG_PENALTY, 10) - }) - - it('returns 1.0 for ordinary source files', () => { - expect(_filePathPenalty('src/foo.ts')).toBe(1.0) - }) - - it('compounds STRONG (examples/) and STRONG (.test.ts) penalties', () => { - // Python: examples/foo.test.ts -> 0.09 - expect(_filePathPenalty('examples/foo.test.ts')).toBeCloseTo(STRONG_PENALTY * STRONG_PENALTY, 10) - }) - - it('compounds MILD (.d.ts) and MODERATE (__init__) penalties', () => { - // Python: src/__init__.d.ts -> 0.7 (only .d.ts matches; basename is __init__.d.ts) - expect(_filePathPenalty('src/__init__.d.ts')).toBe(MILD_PENALTY) - }) - - it('penalises compat dirs with STRONG_PENALTY', () => { - expect(_filePathPenalty('compat/foo.ts')).toBe(STRONG_PENALTY) - }) - - it('penalises examples dirs with STRONG_PENALTY', () => { - expect(_filePathPenalty('examples/foo.ts')).toBe(STRONG_PENALTY) - }) - - it('normalises backslashes to forward slashes before matching', () => { - expect(_filePathPenalty('src\\foo.test.ts')).toBe(STRONG_PENALTY) - }) - - it('handles bare __init__.py basename without path', () => { - expect(_filePathPenalty('__init__.py')).toBe(MODERATE_PENALTY) - }) - - it('penalises Go _test.go files', () => { - expect(_filePathPenalty('pkg/foo_test.go')).toBe(STRONG_PENALTY) - }) - - it('penalises Java FooTests.java files', () => { - expect(_filePathPenalty('src/FooTests.java')).toBe(STRONG_PENALTY) - }) - - it('penalises legacy dirs with STRONG_PENALTY', () => { - expect(_filePathPenalty('legacy/foo.ts')).toBe(STRONG_PENALTY) - }) -}) - -describe('rerankTopK', () => { - it('returns an empty list for empty input', () => { - expect(rerankTopK(new Map(), 5)).toEqual([]) - }) - - it('returns an empty list for non-positive topK', () => { - const a = makeChunk('a.ts', 0) - const scores = new Map([[a, 1.0]]) - expect(rerankTopK(scores, 0)).toEqual([]) - expect(rerankTopK(scores, -1)).toEqual([]) - expect(rerankTopK(scores, -5)).toEqual([]) - }) - - 
it('applies saturation decay to chunks from the same file', () => { - // 4 chunks from the same file, all initial score 1.0, no path penalty. - const a = makeChunk('src/foo.ts', 0) - const b = makeChunk('src/foo.ts', 1) - const c = makeChunk('src/foo.ts', 2) - const d = makeChunk('src/foo.ts', 3) - const scores = new Map([ - [a, 1.0], - [b, 1.0], - [c, 1.0], - [d, 1.0], - ]) - const result = rerankTopK(scores, 4, { penalisePaths: false }) - expect(result).toHaveLength(4) - // Sorted descending after decay; ties preserved by sort stability of computation. - const finalScores = result.map(([, s]) => s) - // First chunk picked: 1.0 (no decay) - // Second: 1.0 * 0.5 = 0.5 - // Third: 1.0 * 0.25 = 0.25 - // Fourth: 1.0 * 0.125 = 0.125 - expect(finalScores[0]).toBeCloseTo(1.0, 10) - expect(finalScores[1]).toBeCloseTo(FILE_SATURATION_DECAY, 10) - expect(finalScores[2]).toBeCloseTo(FILE_SATURATION_DECAY ** 2, 10) - expect(finalScores[3]).toBeCloseTo(FILE_SATURATION_DECAY ** 3, 10) - }) - - it('truncates to topK after sorting', () => { - const a = makeChunk('a.ts', 0) - const b = makeChunk('b.ts', 1) - const c = makeChunk('c.ts', 2) - const scores = new Map([ - [a, 0.5], - [b, 0.9], - [c, 0.1], - ]) - const result = rerankTopK(scores, 2, { penalisePaths: false }) - expect(result).toHaveLength(2) - expect(result[0]![0]).toBe(b) - expect(result[1]![0]).toBe(a) - }) - - it('applies path penalties before sorting when enabled', () => { - // a is a test file (penalty 0.3), b is normal. a wins pre-penalty, b wins post. 
- const a = makeChunk('src/foo.test.ts', 0) - const b = makeChunk('src/foo.ts', 1) - const scores = new Map([ - [a, 0.9], - [b, 0.5], - ]) - const result = rerankTopK(scores, 2) - expect(result[0]![0]).toBe(b) - expect(result[1]![0]).toBe(a) - expect(result[0]![1]).toBeCloseTo(0.5, 10) - expect(result[1]![1]).toBeCloseTo(0.9 * STRONG_PENALTY, 10) - }) - - it('does not apply path penalties when penalisePaths is false', () => { - const a = makeChunk('src/foo.test.ts', 0) - const b = makeChunk('src/foo.ts', 1) - const scores = new Map([ - [a, 0.9], - [b, 0.5], - ]) - const result = rerankTopK(scores, 2, { penalisePaths: false }) - expect(result[0]![0]).toBe(a) - expect(result[0]![1]).toBeCloseTo(0.9, 10) - expect(result[1]![0]).toBe(b) - expect(result[1]![1]).toBeCloseTo(0.5, 10) - }) - - it('mixes saturation decay across multiple files', () => { - // Two files, two chunks each. All score 1.0. topK = 4. - const a1 = makeChunk('a.ts', 0) - const a2 = makeChunk('a.ts', 1) - const b1 = makeChunk('b.ts', 2) - const b2 = makeChunk('b.ts', 3) - const scores = new Map([ - [a1, 1.0], - [a2, 1.0], - [b1, 1.0], - [b2, 1.0], - ]) - const result = rerankTopK(scores, 4, { penalisePaths: false }) - expect(result).toHaveLength(4) - // First two picked at 1.0 (first of each file), next two at 0.5. - const top = result.map(([, s]) => s) - expect(top[0]).toBeCloseTo(1.0, 10) - expect(top[1]).toBeCloseTo(1.0, 10) - expect(top[2]).toBeCloseTo(FILE_SATURATION_DECAY, 10) - expect(top[3]).toBeCloseTo(FILE_SATURATION_DECAY, 10) - }) -}) diff --git a/src/ranking/penalties.ts b/src/ranking/penalties.ts deleted file mode 100644 index 0758a27..0000000 --- a/src/ranking/penalties.ts +++ /dev/null @@ -1,183 +0,0 @@ -// Port of src/semble/ranking/penalties.py - -import type { Chunk } from '../types.ts' - -// Patterns that identify test files across common languages. -// Grouped by language for readability; combined into a single regex. 
-export const TEST_FILE_RE = new RegExp( - '(?:^|/)' - + '(?:' - // Python - + 'test_[^/]*\\.py' // test_foo.py - + '|[^/]*_test\\.py' // foo_test.py - // Go - + '|[^/]*_test\\.go' // foo_test.go - // Java - + '|[^/]*Tests?\\.java' // FooTest.java / FooTests.java - // PHP - + '|[^/]*Test\\.php' // FooTest.php - // Ruby - + '|[^/]*_spec\\.rb' // foo_spec.rb - + '|[^/]*_test\\.rb' // foo_test.rb - // JavaScript / TypeScript - + '|[^/]*\\.test\\.[jt]sx?' // foo.test.js/ts/jsx/tsx - + '|[^/]*\\.spec\\.[jt]sx?' // foo.spec.js/ts/jsx/tsx - // Kotlin - + '|[^/]*Tests?\\.kt' // FooTest.kt / FooTests.kt - + '|[^/]*Spec\\.kt' // FooSpec.kt (Kotest) - // Swift - + '|[^/]*Tests?\\.swift' // FooTests.swift (XCTest) - + '|[^/]*Spec\\.swift' // FooSpec.swift (Quick) - // C# - + '|[^/]*Tests?\\.cs' // FooTest.cs / FooTests.cs - // C / C++ - + '|test_[^/]*\\.cpp' // test_foo.cpp (Google Test) - + '|[^/]*_test\\.cpp' // foo_test.cpp (Google Test) - + '|test_[^/]*\\.c' // test_foo.c - + '|[^/]*_test\\.c' // foo_test.c - // Scala - + '|[^/]*Spec\\.scala' // FooSpec.scala (ScalaTest) - + '|[^/]*Suite\\.scala' // FooSuite.scala (MUnit) - + '|[^/]*Test\\.scala' // FooTest.scala - // Dart - + '|[^/]*_test\\.dart' // foo_test.dart - + '|test_[^/]*\\.dart' // test_foo.dart - // Lua - + '|[^/]*_spec\\.lua' // foo_spec.lua (busted) - + '|[^/]*_test\\.lua' // foo_test.lua - + '|test_[^/]*\\.lua' // test_foo.lua (luaunit) - // Shared helper patterns (all languages) - + '|test_helper[^/]*\\.\\w+' // test_helpers.go, test_helper.rb, etc. - + ')$', -) - -// Test/spec directories. -export const TEST_DIR_RE = /(?:^|\/)(?:tests?|__tests__|spec|testing)(?:\/|$)/ - -// Compat/legacy path components. -export const COMPAT_DIR_RE = /(?:^|\/)(?:compat|_compat|legacy)(?:\/|$)/ - -// Examples/docs path components. -export const EXAMPLES_DIR_RE = /(?:^|\/)(?:_?examples?|docs?_src)(?:\/|$)/ - -// TypeScript declaration files (.d.ts stubs). 
-export const TYPE_DEFS_RE = /\.d\.ts$/ - -export const STRONG_PENALTY = 0.3 // test files, compat shims, example/doc code -export const MODERATE_PENALTY = 0.5 // re-export / metadata files -export const MILD_PENALTY = 0.7 // .d.ts declaration stubs (still carry useful type info) - -// Filenames that are re-export barrels or package-level metadata. -export const REEXPORT_FILENAMES = new Set(['__init__.py', 'package-info.java']) - -// Maximum chunks from the same file before a saturation penalty is applied. -export const FILE_SATURATION_THRESHOLD = 1 - -// Multiplicative penalty per extra chunk from the same file beyond the threshold. -export const FILE_SATURATION_DECAY = 0.5 - -/** - * Select top-k results with optional file-path penalties and file-saturation decay. - * - * When `penalisePaths` is true, path penalties are applied before sorting. - * Saturation decay is applied greedily during the greedy pass; because decay - * only reduces scores and candidates are pre-sorted descending, early exit is - * safe once the remaining scores cannot beat the current k-th best. - */ -export function rerankTopK( - scores: Map, - topK: number, - options: { penalisePaths?: boolean } = {}, -): Array<[Chunk, number]> { - const penalisePaths = options.penalisePaths ?? true - - if (scores.size === 0 || topK <= 0) { - return [] - } - - // Apply file-path penalties. - const penaltyCache = new Map() - const penalised = new Map() - for (const [chunk, score] of scores) { - if (penalisePaths) { - let cached = penaltyCache.get(chunk.filePath) - if (cached === undefined) { - cached = _filePathPenalty(chunk.filePath) - penaltyCache.set(chunk.filePath, cached) - } - penalised.set(chunk, score * cached) - } - else { - penalised.set(chunk, score) - } - } - - // Sort by penalised score (highest first) — single sort. 
- const ranked = [...penalised.keys()].sort((a, b) => { - const sa = penalised.get(a) as number - const sb = penalised.get(b) as number - return sb - sa - }) - - const fileSelected = new Map() - const selected: Array<[number, Chunk]> = [] - let minSelected = Number.POSITIVE_INFINITY - - for (const chunk of ranked) { - const penScore = penalised.get(chunk) as number - - if (selected.length >= topK && penScore <= minSelected) { - break - } - - const alreadySelected = fileSelected.get(chunk.filePath) ?? 0 - let effScore = penScore - if (alreadySelected >= FILE_SATURATION_THRESHOLD) { - const excess = alreadySelected - FILE_SATURATION_THRESHOLD + 1 - effScore *= FILE_SATURATION_DECAY ** excess - } - - selected.push([effScore, chunk]) - fileSelected.set(chunk.filePath, alreadySelected + 1) - - if (selected.length >= topK) { - let m = Number.POSITIVE_INFINITY - for (const [s] of selected) { - if (s < m) { - m = s - } - } - minSelected = m - } - } - - selected.sort((a, b) => b[0] - a[0]) - return selected.slice(0, topK).map(([score, chunk]) => [chunk, score]) -} - -/** - * Return a combined multiplicative penalty for all applicable path patterns. - */ -export function _filePathPenalty(filePath: string): number { - const normalised = filePath.replace(/\\/g, '/') - let penalty = 1.0 - if (TEST_FILE_RE.test(normalised) || TEST_DIR_RE.test(normalised)) { - penalty *= STRONG_PENALTY - } - // Match Python's Path(file_path).name (POSIX semantics): only forward-slash - // is a separator. Backslashes in the raw path are part of the filename. 
- const basename = filePath.slice(filePath.lastIndexOf('/') + 1) - if (REEXPORT_FILENAMES.has(basename)) { - penalty *= MODERATE_PENALTY - } - if (COMPAT_DIR_RE.test(normalised)) { - penalty *= STRONG_PENALTY - } - if (EXAMPLES_DIR_RE.test(normalised)) { - penalty *= STRONG_PENALTY - } - if (TYPE_DEFS_RE.test(normalised)) { - penalty *= MILD_PENALTY - } - return penalty -} diff --git a/src/ranking/weighting.test.ts b/src/ranking/weighting.test.ts deleted file mode 100644 index 56c31db..0000000 --- a/src/ranking/weighting.test.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { describe, expect, test } from 'bun:test' -import { ALPHA_NL, ALPHA_SYMBOL, resolveAlpha } from './weighting.ts' - -describe('resolveAlpha', () => { - test('returns ALPHA_NL for plain lowercase queries', () => { - expect(resolveAlpha('session', null)).toBe(0.5) - expect(resolveAlpha('session', null)).toBe(ALPHA_NL) - }) - - test('returns ALPHA_SYMBOL for PascalCase symbol queries', () => { - expect(resolveAlpha('HandlerStack', null)).toBe(0.3) - expect(resolveAlpha('HandlerStack', null)).toBe(ALPHA_SYMBOL) - }) - - test('returns the provided alpha when set', () => { - expect(resolveAlpha('foo', 0.7)).toBe(0.7) - expect(resolveAlpha('HandlerStack', 0.9)).toBe(0.9) - }) - - test('treats undefined like null', () => { - expect(resolveAlpha('session', undefined)).toBe(0.5) - expect(resolveAlpha('HandlerStack', undefined)).toBe(0.3) - }) - - test('alpha=0 is honored (not treated as missing)', () => { - expect(resolveAlpha('HandlerStack', 0)).toBe(0) - }) -}) diff --git a/src/ranking/weighting.ts b/src/ranking/weighting.ts deleted file mode 100644 index e8a52b6..0000000 --- a/src/ranking/weighting.ts +++ /dev/null @@ -1,14 +0,0 @@ -// Port of src/semble/ranking/weighting.py - -import { isSymbolQuery } from './boosting.ts' - -export const ALPHA_SYMBOL = 0.3 // lean BM25 for exact keyword matching -export const ALPHA_NL = 0.5 // balanced semantic + BM25 - -/** Return the blending weight for semantic scores, 
auto-detecting from query type. */ -export function resolveAlpha(query: string, alpha: number | null | undefined): number { - if (alpha !== null && alpha !== undefined) { - return alpha - } - return isSymbolQuery(query) ? ALPHA_SYMBOL : ALPHA_NL -} diff --git a/src/search.test.ts b/src/search.test.ts deleted file mode 100644 index a661a68..0000000 --- a/src/search.test.ts +++ /dev/null @@ -1,361 +0,0 @@ -import type { - Bm25Index, - Chunk, - Model, - SelectableBasicBackend, -} from './search.ts' -import { describe, expect, it } from 'bun:test' -import { - _rrfScores, - _searchBm25, - _searchSemantic, - _sortTopK, - RRF_K, - search, -} from './search.ts' - -// ---------- Fixtures --------------------------------------------------------- - -function makeChunk(overrides: Partial): Chunk { - return { - content: '', - filePath: 'src/a.ts', - startLine: 1, - endLine: 10, - language: 'ts', - ...overrides, - } -} - -function makeChunks(): Chunk[] { - return [ - makeChunk({ content: 'class Alpha {}', filePath: 'src/alpha.ts', startLine: 10, endLine: 20 }), - makeChunk({ content: 'function beta() {}', filePath: 'src/alpha.ts', startLine: 30, endLine: 40 }), - makeChunk({ content: 'export const gamma = 1', filePath: 'src/gamma.ts', startLine: 1, endLine: 5 }), - makeChunk({ content: 'function delta() {}', filePath: 'src/delta.ts', startLine: 5, endLine: 15 }), - makeChunk({ content: 'class Epsilon {}', filePath: 'src/epsilon.ts', startLine: 50, endLine: 60 }), - ] -} - -function mockModel(): Model { - return { - encode: (texts: string[]) => texts.map(() => new Float32Array([0.1, 0.2, 0.3])), - dim: 3, - } -} - -interface QueryCall { - vectors: Float32Array[] - k: number - selector: Uint32Array | undefined -} - -function mockSemanticIndex( - results: Array<[number, number]>, - capture?: { calls: QueryCall[] }, -): SelectableBasicBackend { - return { - query: (vectors, k, selector) => { - capture?.calls.push({ vectors, k, selector }) - return [results] - }, - } -} - -interface 
Bm25Call { - tokens: string[] - mask: Uint8Array | undefined -} - -function mockBm25(scores: number[], capture?: { calls: Bm25Call[] }): Bm25Index { - return { - getScores: (tokens, mask) => { - capture?.calls.push({ tokens, mask }) - return new Float32Array(scores) - }, - } -} - -// ---------- _sortTopK ------------------------------------------------------- - -describe('_sortTopK', () => { - it('returns indices in descending score order', () => { - const scores = new Float32Array([0.1, 0.9, 0.5, 0.3, 0.7]) - const out = _sortTopK(scores, 3) - expect([...out]).toEqual([1, 4, 2]) - }) - - it('clamps to array length when topK is larger', () => { - const scores = new Float32Array([1, 2, 3]) - const out = _sortTopK(scores, 10) - expect([...out]).toEqual([2, 1, 0]) - }) - - it('returns empty Uint32Array on empty input', () => { - const out = _sortTopK(new Float32Array(), 5) - expect(out.length).toBe(0) - }) -}) - -// ---------- _rrfScores ------------------------------------------------------ - -describe('_rrfScores', () => { - it('assigns 1/(RRF_K+rank) to chunks in descending raw-score order', () => { - const chunks = makeChunks() - const raw = new Map([ - [chunks[0]!, 0.1], - [chunks[1]!, 0.9], - [chunks[2]!, 0.5], - ]) - const rrf = _rrfScores(raw) - expect(rrf.get(chunks[1]!)).toBeCloseTo(1 / (RRF_K + 1), 10) - expect(rrf.get(chunks[2]!)).toBeCloseTo(1 / (RRF_K + 2), 10) - expect(rrf.get(chunks[0]!)).toBeCloseTo(1 / (RRF_K + 3), 10) - }) - - it('returns an empty map for empty input', () => { - const out = _rrfScores(new Map()) - expect(out.size).toBe(0) - }) - - it('first-rank chunk gets ~0.01639 (1/61)', () => { - const chunks = makeChunks() - const raw = new Map([[chunks[0]!, 5.0]]) - const rrf = _rrfScores(raw) - expect(rrf.get(chunks[0]!)).toBeCloseTo(1 / 61, 10) - }) -}) - -// ---------- _searchSemantic / _searchBm25 ----------------------------------- - -describe('_searchSemantic', () => { - it('converts cosine distance to similarity (1 - distance)', () => { 
- const chunks = makeChunks() - const idx = mockSemanticIndex([[0, 0.2], [2, 0.7]]) - const results = _searchSemantic('q', mockModel(), idx, chunks, 5, undefined) - expect(results.length).toBe(2) - expect(results[0]!.chunk).toBe(chunks[0]!) - expect(results[0]!.score).toBeCloseTo(0.8, 10) - expect(results[1]!.chunk).toBe(chunks[2]!) - expect(results[1]!.score).toBeCloseTo(0.3, 10) - }) - - it('passes the selector through to semanticIndex.query', () => { - const chunks = makeChunks() - const capture = { calls: [] as QueryCall[] } - const idx = mockSemanticIndex([[0, 0.5]], capture) - const selector = new Uint32Array([0, 2]) - _searchSemantic('q', mockModel(), idx, chunks, 5, selector) - expect(capture.calls.length).toBe(1) - expect(capture.calls[0]!.selector).toBe(selector) - expect(capture.calls[0]!.k).toBe(5) - }) -}) - -describe('_searchBm25', () => { - it('excludes zero-score chunks and returns top-k sorted', () => { - const chunks = makeChunks() - const bm = mockBm25([0.5, 0, 0.9, 0.2, 0]) - const results = _searchBm25('alpha beta', bm, chunks, 5, undefined) - expect(results.map(r => r.chunk)).toEqual([chunks[2]!, chunks[0]!, chunks[3]!]) - expect(results[0]!.score).toBeCloseTo(0.9, 5) - }) - - it('returns [] when tokenize yields no tokens', () => { - const chunks = makeChunks() - const bm = mockBm25([1, 1, 1, 1, 1]) - const results = _searchBm25(' ', bm, chunks, 5, undefined) - expect(results).toEqual([]) - }) - - it('builds a boolean weight mask from the selector', () => { - const chunks = makeChunks() - const capture = { calls: [] as Bm25Call[] } - const bm = mockBm25([1, 1, 1, 1, 1], capture) - _searchBm25('alpha', bm, chunks, 5, new Uint32Array([1, 3])) - expect(capture.calls.length).toBe(1) - expect(Array.from(capture.calls[0]!.mask!)).toEqual([0, 1, 0, 1, 0]) - }) -}) - -// ---------- search() -------------------------------------------------------- - -describe('search() — alpha blending', () => { - it('with alpha=1.0 yields purely semantic ordering 
(BM25 contribution = 0)', () => { - const chunks = makeChunks() - // chunks[2] is first in semantic, chunks[0] is second; BM25 strongly favors chunks[4] - // but alpha=1.0 must zero its contribution so it never outranks a semantic hit. - const idx = mockSemanticIndex([[2, 0.05], [0, 0.10]]) - const bm = mockBm25([0, 0, 0, 0, 9.0]) - const results = search('alpha', mockModel(), idx, bm, chunks, 3, { alpha: 1.0, rerank: false }) - // The two semantic hits must come first; chunks[4] (BM25-only) ranks last with score 0. - expect(results[0]!.chunk).toBe(chunks[2]!) - expect(results[1]!.chunk).toBe(chunks[0]!) - expect(results[0]!.score).toBeGreaterThan(0) - expect(results[1]!.score).toBeGreaterThan(0) - // chunks[4] is in the union but scored 0 under alpha=1.0. - const ch4Result = results.find(r => r.chunk === chunks[4]) - if (ch4Result !== undefined) { - expect(ch4Result.score).toBe(0) - } - }) - - it('with alpha=0.0 yields purely BM25 ordering', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([[0, 0.05]]) - const bm = mockBm25([0.5, 0, 0.9, 0.2, 0]) - const results = search('alpha', mockModel(), idx, bm, chunks, 3, { alpha: 0.0, rerank: false }) - // BM25 top: chunks[2] (0.9), chunks[0] (0.5), chunks[3] (0.2) - expect(results.map(r => r.chunk)).toEqual([chunks[2]!, chunks[0]!, chunks[3]!]) - }) -}) - -describe('search() — RRF normalisation', () => { - it('produces score 1/61 for a chunk that is rank-1 in semantic with alpha=1.0', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([[0, 0.0]]) // distance 0 → similarity 1 - const bm = mockBm25([0, 0, 0, 0, 0]) - const results = search('q', mockModel(), idx, bm, chunks, 5, { alpha: 1.0, rerank: false }) - expect(results.length).toBe(1) - expect(results[0]!.score).toBeCloseTo(1 / 61, 10) - }) -}) - -describe('search() — sort stability', () => { - it('iterates candidates in startLine order before scoring (counteracts hash nondeterminism)', () => { - // Build a scenario where two 
chunks tie on combined RRF score and we want - // the lower-startLine chunk to be produced first. - const chunks = [ - makeChunk({ content: 'foo', filePath: 'src/late.ts', startLine: 100 }), - makeChunk({ content: 'bar', filePath: 'src/early.ts', startLine: 1 }), - ] - // Both rank 1 in their respective lists → both get the same RRF score - // → combined ties → ordering must come from startLine. - const idx = mockSemanticIndex([[0, 0.5]]) // chunks[0] only in semantic - const bm = mockBm25([0, 1.0]) // chunks[1] only in bm25 - const results = search('q', mockModel(), idx, bm, chunks, 5, { alpha: 0.5, rerank: false }) - expect(results.length).toBe(2) - expect(results[0]!.chunk.startLine).toBe(1) - expect(results[1]!.chunk.startLine).toBe(100) - }) -}) - -describe('search() — empty inputs', () => { - it('returns [] when both backends yield nothing', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([]) - const bm = mockBm25(chunks.map(() => 0)) - const results = search('q', mockModel(), idx, bm, chunks, 5) - expect(results).toEqual([]) - }) -}) - -describe('search() — rerank pipeline', () => { - it('rerank=true applies multi-chunk file boost (chunks[0] & chunks[1] share src/alpha.ts)', () => { - const chunks = makeChunks() - // Semantic puts both alpha.ts chunks high; gamma.ts is also present. - const idx = mockSemanticIndex([[0, 0.10], [1, 0.20], [2, 0.30]]) - const bm = mockBm25([0, 0, 0, 0, 0]) - const ranked = search('q', mockModel(), idx, bm, chunks, 3, { alpha: 1.0, rerank: true }) - // With multi-chunk boost, the best chunk in alpha.ts should outrank - // gamma.ts even though gamma.ts has a respectable RRF rank. 
- expect(ranked[0]!.chunk.filePath).toBe('src/alpha.ts') - }) - - it('rerank=false skips boosts and just sorts by combined score', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([[0, 0.10], [1, 0.20], [2, 0.30]]) - const bm = mockBm25([0, 0, 0, 0, 0]) - const ranked = search('q', mockModel(), idx, bm, chunks, 3, { alpha: 1.0, rerank: false }) - expect(ranked.map(r => r.chunk)).toEqual([chunks[0]!, chunks[1]!, chunks[2]!]) - }) -}) - -describe('search() — file-saturation decay', () => { - it('demotes extra chunks from the same file after the first match', () => { - // Three chunks of src/alpha.ts at semantic ranks 1/2/3, plus one from src/beta.ts at rank 4. - // Saturation should push the 2nd & 3rd alpha.ts chunks below beta.ts in the final ordering. - const chunks = [ - makeChunk({ content: '', filePath: 'src/alpha.ts', startLine: 10 }), - makeChunk({ content: '', filePath: 'src/alpha.ts', startLine: 30 }), - makeChunk({ content: '', filePath: 'src/alpha.ts', startLine: 50 }), - makeChunk({ content: '', filePath: 'src/beta.ts', startLine: 1 }), - ] - const idx = mockSemanticIndex([ - [0, 0.10], - [1, 0.20], - [2, 0.30], - [3, 0.40], - ]) - const bm = mockBm25([0, 0, 0, 0]) - // alpha=1.0 → multi-chunk boost only touches the single best chunk per file. - // file-saturation decay (0.5^excess) demotes the 2nd & 3rd alpha.ts chunks. - const ranked = search('q', mockModel(), idx, bm, chunks, 4, { alpha: 1.0, rerank: true }) - // First slot: best alpha.ts chunk (boosted). - expect(ranked[0]!.chunk.filePath).toBe('src/alpha.ts') - // Second slot: beta.ts (no saturation penalty applied to a different file). - expect(ranked[1]!.chunk.filePath).toBe('src/beta.ts') - }) -}) - -describe('search() — auto-alpha for symbol queries', () => { - it('passes penalisePaths=true (alpha<1.0) for symbol-shaped queries by default', () => { - // Indirect assertion: a symbol query has alpha=0.3, so BM25 contributes. 
- // With alpha=0.3 and only a BM25 hit, the result must contain that hit. - const chunks = makeChunks() - const idx = mockSemanticIndex([]) - const bm = mockBm25([0, 0, 0.9, 0, 0]) - const ranked = search('FooBar', mockModel(), idx, bm, chunks, 3, { rerank: false }) - expect(ranked.length).toBe(1) - expect(ranked[0]!.chunk).toBe(chunks[2]!) - }) -}) - -// ---------- SearchResult.toDict (integration with ../types.ts) --------------- - -describe('SearchResult.toDict', () => { - it('search() results carry a toDict producing the formatResults-compatible shape', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([[2, 0.0]]) // chunks[2]: src/gamma.ts, lines 1-5 - const bm = mockBm25([0, 0, 0, 0, 0]) - const results = search('q', mockModel(), idx, bm, chunks, 5, { alpha: 1.0, rerank: false }) - expect(results.length).toBe(1) - const r = results[0]! - expect(typeof r.toDict).toBe('function') - expect(r.toDict()).toEqual({ - chunk: { - content: 'export const gamma = 1', - file_path: 'src/gamma.ts', - start_line: 1, - end_line: 5, - language: 'ts', - location: 'src/gamma.ts:1-5', - }, - score: r.score, - }) - }) - - it('_searchSemantic results carry a toDict', () => { - const chunks = makeChunks() - const idx = mockSemanticIndex([[0, 0.2]]) - const results = _searchSemantic('q', mockModel(), idx, chunks, 5, undefined) - expect(typeof results[0]!.toDict).toBe('function') - }) - - it('_searchBm25 results carry a toDict', () => { - const chunks = makeChunks() - const bm = mockBm25([0.5, 0, 0.9, 0.2, 0]) - const results = _searchBm25('alpha beta', bm, chunks, 5, undefined) - expect(typeof results[0]!.toDict).toBe('function') - }) - - it('toDict renders a null language as null (no string coercion)', () => { - const chunks = [makeChunk({ filePath: 'src/n.ts', startLine: 3, endLine: 8, language: null })] - const idx = mockSemanticIndex([[0, 0.0]]) - const bm = mockBm25([0]) - const results = search('q', mockModel(), idx, bm, chunks, 5, { alpha: 1.0, rerank: 
false }) - const dict = results[0]!.toDict() as { chunk: Record } - expect(dict.chunk.language).toBeNull() - expect(dict.chunk.location).toBe('src/n.ts:3-8') - }) -}) diff --git a/src/search.ts b/src/search.ts deleted file mode 100644 index 253fb90..0000000 --- a/src/search.ts +++ /dev/null @@ -1,249 +0,0 @@ -// Port of src/semble/search.py - -import type { Chunk, SearchResult } from './types.ts' -import { applyQueryBoost, boostMultiChunkFiles } from './ranking/boosting.ts' -import { rerankTopK } from './ranking/penalties.ts' -import { resolveAlpha } from './ranking/weighting.ts' -import { tokenize } from './tokens.ts' - -// Re-export the shared types so downstream importers (and tests) can keep -// pulling `Chunk`/`SearchResult` from this module's public surface. -export type { Chunk, SearchResult } - -/** - * Render a chunk as a JSONable object (snake_cased fields + `location`), - * mirroring semble's `Chunk.to_dict`. - */ -function chunkToDict(chunk: Chunk): Record { - return { - content: chunk.content, - file_path: chunk.filePath, - start_line: chunk.startLine, - end_line: chunk.endLine, - language: chunk.language ?? null, - location: `${chunk.filePath}:${chunk.startLine}-${chunk.endLine}`, - } -} - -/** - * Build a `SearchResult` with a `toDict` closure, so every result this module - * produces satisfies the `../types.ts` `SearchResult` contract that - * `utils.formatResults` consumes. - */ -function makeResult(chunk: Chunk, score: number): SearchResult { - return { - chunk, - score, - toDict: () => ({ chunk: chunkToDict(chunk), score }), - } -} - -// --- Public exports --------------------------------------------------------- - -export const RRF_K = 60 - -/** Minimal embedding model interface (parallels `model2vec.StaticModel`). */ -export interface Model { - encode: (texts: string[]) => Float32Array[] - dim: number -} - -/** - * Minimal vector backend interface (parallels `vicinity` CosineBasicBackend). 
- * - * `query` returns one result list per query vector — each list is a sequence - * of `[chunkIndex, cosineDistance]` pairs sorted by ascending distance. - */ -export interface SelectableBasicBackend { - query: ( - vectors: Float32Array[], - k: number, - selector?: Uint32Array, - ) => Array> -} - -/** Minimal BM25 backend interface (parallels `bm25s.BM25`). */ -export interface Bm25Index { - getScores: (queryTokens: string[], weightMask?: Uint8Array) => Float32Array -} - -/** Build a boolean weight mask from a chunk-index selector, or `undefined` if no selector. */ -function selectorToMask(selector: Uint32Array | undefined, size: number): Uint8Array | undefined { - if (selector === undefined) { - return undefined - } - const mask = new Uint8Array(size) - for (const idx of selector) { - if (idx < size) { - mask[idx] = 1 - } - } - return mask -} - -/** - * Convert raw scores to RRF scores `1 / (RRF_K + rank)`; highest raw score → rank 1. - * - * Ties in the raw scores are broken by insertion order (the underlying sort is stable). - */ -export function _rrfScores(scores: Map): Map { - if (scores.size === 0) { - return scores - } - const ranked = [...scores.entries()].sort((a, b) => b[1] - a[1]) - const out = new Map() - for (let i = 0; i < ranked.length; i++) { - const entry = ranked[i] - if (entry === undefined) { - continue - } - const rank = i + 1 - out.set(entry[0], 1.0 / (RRF_K + rank)) - } - return out -} - -/** Partial sort: return indices of the top-k largest entries of `arr`, in descending-score order. */ -export function _sortTopK(arr: Float32Array, topK: number): Uint32Array { - const n = arr.length - const indices = Array.from({ length: n }, (_, i) => i) - indices.sort((a, b) => { - const av = arr[a] as number - const bv = arr[b] as number - return bv - av - }) - const k = Math.min(topK, n) - const out = new Uint32Array(k) - for (let i = 0; i < k; i++) { - out[i] = indices[i] as number - } - return out -} - -/** Run semantic search for a query. 
Converts cosine distance → similarity (`1 - distance`). */ -export function _searchSemantic( - query: string, - model: Model, - semanticIndex: SelectableBasicBackend, - chunks: Chunk[], - topK: number, - selector: Uint32Array | undefined, -): SearchResult[] { - const queryEmbedding = model.encode([query]) - const batch = semanticIndex.query(queryEmbedding, topK, selector) - const first = batch[0] - if (first === undefined) { - return [] - } - const results: SearchResult[] = [] - for (const [index, distance] of first) { - const chunk = chunks[index] - if (chunk === undefined) { - continue - } - results.push(makeResult(chunk, 1.0 - distance)) - } - return results -} - -/** Return chunks ranked by BM25 score, excluding zero-score results. */ -export function _searchBm25( - query: string, - bm25Index: Bm25Index, - chunks: Chunk[], - topK: number, - selector: Uint32Array | undefined, -): SearchResult[] { - const tokens = tokenize(query) - if (tokens.length === 0) { - return [] - } - const mask = selectorToMask(selector, chunks.length) - const scores = bm25Index.getScores(tokens, mask) - const indices = _sortTopK(scores, topK) - const results: SearchResult[] = [] - for (const i of indices) { - const score = scores[i] - if (score === undefined || score <= 0) { - continue - } - const chunk = chunks[i] - if (chunk === undefined) { - continue - } - results.push(makeResult(chunk, score)) - } - return results -} - -export interface SearchOptions { - /** Weight for semantic score (1-alpha for BM25). `undefined` → auto-detect by query type. */ - alpha?: number - /** Optional chunk-index selector to filter candidates. */ - selector?: Uint32Array - /** Whether to apply code-tuned reranking (path penalties, file saturation, boosts). */ - rerank?: boolean -} - -/** - * Hybrid search: alpha-weighted combination of semantic and BM25 scores. - * - * Both score sets are converted to RRF scores before combining, so `alpha` has a - * consistent meaning regardless of raw-score magnitude. 
- */ -export function search( - query: string, - model: Model, - semanticIndex: SelectableBasicBackend, - bm25Index: Bm25Index, - chunks: Chunk[], - topK: number, - options: SearchOptions = {}, -): SearchResult[] { - const { alpha, selector, rerank = true } = options - const alphaWeight = resolveAlpha(query, alpha) - - // Over-fetch candidates so the merged pool is large enough after union & re-ranking. - const candidateCount = topK * 5 - - const semantic = _searchSemantic(query, model, semanticIndex, chunks, candidateCount, selector) - const semanticScores = new Map() - for (const r of semantic) { - semanticScores.set(r.chunk, r.score) - } - - const bm25Scores = new Map() - for (const r of _searchBm25(query, bm25Index, chunks, candidateCount, selector)) { - if (r.score) { - bm25Scores.set(r.chunk, r.score) - } - } - - const normalizedSemantic = _rrfScores(semanticScores) - const normalizedBm25 = _rrfScores(bm25Scores) - - // Sort the union by start_line to counteract hash-iteration nondeterminism. - const union = new Set([...normalizedSemantic.keys(), ...normalizedBm25.keys()]) - const allCandidates = [...union].sort((a, b) => a.startLine - b.startLine) - - let combinedScores = new Map() - for (const chunk of allCandidates) { - const s = normalizedSemantic.get(chunk) ?? 0 - const b = normalizedBm25.get(chunk) ?? 
0 - combinedScores.set(chunk, alphaWeight * s + (1.0 - alphaWeight) * b) - } - - let ranked: Array<[Chunk, number]> - if (rerank) { - boostMultiChunkFiles(combinedScores) - combinedScores = applyQueryBoost(combinedScores, query, chunks) - ranked = rerankTopK(combinedScores, topK, { penalisePaths: alphaWeight < 1.0 }) - } - else { - ranked = [...combinedScores.entries()] - .sort((a, b) => b[1] - a[1]) - .slice(0, topK) - } - - return ranked.map(([chunk, score]) => makeResult(chunk, score)) -} diff --git a/src/stats.test.ts b/src/stats.test.ts deleted file mode 100644 index 5d65772..0000000 --- a/src/stats.test.ts +++ /dev/null @@ -1,321 +0,0 @@ -import type { StatsSearchResult } from './stats.ts' -import { appendFileSync, existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs' -import { tmpdir } from 'node:os' -import path from 'node:path' -// Tests for src/stats.ts — port of src/semble/stats.py -import { afterEach, beforeEach, describe, expect, test } from 'bun:test' -import { - BucketStats, - buildSavingsSummary, - clearSavings, - formatSavingsReport, - resetStatsFile, - saveSearchStats, - setStatsFile, - -} from './stats.ts' - -function makeResult(content: string, filePath: string): StatsSearchResult { - return { chunk: { content, filePath } } -} - -let tmpDir: string -let statsFile: string - -beforeEach(() => { - tmpDir = mkdtempSync(path.join(tmpdir(), 'csp-stats-')) - statsFile = path.join(tmpDir, 'savings.jsonl') - setStatsFile(statsFile) -}) - -afterEach(() => { - resetStatsFile() - rmSync(tmpDir, { recursive: true, force: true }) -}) - -describe('BucketStats', () => { - test('add accumulates fields and clamps savedChars to >= 0', () => { - const b = new BucketStats() - b.add(100, 400) - b.add(100, 400) - expect(b.calls).toBe(2) - expect(b.snippetChars).toBe(200) - expect(b.fileChars).toBe(800) - expect(b.savedChars).toBe(600) - }) - - test('add does not produce negative savedChars when snippet > file', () => { - const b = new 
BucketStats() - b.add(500, 100) - expect(b.savedChars).toBe(0) - expect(b.snippetChars).toBe(500) - expect(b.fileChars).toBe(100) - }) -}) - -describe('saveSearchStats', () => { - test('appends one valid JSONL line per call', () => { - const results = [makeResult('hello world', '/a.ts'), makeResult('foo bar baz', '/b.ts')] - saveSearchStats(results, 'search', { '/a.ts': 100, '/b.ts': 200 }) - - const content = readFileSync(statsFile, 'utf8') - const lines = content.split('\n').filter(l => l.length > 0) - expect(lines).toHaveLength(1) - const record = JSON.parse(lines[0]!) as Record - expect(record.call).toBe('search') - expect(record.results).toBe(2) - expect(record.snippet_chars).toBe('hello world'.length + 'foo bar baz'.length) - expect(record.file_chars).toBe(300) - expect(typeof record.ts).toBe('number') - }) - - test('two calls produce two lines', () => { - saveSearchStats([makeResult('abc', '/a.ts')], 'search', { '/a.ts': 50 }) - saveSearchStats([makeResult('xy', '/b.ts')], 'find_related', { '/b.ts': 20 }) - - const content = readFileSync(statsFile, 'utf8') - const lines = content.split('\n').filter(l => l.length > 0) - expect(lines).toHaveLength(2) - const r1 = JSON.parse(lines[0]!) as Record - const r2 = JSON.parse(lines[1]!) as Record - expect(r1.call).toBe('search') - expect(r2.call).toBe('find_related') - }) - - test('deduplicates file_chars per unique filePath', () => { - // Same path twice — file should only count once toward file_chars. - const results = [makeResult('aaa', '/a.ts'), makeResult('bbb', '/a.ts')] - saveSearchStats(results, 'search', { '/a.ts': 100 }) - - const content = readFileSync(statsFile, 'utf8') - const lines = content.split('\n').filter(l => l.length > 0) - const record = JSON.parse(lines[0]!) 
as Record - expect(record.file_chars).toBe(100) - expect(record.snippet_chars).toBe(6) - }) - - test('ignores paths missing from fileSizes', () => { - const results = [makeResult('aaa', '/a.ts'), makeResult('bbb', '/missing.ts')] - saveSearchStats(results, 'search', { '/a.ts': 100 }) - - const content = readFileSync(statsFile, 'utf8') - const lines = content.split('\n').filter(l => l.length > 0) - const record = JSON.parse(lines[0]!) as Record - expect(record.file_chars).toBe(100) - }) - - test('never throws on I/O error', () => { - // Point stats file at a path whose parent cannot be created (a regular - // file used as a directory). saveSearchStats must swallow the error. - const conflictFile = path.join(tmpDir, 'conflict') - writeFileSync(conflictFile, 'not a directory') - setStatsFile(path.join(conflictFile, 'nested', 'savings.jsonl')) - - expect(() => { - saveSearchStats([makeResult('x', '/a.ts')], 'search', { '/a.ts': 10 }) - }).not.toThrow() - }) -}) - -describe('buildSavingsSummary', () => { - test('parses all valid lines and skips malformed ones', () => { - const now = Date.now() / 1000 - const lines = [ - JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - 'this is not json', - JSON.stringify({ ts: now, call: 'find_related', results: 1, snippet_chars: 50, file_chars: 200 }), - '{"incomplete": ', - JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - ] - writeFileSync(statsFile, `${lines.join('\n')}\n`) - - const summary = buildSavingsSummary() - expect(summary.buckets['All time']!.calls).toBe(3) - expect(summary.callTypeCounts).toEqual({ search: 2, find_related: 1 }) - }) - - test('bucket math: 2 search calls with snippet=100/file=400 → savedChars=600, ratio 0.75', () => { - const now = Date.now() / 1000 - const lines = [ - JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - JSON.stringify({ ts: now, call: 'search', results: 
1, snippet_chars: 100, file_chars: 400 }), - ] - writeFileSync(statsFile, `${lines.join('\n')}\n`) - - const summary = buildSavingsSummary() - const all = summary.buckets['All time']! - expect(all.calls).toBe(2) - expect(all.snippetChars).toBe(200) - expect(all.fileChars).toBe(800) - expect(all.savedChars).toBe(600) - expect(all.savedChars / all.fileChars).toBe(0.75) - - expect(summary.buckets.Today!.calls).toBe(2) - expect(summary.buckets['Last 7 days']!.calls).toBe(2) - }) - - test('older entries fall outside Today and Last 7 days buckets', () => { - const now = Date.now() / 1000 - const tenDaysAgo = now - 10 * 24 * 60 * 60 - const lines = [ - JSON.stringify({ ts: tenDaysAgo, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - ] - writeFileSync(statsFile, `${lines.join('\n')}\n`) - - const summary = buildSavingsSummary() - expect(summary.buckets['All time']!.calls).toBe(2) - expect(summary.buckets['Last 7 days']!.calls).toBe(1) - expect(summary.buckets.Today!.calls).toBe(1) - }) - - test('missing stats file returns empty summary', () => { - const summary = buildSavingsSummary(path.join(tmpDir, 'does-not-exist.jsonl')) - expect(summary.buckets['All time']!.calls).toBe(0) - expect(summary.callTypeCounts).toEqual({}) - }) - - test('skips records with NaN numeric fields', () => { - // `typeof NaN === 'number'` would otherwise let these through and - // poison date formatting / bucket math with NaN. - const now = Date.now() / 1000 - const lines = [ - // NaN serializes as `null` in JSON.stringify, so emit NaN literally. 
- '{"ts": NaN, "call": "search", "results": 1, "snippet_chars": 0, "file_chars": 0}', - '{"ts": 0, "call": "search", "results": 1, "snippet_chars": NaN, "file_chars": 0}', - '{"ts": 0, "call": "search", "results": 1, "snippet_chars": 0, "file_chars": NaN}', - JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }), - ] - writeFileSync(statsFile, `${lines.join('\n')}\n`) - - const summary = buildSavingsSummary() - // Only the last valid record is counted. - expect(summary.buckets['All time']!.calls).toBe(1) - expect(summary.callTypeCounts).toEqual({ search: 1 }) - }) - - test('call types matching built-in object properties do not collide', () => { - // Without Object.create(null), `callTypeCounts["toString"]` would - // resolve to Function.prototype.toString and arithmetic would coerce - // it to a string instead of incrementing. - const now = Date.now() / 1000 - const lines = [ - JSON.stringify({ ts: now, call: 'toString', results: 1, snippet_chars: 1, file_chars: 1 }), - JSON.stringify({ ts: now, call: 'toString', results: 1, snippet_chars: 1, file_chars: 1 }), - JSON.stringify({ ts: now, call: 'hasOwnProperty', results: 1, snippet_chars: 1, file_chars: 1 }), - ] - writeFileSync(statsFile, `${lines.join('\n')}\n`) - - const summary = buildSavingsSummary() - expect(summary.callTypeCounts).toEqual({ toString: 2, hasOwnProperty: 1 }) - }) -}) - -describe('clearSavings', () => { - test('deletes an existing stats file and reports cleared=true', () => { - appendFileSync(statsFile, `${JSON.stringify({ ts: 1, call: 'search', results: 1, snippet_chars: 1, file_chars: 2 })}\n`) - expect(existsSync(statsFile)).toBe(true) - const result = clearSavings() - expect(result).toEqual({ path: statsFile, cleared: true }) - expect(existsSync(statsFile)).toBe(false) - }) - - test('reports cleared=false when no stats file exists', () => { - expect(existsSync(statsFile)).toBe(false) - expect(clearSavings()).toEqual({ path: statsFile, cleared: false }) 
- }) -}) - -describe('formatSavingsReport', () => { - test('shows "Csp Token Savings" header and bucket labels', () => { - const now = Date.now() / 1000 - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`, - ) - - const report = formatSavingsReport() - expect(report).toContain('Csp Token Savings') - expect(report).toContain('Today') - expect(report).toContain('Last 7 days') - expect(report).toContain('All time') - expect(report).not.toContain('Semble Token Savings') - }) - - test('empty / missing stats file returns the "no stats yet" message', () => { - expect(existsSync(statsFile)).toBe(false) - expect(formatSavingsReport()).toBe('No stats yet. Run a search first.') - }) - - test('formats saved tokens with ~Nk suffix at 1500 → ~1.5k', () => { - // file=6400, snippet=400 ⇒ saved=6000 chars ⇒ 6000/4 = 1500 tokens ⇒ "~1.5k" - const now = Date.now() / 1000 - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 400, file_chars: 6400 })}\n`, - ) - - const report = formatSavingsReport() - expect(report).toContain('~1.5k') - }) - - test('verbose appends By Call Type section sorted by call count descending', () => { - const now = Date.now() / 1000 - // search appears 2×, find_related 1× ⇒ search ranks first by count. 
- appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`, - ) - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`, - ) - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'find_related', results: 1, snippet_chars: 50, file_chars: 200 })}\n`, - ) - - const report = formatSavingsReport({ verbose: true }) - expect(report).toContain('By Call Type') - expect(report).toContain('Call type') - expect(report).toContain('search') - expect(report).toContain('find_related') - // Sorted by count descending — the more frequent `search` precedes `find_related`. - const searchIdx = report.indexOf('By Call Type') - const breakdown = report.slice(searchIdx) - expect(breakdown.indexOf('search')).toBeLessThan(breakdown.indexOf('find_related')) - }) - - test('By Call Type section is hidden without --verbose', () => { - const now = Date.now() / 1000 - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`, - ) - expect(formatSavingsReport()).not.toContain('By Call Type') - }) - - test('renders By Period bar with filled blocks proportional to ratio', () => { - const now = Date.now() / 1000 - // ratio = 0.75 over a 24-wide bar ⇒ 18 filled / 6 empty. - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`, - ) - const report = formatSavingsReport() - expect(report).toContain(`${'█'.repeat(18)}${'░'.repeat(6)}`) - // Overall ratio is surfaced in the headline summary as "(75%)". 
- expect(report).toContain('(75%)') - }) - - test('formats saved tokens with ~N.NM suffix at 1M+ tokens', () => { - // saved_chars = 4_000_000 ⇒ tokens = 1_000_000 ⇒ "~1.0M" - const now = Date.now() / 1000 - appendFileSync( - statsFile, - `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 0, file_chars: 4_000_000 })}\n`, - ) - const report = formatSavingsReport() - expect(report).toContain('~1.0M') - }) -}) diff --git a/src/stats.ts b/src/stats.ts deleted file mode 100644 index dceb0c3..0000000 --- a/src/stats.ts +++ /dev/null @@ -1,394 +0,0 @@ -// Port of src/semble/stats.py -import { appendFileSync, existsSync, mkdirSync, readFileSync, rmSync } from 'node:fs' -import { homedir } from 'node:os' -import path from 'node:path' -import process from 'node:process' - -/** - * Call type for token-savings tracking. - * - * Mirrors `CallType` from `src/semble/types.py`. Defined here as a minimal - * type to avoid creating a cross-unit dependency before `src/types.ts` - * lands. Once that exists, this should be re-exported from there. - */ -export type CallType = 'search' | 'find_related' - -/** - * Minimal chunk shape needed by `saveSearchStats`. - * - * Uses camelCase fields per the csp public API surface. - */ -export interface StatsChunk { - content: string - filePath: string -} - -/** - * Minimal search-result shape needed by `saveSearchStats`. - */ -export interface StatsSearchResult { - chunk: StatsChunk -} - -/** - * Per-bucket aggregate counters for the savings report. - */ -export class BucketStats { - calls: number = 0 - snippetChars: number = 0 - fileChars: number = 0 - savedChars: number = 0 - - /** Update stats with a call and its character counts. */ - add(snippetChars: number, fileChars: number): void { - this.calls += 1 - this.snippetChars += snippetChars - this.fileChars += fileChars - this.savedChars += Math.max(0, fileChars - snippetChars) - } -} - -/** - * Aggregated savings, grouped into time buckets plus per-call-type counts. 
- */ -export interface SavingsSummary { - buckets: Record - callTypeCounts: Record -} - -const DEFAULT_STATS_FILE = path.join(homedir(), '.csp', 'savings.jsonl') - -let _STATS_FILE = DEFAULT_STATS_FILE - -/** - * Override the stats file location. Intended for tests only — production - * callers should leave the default in place so behavior matches semble. - */ -export function setStatsFile(filePath: string): void { - _STATS_FILE = filePath -} - -/** Return the current stats file path. */ -export function getStatsFile(): string { - return _STATS_FILE -} - -/** Reset the stats file path back to the default `~/.csp/savings.jsonl`. */ -export function resetStatsFile(): void { - _STATS_FILE = DEFAULT_STATS_FILE -} - -/** - * Save stats about a search or find_related call to the stats file. - * - * Best-effort: any I/O error is silently swallowed so stats writes never - * impact a live search. - */ -export function saveSearchStats( - results: StatsSearchResult[], - callType: CallType, - fileSizes: Record, -): void { - try { - const snippetChars = results.reduce((sum, r) => sum + r.chunk.content.length, 0) - const uniquePaths = new Set(results.map(r => r.chunk.filePath)) - let fileChars = 0 - for (const p of uniquePaths) { - if (Object.hasOwn(fileSizes, p)) { - fileChars += fileSizes[p] ?? 0 - } - } - - const record = { - ts: Date.now() / 1000, - call: callType, - results: results.length, - snippet_chars: snippetChars, - file_chars: fileChars, - } - const dir = path.dirname(_STATS_FILE) - mkdirSync(dir, { recursive: true }) - appendFileSync(_STATS_FILE, `${JSON.stringify(record)}\n`) - } - catch { - // Swallow — stats writes must never throw. - } -} - -/** - * Delete the savings stats file if it exists. - * - * Deletion (not truncation) mirrors semble's `clear` (`path.unlink()`) and - * lets `csp savings` fall back to the "No stats yet" message — a truncated, - * still-present file would instead render an all-zero report. 
Best-effort: - * a permission error or broken symlink is swallowed and reported as - * `cleared: false` rather than crashing the CLI. - */ -export function clearSavings(): { path: string, cleared: boolean } { - if (!existsSync(_STATS_FILE)) { - return { path: _STATS_FILE, cleared: false } - } - try { - rmSync(_STATS_FILE) - return { path: _STATS_FILE, cleared: true } - } - catch { - return { path: _STATS_FILE, cleared: false } - } -} - -interface StatsRecord { - ts: number - call: string - results: number - snippet_chars: number - file_chars: number -} - -function isStatsRecord(value: unknown): value is StatsRecord { - if (value === null || typeof value !== 'object') { - return false - } - const v = value as Record - // Reject NaN explicitly: `typeof NaN === 'number'` is true, but NaN - // values would propagate into date formatting ("NaN-NaN-NaN") and - // bucket arithmetic. Treat such lines as malformed. - return ( - typeof v.ts === 'number' - && !Number.isNaN(v.ts) - && typeof v.call === 'string' - && typeof v.snippet_chars === 'number' - && !Number.isNaN(v.snippet_chars) - && typeof v.file_chars === 'number' - && !Number.isNaN(v.file_chars) - ) -} - -function ymdUtc(timestampSeconds: number): string { - const d = new Date(timestampSeconds * 1000) - const y = d.getUTCFullYear() - const m = String(d.getUTCMonth() + 1).padStart(2, '0') - const day = String(d.getUTCDate()).padStart(2, '0') - return `${y}-${m}-${day}` -} - -/** - * Read `savings.jsonl` and return a {@link SavingsSummary}. - * - * Malformed lines are skipped silently. If the file is missing, an empty - * summary is returned. - */ -export function buildSavingsSummary(filePath?: string): SavingsSummary { - const target = filePath ?? 
_STATS_FILE - const now = new Date() - const nowSec = now.getTime() / 1000 - const today = ymdUtc(nowSec) - const sevenDaysAgo = ymdUtc(nowSec - 7 * 24 * 60 * 60) - - const buckets: Record = { - 'Today': new BucketStats(), - 'Last 7 days': new BucketStats(), - 'All time': new BucketStats(), - } - // Use a prototype-less object so JSONL `call` values like "toString" or - // "__proto__" can't collide with built-in object properties. - const callTypeCounts: Record = Object.create(null) as Record - - if (!existsSync(target)) { - return { buckets, callTypeCounts } - } - - const raw = readFileSync(target, 'utf8') - const lines = raw.split('\n') - for (const line of lines) { - if (line.length === 0) { - continue - } - let record: unknown - try { - record = JSON.parse(line) - } - catch { - // Match semble: skip malformed lines silently (semble logs a warning; - // we omit the warning to keep stats imports side-effect-free). - continue - } - if (!isStatsRecord(record)) { - continue - } - - const snippetChars = record.snippet_chars - const fileChars = record.file_chars - const callType = record.call - callTypeCounts[callType] = (callTypeCounts[callType] ?? 0) + 1 - - const day = ymdUtc(record.ts) - const inToday = day === today - const inLast7 = day > sevenDaysAgo - - buckets['All time']!.add(snippetChars, fileChars) - if (inLast7) { - buckets['Last 7 days']!.add(snippetChars, fileChars) - } - if (inToday) { - buckets.Today!.add(snippetChars, fileChars) - } - } - - return { buckets, callTypeCounts } -} - -function padRight(s: string, width: number): string { - if (s.length >= width) { - return s - } - return s + ' '.repeat(width - s.length) -} - -function padLeft(s: string, width: number): string { - if (s.length >= width) { - return s - } - return ' '.repeat(width - s.length) + s -} - -/** - * Whether ANSI colors should be emitted. Mirrors semble's `_use_color`: - * suppressed under `NO_COLOR`, a `dumb` terminal, or a non-TTY stdout. 
- */ -function useColor(): boolean { - return !('NO_COLOR' in process.env) - && process.env.TERM !== 'dumb' - && Boolean(process.stdout.isTTY) -} - -/** Wrap `text` in an ANSI color `code` when `enabled`. */ -function color(code: string, text: string, enabled: boolean): string { - return enabled ? `[${code}m${text}` : text -} - -/** Color a savings percentage by value: green ≥80, yellow ≥50, red below. */ -function colorRatio(pct: number, enabled: boolean): string { - const code = pct >= 80 ? '32' : pct >= 50 ? '33' : '31' - return color(code, `${pct}%`, enabled) -} - -function formatSavedTokens(savedTokens: number): string { - if (savedTokens >= 1_000_000) { - return `~${(savedTokens / 1_000_000).toFixed(1)}M` - } - if (savedTokens >= 1000) { - return `~${(savedTokens / 1000).toFixed(1)}k` - } - return `~${savedTokens}` -} - -function formatCalls(calls: number): string { - return calls >= 1000 ? `${(calls / 1000).toFixed(1)}k` : String(calls) -} - -export interface FormatSavingsReportOptions { - path?: string - verbose?: boolean -} - -/** - * Return a formatted token-savings report. - * - * Adopts semble's redesigned layout (PR #197): a headline summary - * (Total saved / Total calls / Efficiency bar) followed by a "By Period" - * table, with ANSI color when stdout is a color-capable TTY. Two csp - * divergences are preserved: the header reads "Csp Token Savings" (not - * "Semble Token Savings"), and the "By Call Type" breakdown stays gated - * behind `--verbose` rather than always shown. - */ -export function formatSavingsReport(options: FormatSavingsReportOptions = {}): string { - const target = options.path ?? _STATS_FILE - const verbose = options.verbose ?? false - - if (!existsSync(target)) { - return 'No stats yet. Run a search first.' 
- } - - const summary = buildSavingsSummary(target) - const enabled = useColor() - const barWidth = 24 - const borderWidth = 72 - const heavyLine = ` ${color('38;5;244', '═'.repeat(borderWidth), enabled)}` - const lightLine = ` ${color('38;5;244', '─'.repeat(borderWidth), enabled)}` - - const allTime = summary.buckets['All time']! - const totalSavedTokens = Math.floor(allTime.savedChars / 4) // ~4 chars/token approximation - const overallPct = allTime.fileChars > 0 - ? Math.round((allTime.savedChars / allTime.fileChars) * 100) - : 0 - const efficiencyFilled = Math.round((overallPct / 100) * barWidth) - const efficiencyBar - = color('32', '█'.repeat(efficiencyFilled), enabled) - + color('38;5;244', '░'.repeat(barWidth - efficiencyFilled), enabled) - - const lines: string[] = [ - '', - ` ${color('1;36', 'Csp Token Savings', enabled)}`, - heavyLine, - '', - ` ${color('1', 'Total saved:', enabled)} ${color('1;33', `${formatSavedTokens(totalSavedTokens)} tokens`, enabled)} (${colorRatio(overallPct, enabled)})`, - ` ${color('1', 'Total calls:', enabled)} ${color('1;33', formatCalls(allTime.calls), enabled)}`, - ` ${color('1', 'Efficiency:', enabled)} ${efficiencyBar} ${colorRatio(overallPct, enabled)}`, - '', - ` ${color('1', 'By Period', enabled)}`, - lightLine, - ` ${padRight('Period', 14)} ${padLeft('Calls', 8)} ${padLeft('Saved', 14)} Ratio`, - lightLine, - ] - - for (const [label, bucket] of Object.entries(summary.buckets)) { - const savedTokens = Math.floor(bucket.savedChars / 4) - const savedStr = `${formatSavedTokens(savedTokens)} tokens` - const callsStr = formatCalls(bucket.calls) - let rowBar: string - let ratioStr: string - if (bucket.fileChars > 0) { - const ratio = bucket.savedChars / bucket.fileChars - const filled = Math.round(ratio * barWidth) - rowBar = color('32', '█'.repeat(filled), enabled) + color('38;5;244', '░'.repeat(barWidth - filled), enabled) - ratioStr = colorRatio(Math.round(ratio * 100), enabled) - } - else { - rowBar = color('38;5;244', 
'░'.repeat(barWidth), enabled) - ratioStr = color('38;5;244', '–', enabled) - } - lines.push( - ` ${color('1', padRight(label, 14), enabled)} ${color('1;33', padLeft(callsStr, 8), enabled)} ` - + `${color('1;33', padLeft(savedStr, 14), enabled)} ${rowBar} ${ratioStr}`, - ) - } - - const callTypeEntries = Object.entries(summary.callTypeCounts) - if (verbose && callTypeEntries.length > 0) { - lines.push( - '', - ` ${color('1', 'By Call Type', enabled)}`, - lightLine, - ` ${padRight('#', 4)} ${padRight('Call type', 16)} ${padLeft('Calls', 8)} Share`, - lightLine, - ) - const total = callTypeEntries.reduce((sum, [, count]) => sum + count, 0) - // Sort by call count descending; ties keep insertion order. - const sorted = [...callTypeEntries].sort(([, a], [, b]) => b - a) - sorted.forEach(([callType, count], i) => { - const share = total > 0 ? count / total : 0 - const filled = Math.max(1, Math.round(share * 16)) - const bar = color('32', '█'.repeat(filled), enabled) + color('38;5;244', '░'.repeat(16 - filled), enabled) - const rank = `${i + 1}.` - lines.push( - ` ${color('38;5;244', padRight(rank, 4), enabled)} ${padRight(callType, 16)} ` - + `${color('1;33', padLeft(formatCalls(count), 8), enabled)} ${bar} ` - + `${color('38;5;244', padLeft(`${Math.round(share * 100)}%`, 4), enabled)}`, - ) - }) - } - lines.push(heavyLine) - lines.push('') - return lines.join('\n') -} diff --git a/src/tokens.test.ts b/src/tokens.test.ts deleted file mode 100644 index 840db9c..0000000 --- a/src/tokens.test.ts +++ /dev/null @@ -1,107 +0,0 @@ -// Port of src/semble/tokens.py tests - -import { describe, expect, it } from 'bun:test' -import { splitIdentifier, tokenize } from './tokens.ts' - -describe('splitIdentifier', () => { - it('splits PascalCase identifiers', () => { - expect(splitIdentifier('HandlerStack')).toEqual([ - 'handlerstack', - 'handler', - 'stack', - ]) - }) - - it('preserves runs of capitals as a single sub-token', () => { - 
expect(splitIdentifier('getHTTPResponse')).toEqual([ - 'gethttpresponse', - 'get', - 'http', - 'response', - ]) - }) - - it('handles leading run of capitals', () => { - expect(splitIdentifier('XMLParser')).toEqual([ - 'xmlparser', - 'xml', - 'parser', - ]) - }) - - it('splits snake_case identifiers', () => { - expect(splitIdentifier('my_func')).toEqual(['my_func', 'my', 'func']) - }) - - it('returns only the lowered token when there is no boundary', () => { - expect(splitIdentifier('simple')).toEqual(['simple']) - }) - - it('lowercases an already lower-case token', () => { - expect(splitIdentifier('Already')).toEqual(['already']) - }) - - it('keeps consecutive underscores from collapsing into duplicate parts', () => { - // Python `split('_')` produces empty strings between consecutive - // underscores; the upstream filter drops them. - expect(splitIdentifier('foo__bar')).toEqual(['foo__bar', 'foo', 'bar']) - }) - - it('treats a leading underscore as snake_case with one effective part', () => { - // `_foo`.split('_') === ['', 'foo'] -> filtered to ['foo'] -> len < 2 - expect(splitIdentifier('_foo')).toEqual(['_foo']) - }) - - it('splits digit runs as their own camel sub-token', () => { - expect(splitIdentifier('abc123Def')).toEqual([ - 'abc123def', - 'abc', - '123', - 'def', - ]) - }) - - it('splits kebab-case and dotted path stems on `-`/`.` separators', () => { - // `splitIdentifier` is also called on raw file-path stems (e.g. in - // ranking/boosting.ts). The camel regex treats `-`/`.` as separators, so - // the lowercase fast-path must not short-circuit these. 
- expect(splitIdentifier('user-service')).toEqual([ - 'user-service', - 'user', - 'service', - ]) - expect(splitIdentifier('foo.bar')).toEqual(['foo.bar', 'foo', 'bar']) - }) -}) - -describe('tokenize', () => { - it('splits plain space-separated words', () => { - expect(tokenize('foo bar baz')).toEqual(['foo', 'bar', 'baz']) - }) - - it('expands compound identifiers and drops non-identifier digits', () => { - // Numbers that do not start an identifier (e.g. "123") are not matched by - // TOKEN_RE, which mirrors the upstream Python behaviour. - expect(tokenize('camelCase_snake_case 123')).toEqual([ - 'camelcase_snake_case', - 'camelcase', - 'snake', - 'case', - ]) - }) - - it('returns an empty array for input with no identifiers', () => { - expect(tokenize(' !!! 123 ???')).toEqual([]) - }) - - it('preserves multiple identifiers and expands each', () => { - expect(tokenize('HandlerStack my_func')).toEqual([ - 'handlerstack', - 'handler', - 'stack', - 'my_func', - 'my', - 'func', - ]) - }) -}) diff --git a/src/tokens.ts b/src/tokens.ts deleted file mode 100644 index a0f7024..0000000 --- a/src/tokens.ts +++ /dev/null @@ -1,61 +0,0 @@ -// Port of src/semble/tokens.py - -const TOKEN_RE = /[a-z_]\w*/gi - -// Split on camelCase/PascalCase boundaries: -// "HandlerStack" -> ["Handler", "Stack"] -// "getHTTPResponse" -> ["get", "HTTP", "Response"] -// "XMLParser" -> ["XML", "Parser"] -const CAMEL_RE = /[A-Z]+(?=[A-Z][a-z])|[A-Z]?[a-z]+|[A-Z]+|\d+/g - -/** - * Split a single identifier into sub-tokens via camelCase/snake_case. - * - * Returns the original token (lowered) plus any sub-tokens. - * E.g. "HandlerStack" -> ["handlerstack", "handler", "stack"] - * "my_func" -> ["my_func", "my", "func"] - * "simple" -> ["simple"] - */ -export function splitIdentifier(token: string): string[] { - const lower = token.toLowerCase() - - // Fast-path: a token made up solely of lowercase ASCII letters cannot split - // further, since `CAMEL_RE` would match it as a single run. 
This guard is - // intentionally narrow — `splitIdentifier` is also called on raw path stems - // (e.g. "user-service", "foo.bar"), and `CAMEL_RE` treats `-`/`.` as - // separators, so those must fall through to the splitting logic below. - if (/^[a-z]+$/.test(token)) { - return [lower] - } - - let parts: string[] - - if (token.includes('_')) { - // snake_case splitting - parts = lower.split('_').filter(p => p.length > 0) - } - else { - // camelCase / PascalCase splitting - parts = Array.from(token.matchAll(CAMEL_RE), ([m]) => m.toLowerCase()) - } - - if (parts.length >= 2) { - return [lower, ...parts] - } - return [lower] -} - -/** - * Split text into lowercase identifier-like tokens for BM25 indexing. - * - * Compound identifiers (camelCase, PascalCase, snake_case) are expanded - * into sub-tokens so that partial matches work. The original compound - * token is preserved for exact-match boosting. - */ -export function tokenize(text: string): string[] { - const result: string[] = [] - for (const [match] of text.matchAll(TOKEN_RE)) { - result.push(...splitIdentifier(match)) - } - return result -} diff --git a/src/types.test.ts b/src/types.test.ts deleted file mode 100644 index a0328f3..0000000 --- a/src/types.test.ts +++ /dev/null @@ -1,211 +0,0 @@ -// Tests for src/types.ts — port-parity with src/semble/tests/test_types.py. 
- -import type { Chunk, ChunkDictInput } from './types' -import { describe, expect, test } from 'bun:test' -import { - CallType, - - chunkFromDict, - chunkLocation, - chunkToDict, - ContentType, - searchResultToDict, -} from './types' - -describe('ContentType', () => { - test('enum values match the Python str enum', () => { - expect(ContentType.CODE).toBe('code') - expect(ContentType.DOCS).toBe('docs') - expect(ContentType.CONFIG).toBe('config') - }) -}) - -describe('CallType', () => { - test('enum values match the Python str enum', () => { - expect(CallType.SEARCH).toBe('search') - // Python uses `find_related` (snake_case) — telemetry compatibility. - expect(CallType.FIND_RELATED).toBe('find_related') - }) -}) - -describe('chunkLocation', () => { - test('formats as filePath:startLine-endLine', () => { - const chunk: Chunk = { - content: 'x = 1', - filePath: 'file.ts', - startLine: 10, - endLine: 25, - } - expect(chunkLocation(chunk)).toBe('file.ts:10-25') - }) - - test('handles single-line chunks', () => { - const chunk: Chunk = { - content: 'x = 1', - filePath: 'src/a.py', - startLine: 5, - endLine: 5, - } - expect(chunkLocation(chunk)).toBe('src/a.py:5-5') - }) -}) - -describe('chunkToDict / chunkFromDict roundtrip', () => { - test('preserves all fields with language set', () => { - const original: Chunk = { - content: 'function foo() {}', - filePath: 'src/foo.ts', - startLine: 1, - endLine: 3, - language: 'typescript', - } - const dict = chunkToDict(original) - expect(dict).toEqual({ - content: 'function foo() {}', - filePath: 'src/foo.ts', - startLine: 1, - endLine: 3, - language: 'typescript', - location: 'src/foo.ts:1-3', - }) - const reconstructed = chunkFromDict(dict) - expect(reconstructed).toEqual(original) - }) - - test('preserves all fields with language omitted (undefined)', () => { - const original: Chunk = { - content: 'README content', - filePath: 'README.md', - startLine: 1, - endLine: 10, - } - const dict = chunkToDict(original) - // Python 
`asdict` emits `None`; we emit `null` to match wire format. - expect(dict.language).toBeNull() - expect(dict.location).toBe('README.md:1-10') - - const reconstructed = chunkFromDict(dict) - expect(reconstructed).toEqual(original) - expect(reconstructed.language).toBeUndefined() - }) - - test('chunkFromDict strips location before reconstruction', () => { - // A malformed `location` must not desync the reconstructed Chunk. - const reconstructed = chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: 1, - endLine: 2, - language: 'ts', - location: 'WRONG:999-999', - }) - // The derived location is recomputed from the line range — never trusted. - expect(chunkLocation(reconstructed)).toBe('a.ts:1-2') - }) - - test('chunkFromDict accepts null language (wire format)', () => { - const reconstructed = chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: 1, - endLine: 2, - language: null, - }) - expect(reconstructed.language).toBeUndefined() - }) - - test('chunkFromDict throws on null or non-object input', () => { - // The compile-time `ChunkDictInput` doesn't reach untrusted JSON callers, - // so the runtime guard must catch these before they pollute the index. 
- expect(() => chunkFromDict(null as unknown as ChunkDictInput)).toThrow(TypeError) - expect(() => chunkFromDict(undefined as unknown as ChunkDictInput)).toThrow(TypeError) - expect(() => chunkFromDict('oops' as unknown as ChunkDictInput)).toThrow(TypeError) - expect(() => chunkFromDict(42 as unknown as ChunkDictInput)).toThrow(TypeError) - }) - - test('chunkFromDict throws on missing or wrong-typed required fields', () => { - expect(() => chunkFromDict({} as unknown as ChunkDictInput)).toThrow(TypeError) - expect(() => - chunkFromDict({ content: 'x', filePath: 'a.ts', startLine: 1 } as unknown as ChunkDictInput), - ).toThrow(TypeError) - expect(() => - chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: '1', - endLine: 2, - } as unknown as ChunkDictInput), - ).toThrow(TypeError) - expect(() => - chunkFromDict({ - content: 'x', - filePath: 42, - startLine: 1, - endLine: 2, - } as unknown as ChunkDictInput), - ).toThrow(TypeError) - }) - - test('chunkFromDict throws on NaN or non-finite startLine/endLine', () => { - expect(() => - chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: Number.NaN, - endLine: 2, - }), - ).toThrow(TypeError) - expect(() => - chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: 1, - endLine: Number.POSITIVE_INFINITY, - }), - ).toThrow(TypeError) - expect(() => - chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: Number.NEGATIVE_INFINITY, - endLine: 2, - }), - ).toThrow(TypeError) - }) - - test('chunkFromDict throws when language has the wrong type', () => { - expect(() => - chunkFromDict({ - content: 'x', - filePath: 'a.ts', - startLine: 1, - endLine: 2, - language: 42, - } as unknown as ChunkDictInput), - ).toThrow(TypeError) - }) -}) - -describe('searchResultToDict', () => { - test('serialises chunk and score', () => { - const chunk: Chunk = { - content: 'def foo():\n pass', - filePath: 'foo.py', - startLine: 1, - endLine: 2, - language: 'python', - } - const result = { chunk, score: 
0.87 } - expect(searchResultToDict(result)).toEqual({ - chunk: { - content: 'def foo():\n pass', - filePath: 'foo.py', - startLine: 1, - endLine: 2, - language: 'python', - location: 'foo.py:1-2', - }, - score: 0.87, - }) - }) -}) diff --git a/src/types.ts b/src/types.ts deleted file mode 100644 index 9138704..0000000 --- a/src/types.ts +++ /dev/null @@ -1,146 +0,0 @@ -// Port of src/semble/types.py - -/** Call type for token-savings tracking. */ -export enum CallType { - SEARCH = 'search', - FIND_RELATED = 'find_related', -} - -/** Content type for indexing and search pipeline selection. */ -export enum ContentType { - CODE = 'code', - DOCS = 'docs', - CONFIG = 'config', -} - -/** A single indexable unit of code. */ -export interface Chunk { - content: string - filePath: string - startLine: number - endLine: number - language?: string | null -} - -/** A single search result with score and source. */ -export interface SearchResult { - chunk: Chunk - score: number - toDict: () => Record -} - -/** Statistics about the current index state. */ -export interface IndexStats { - indexedFiles: number - totalChunks: number - languages: Record -} - -// --------------------------------------------------------------------------- -// Canonical camelCase round-trip serialization -// --------------------------------------------------------------------------- -// -// These helpers are the on-disk / round-trip representation of a `Chunk`: -// camelCase field names (matching the in-memory `Chunk`) plus a derived -// `location`. They are intentionally *separate* from `search.ts`'s wire-format -// `SearchResult.toDict` (snake_case, for CLI/MCP JSON output) — the two -// serializations have different audiences and must not be conflated. - -/** A chunk serialized to a plain camelCase dict (e.g. for `chunks.json`). 
*/ -export interface ChunkDict { - content: string - filePath: string - startLine: number - endLine: number - language: string | null - location: string -} - -/** - * Input accepted by {@link chunkFromDict}. Mirrors {@link ChunkDict} but the - * derived `location` is optional (and ignored on reconstruction). - */ -export interface ChunkDictInput { - content: string - filePath: string - startLine: number - endLine: number - language?: string | null - location?: string -} - -/** Format a chunk's source location as `filePath:startLine-endLine`. */ -export function chunkLocation(chunk: Chunk): string { - return `${chunk.filePath}:${chunk.startLine}-${chunk.endLine}` -} - -/** - * Serialize a {@link Chunk} to a camelCase {@link ChunkDict}. `language` is - * normalized to `null` when absent (matching Python `asdict`'s `None`), and a - * derived `location` is appended. - */ -export function chunkToDict(chunk: Chunk): ChunkDict { - return { - content: chunk.content, - filePath: chunk.filePath, - startLine: chunk.startLine, - endLine: chunk.endLine, - language: chunk.language ?? null, - location: chunkLocation(chunk), - } -} - -function isFiniteNumber(value: unknown): value is number { - return typeof value === 'number' && Number.isFinite(value) -} - -/** - * Reconstruct a {@link Chunk} from a {@link ChunkDictInput}. The derived - * `location` is stripped (never trusted — it is recomputed from the line - * range), `null` language collapses to `undefined`, and malformed input throws - * a `TypeError` so corrupt JSON can't pollute the index. 
- */ -export function chunkFromDict(dict: ChunkDictInput): Chunk { - if (dict === null || typeof dict !== 'object') { - throw new TypeError('chunkFromDict: expected an object') - } - - const { content, filePath, startLine, endLine, language } = dict as unknown as Record - - if (typeof content !== 'string') { - throw new TypeError('chunkFromDict: `content` must be a string') - } - if (typeof filePath !== 'string') { - throw new TypeError('chunkFromDict: `filePath` must be a string') - } - if (!isFiniteNumber(startLine)) { - throw new TypeError('chunkFromDict: `startLine` must be a finite number') - } - if (!isFiniteNumber(endLine)) { - throw new TypeError('chunkFromDict: `endLine` must be a finite number') - } - if (language !== undefined && language !== null && typeof language !== 'string') { - throw new TypeError('chunkFromDict: `language` must be a string, null, or omitted') - } - - const chunk: Chunk = { content, filePath, startLine, endLine } - if (typeof language === 'string') { - chunk.language = language - } - return chunk -} - -/** - * Serialize a search result to a camelCase dict, embedding the camelCase - * {@link ChunkDict}. Counterpart to {@link chunkToDict} for results. Accepts - * the structural `{ chunk, score }` subset so it does not require the - * wire-format `toDict` closure carried by full {@link SearchResult} values. 
- */ -export function searchResultToDict( - result: { chunk: Chunk, score: number }, -): { chunk: ChunkDict, score: number } { - return { - chunk: chunkToDict(result.chunk), - score: result.score, - } -} diff --git a/src/utils.test.ts b/src/utils.test.ts deleted file mode 100644 index 03cdc84..0000000 --- a/src/utils.test.ts +++ /dev/null @@ -1,164 +0,0 @@ -import type { Chunk, SearchResult } from './types.ts' -// Port of src/semble/utils.py tests -import { describe, expect, it } from 'bun:test' -import { formatResults, isGitUrl, resolveChunk } from './utils.ts' - -function makeChunk(overrides: Partial = {}): Chunk { - return { - content: 'x', - filePath: 'a.ts', - startLine: 1, - endLine: 10, - ...overrides, - } -} - -describe('isGitUrl', () => { - it('returns true for https URLs', () => { - expect(isGitUrl('https://github.com/foo/bar')).toBe(true) - }) - - it('returns true for http URLs', () => { - expect(isGitUrl('http://example.com/foo/bar.git')).toBe(true) - }) - - it('returns true for ssh:// URLs', () => { - expect(isGitUrl('ssh://git@github.com/foo/bar.git')).toBe(true) - }) - - it('returns true for git:// URLs', () => { - expect(isGitUrl('git://github.com/foo/bar.git')).toBe(true) - }) - - it('returns true for git+ssh:// URLs', () => { - expect(isGitUrl('git+ssh://git@github.com/foo/bar.git')).toBe(true) - }) - - it('returns true for file:// URLs', () => { - expect(isGitUrl('file:///path/to/repo')).toBe(true) - }) - - it('returns true for scp-style git URLs', () => { - expect(isGitUrl('git@github.com:foo/bar.git')).toBe(true) - }) - - it('returns true for scp-style git URLs with dots/dashes', () => { - expect(isGitUrl('git-user.1@my-host.example.com:foo/bar')).toBe(true) - }) - - it('returns false for relative local paths', () => { - expect(isGitUrl('./local/path')).toBe(false) - }) - - it('returns false for absolute local paths', () => { - expect(isGitUrl('/abs/path')).toBe(false) - }) - - it('returns false for bare names', () => { - 
expect(isGitUrl('some-repo')).toBe(false) - }) - - it('returns false for scp-like input with a slash after the colon (treated as path)', () => { - // user@host:/abs/path is ambiguous; semble's regex excludes it via (?!/). - expect(isGitUrl('user@host:/abs/path')).toBe(false) - }) - - it('returns false for empty string', () => { - expect(isGitUrl('')).toBe(false) - }) -}) - -describe('resolveChunk', () => { - it('returns the inner chunk when line is at the boundary between adjacent chunks', () => { - // chunkA covers 1..10, chunkB covers 10..20. line=10 belongs strictly inside chunkB. - const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' }) - const chunkB = makeChunk({ startLine: 10, endLine: 20, content: 'B' }) - const result = resolveChunk([chunkA, chunkB], 'a.ts', 10) - expect(result).toBe(chunkB) - }) - - it('returns the chunk when line is on its endLine and no inner match exists (fallback)', () => { - const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' }) - const result = resolveChunk([chunkA], 'a.ts', 10) - expect(result).toBe(chunkA) - }) - - it('returns the chunk when line is strictly inside it', () => { - const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' }) - expect(resolveChunk([chunkA], 'a.ts', 5)).toBe(chunkA) - }) - - it('returns the chunk when line equals startLine (strict inner match)', () => { - const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' }) - expect(resolveChunk([chunkA], 'a.ts', 1)).toBe(chunkA) - }) - - it('returns null when line is outside any chunk', () => { - const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' }) - expect(resolveChunk([chunkA], 'a.ts', 11)).toBeNull() - }) - - it('returns null when filePath does not match', () => { - const chunkA = makeChunk({ startLine: 1, endLine: 10, filePath: 'a.ts' }) - expect(resolveChunk([chunkA], 'b.ts', 5)).toBeNull() - }) - - it('returns null for empty chunk list', () => { - expect(resolveChunk([], 'a.ts', 
1)).toBeNull() - }) - - it('ignores chunks from other files when matching', () => { - const other = makeChunk({ startLine: 1, endLine: 10, filePath: 'b.ts', content: 'B' }) - const wanted = makeChunk({ startLine: 1, endLine: 10, filePath: 'a.ts', content: 'A' }) - expect(resolveChunk([other, wanted], 'a.ts', 5)).toBe(wanted) - }) - - it('keeps the first fallback when no strict inner match is found across multiple end-boundary candidates', () => { - // Two contiguous end-only matches; the first one wins as the fallback. - const c1 = makeChunk({ startLine: 1, endLine: 10, content: 'c1' }) - const c2 = makeChunk({ startLine: 10, endLine: 10, content: 'c2' }) - expect(resolveChunk([c1, c2], 'a.ts', 10)).toBe(c1) - }) -}) - -describe('formatResults', () => { - it('returns the expected shape', () => { - const chunkDict = { - content: 'x', - file_path: 'a.ts', - start_line: 1, - end_line: 5, - language: null, - location: 'a.ts:1-5', - } - const result: SearchResult = { - chunk: makeChunk({ startLine: 1, endLine: 5 }), - score: 0.42, - toDict: () => ({ chunk: chunkDict, score: 0.42 }), - } - const out = formatResults('hello', [result]) - expect(out).toEqual({ - query: 'hello', - results: [{ chunk: chunkDict, score: 0.42 }], - }) - }) - - it('handles empty results', () => { - expect(formatResults('q', [])).toEqual({ query: 'q', results: [] }) - }) - - it('preserves order of results', () => { - const r1: SearchResult = { - chunk: makeChunk(), - score: 1, - toDict: () => ({ tag: 'first' }), - } - const r2: SearchResult = { - chunk: makeChunk(), - score: 0.5, - toDict: () => ({ tag: 'second' }), - } - const out = formatResults('q', [r1, r2]) - expect(out.results).toEqual([{ tag: 'first' }, { tag: 'second' }]) - }) -}) diff --git a/src/utils.ts b/src/utils.ts deleted file mode 100644 index cbc22d9..0000000 --- a/src/utils.ts +++ /dev/null @@ -1,66 +0,0 @@ -// Port of src/semble/utils.py - -import type { Chunk, SearchResult } from './types.ts' - -const GIT_URL_SCHEMES = [ - 
'https://', - 'http://', - 'ssh://', - 'git://', - 'git+ssh://', - 'file://', -] as const - -// scp-style git URL, e.g. `user@host:repo` (but not `user@host:/abs/path`). -const SCP_GIT_URL_RE = /^[\w.-]+@[\w.-]+:(?!\/)/ - -/** Return true if path looks like a remote git URL rather than a local path. */ -export function isGitUrl(path: string): boolean { - for (const scheme of GIT_URL_SCHEMES) { - if (path.startsWith(scheme)) { - return true - } - } - return SCP_GIT_URL_RE.test(path) -} - -/** - * Return the chunk containing `line` in `filePath`, or null. - * - * Mirrors semble.utils.resolve_chunk: a strict inner match (`line < endLine`) - * wins immediately; a boundary match (`line === endLine`) is kept only as a - * fallback so end-of-file lines still resolve. - */ -export function resolveChunk( - chunks: Chunk[], - filePath: string, - line: number, -): Chunk | null { - let fallback: Chunk | null = null - for (const chunk of chunks) { - if ( - chunk.filePath === filePath - && chunk.startLine <= line - && line <= chunk.endLine - ) { - if (line < chunk.endLine) { - return chunk - } - if (fallback === null) { - fallback = chunk - } - } - } - return fallback -} - -/** Render SearchResult objects as a JSONable object. */ -export function formatResults( - query: string, - results: SearchResult[], -): { query: string, results: Record[] } { - return { - query, - results: results.map(r => r.toDict()), - } -} diff --git a/src/version.ts b/src/version.ts deleted file mode 100644 index 7350a5e..0000000 --- a/src/version.ts +++ /dev/null @@ -1,11 +0,0 @@ -// Port of src/semble/version.py. -// -// The Python upstream stores a triple (`(0, 2, 0)`) and joins it for the -// string form. Here we expose a single literal because: -// * `package.json#version` is the source of truth for npm publishing. -// * Bun/tsdown don't read Python-style triples; reconstructing one would -// just be dead code. 
-// Kept in sync with `package.json#version` by release-please via the -// `x-release-please-version` annotation below (see release-please-config.json -// `extra-files`). -export const version = '0.1.4' // x-release-please-version diff --git a/tsconfig.json b/tsconfig.json index 7de76d0..1e2e1e0 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -21,6 +21,6 @@ "verbatimModuleSyntax": true, "skipLibCheck": true }, - "include": ["src/**/*", "eslint.config.ts", "tsdown.config.ts"], + "include": ["eslint.config.ts"], "exclude": ["node_modules", "dist"] } diff --git a/tsdown.config.ts b/tsdown.config.ts deleted file mode 100644 index 1811be7..0000000 --- a/tsdown.config.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { defineConfig } from 'tsdown' - -export default defineConfig({ - entry: ['src/index.ts', 'src/cli.ts'], - format: 'esm', - dts: true, - clean: true, - unbundle: true, -})