From 9e56611e2ad6d7631bd88234d97ceb77518adc70 Mon Sep 17 00:00:00 2001
From: Minsu Lee <minsu.lee@dietfriends.kr>
Date: Fri, 29 May 2026 00:21:11 +0900
Subject: [PATCH] feat(index): public library barrel re-exporting CspIndex +
 types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port of `src/semble/__init__.py` and `src/semble/version.py`.

The barrel stitches the documented public surface:
- `CspIndex` (from `./indexing/index.ts`, owned by Unit 12)
- `Chunk`, `SearchResult`, `IndexStats`, `EmbeddingMatrix`
  (types from `./types.ts`, owned by Unit 1)
- `ContentType` (re-exported as a *value* so the runtime enum object
  survives `verbatimModuleSyntax` — `export type {}` would erase it)
- `version` (from `./version.ts`, currently mirrors `package.json#version`)

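Includes `// TODO` placeholder stubs for Unit 1 (`src/types.ts`) and
Unit 12 (`src/indexing/index.ts`) so the barrel type-checks and
`src/index.test.ts` runs in isolation. Both placeholders are clearly
marked and will be overwritten when the owning unit lands.

Note: on this base both files already exist with fuller contents, so the
stubs replace them (633 deletions in the diffstat); helpers such as
`CallType`, `chunkToDict` and `chunkFromDict` are dropped from
`src/types.ts` until the owning units are re-applied.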

Co-authored-by: Minsu Lee <minsu.lee@dietfriends.kr>
---
 src/index.test.ts     |  48 ++++
 src/index.ts          |  24 +-
 src/indexing/index.ts | 495 +++++-------------------------------------
 src/types.ts          | 205 ++---------------
 src/version.ts        |  10 +
 5 files changed, 149 insertions(+), 633 deletions(-)
 create mode 100644 src/index.test.ts
 create mode 100644 src/version.ts

diff --git a/src/index.test.ts b/src/index.test.ts
new file mode 100644
index 0000000..bf84542
--- /dev/null
+++ b/src/index.test.ts
@@ -0,0 +1,48 @@
+// Smoke tests for the public library barrel.
+//
+// These don't exercise behavior — Unit 12 (CspIndex) and Unit 1 (types) own
+// their own deep tests. The point here is to lock down the *shape* of the
+// public surface so we'd catch:
+//   * an accidental rename of `CspIndex` / `ContentType` / `version`,
+//   * a regression of `ContentType` to a type-only export (which would
+//     break `import { ContentType } from '@pleaseai/csp'` at runtime).
+//
+// The wildcard `import * as csp` is deliberate: it also verifies the module
+// *evaluates* cleanly as an ESM barrel (no circular value-time imports).
+import { describe, expect, it } from 'bun:test'
+
+import * as csp from './index.ts'
+
+describe('public barrel', () => {
+  it('imports without error and exposes the documented names', () => {
+    // Use a `Set` so the membership check is order-independent; each name is
+    // asserted on its own rather than through a positional array diff.
+    const exported = new Set(Object.keys(csp))
+    for (const name of ['CspIndex', 'ContentType', 'version']) {
+      expect(exported.has(name)).toBe(true)
+    }
+  })
+
+  it('exposes `version` as a string', () => {
+    expect(typeof csp.version).toBe('string')
+    // Guard against an empty string sneaking in (e.g. failed build-time
+    // substitution); a real version is always non-empty.
+    expect(csp.version.length).toBeGreaterThan(0)
+  })
+
+  it('exposes `CspIndex` as a constructable value', () => {
+    // `typeof X === 'function'` covers both `class` and plain functions,
+    // which keeps the test resilient if Unit 12 chooses a factory-style
+    // implementation instead of a class.
+    expect(typeof csp.CspIndex).toBe('function')
+  })
+
+  it('exposes `ContentType` as a runtime enum object with `code | docs | config`', () => {
+    // The string values are part of the on-disk / CLI contract (`--content code`,
+    // persisted indices). They must NOT be tweaked without coordinating with
+    // the semble compatibility story documented in CLAUDE.md.
+    expect(csp.ContentType.Code).toBe('code')
+    expect(csp.ContentType.Docs).toBe('docs')
+    expect(csp.ContentType.Config).toBe('config')
+  })
+})
diff --git a/src/index.ts b/src/index.ts
index d5a5614..553f5f2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1 +1,23 @@
-export const version = '0.0.0'
+// Public library barrel — port of `src/semble/__init__.py`.
+//
+// External consumers `import { CspIndex, ContentType, ... } from '@pleaseai/csp'`,
+// so this file's surface is load-bearing and matches the README.
+//
+// `ContentType` is intentionally re-exported as a *value* (not via
+// `export type`) because Unit 1's port models it as a `const`-object enum:
+// the identifier carries both a runtime value and a same-named type alias.
+// With `verbatimModuleSyntax`, exporting it via `export {}` carries both
+// forms; listing it under `export type {}` would erase the runtime side.
+
+export { CspIndex } from './indexing/index.ts'
+
+export type {
+  Chunk,
+  EmbeddingMatrix,
+  IndexStats,
+  SearchResult,
+} from './types.ts'
+
+export { ContentType } from './types.ts'
+
+export { version } from './version.ts'
diff --git a/src/indexing/index.ts b/src/indexing/index.ts
index a38fed5..c407af3 100644
--- a/src/indexing/index.ts
+++ b/src/indexing/index.ts
@@ -1,465 +1,76 @@
-// Port of src/semble/index/index.py
+// TODO(unit-12): replace with the real CspIndex implementation.
+//
+// This file is a *placeholder stub* so the public barrel (`src/index.ts`)
+// type-checks and `bun test src/index.test.ts` can import the package in
+// isolation. Unit 12 lands the real port of `src/semble/index/index.py`;
+// when it merges, this file is overwritten wholesale.
+//
+// The barrel only re-exports the *name* `CspIndex` — consumers don't
+// instantiate it from this stub. Keeping the placeholder as a class (rather
+// than a stand-in `const`) means the `typeof CspIndex === 'function'` check
+// in `src/index.test.ts` is satisfied without a working implementation
+// behind it.
 
-import { spawn } from 'node:child_process'
-import { statSync } from 'node:fs'
-import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
-import { join, parse as parsePath, resolve, sep } from 'node:path'
-import { fileURLToPath } from 'node:url'
 import type { Chunk, IndexStats, SearchResult } from '../types.ts'
-import { CallType, ContentType, chunkFromDict, chunkToDict } from '../types.ts'
-import { createIndexFromPath } from './create.ts'
-import type { Model } from './dense.ts'
-import { SelectableBasicBackend, loadModel } from './dense.ts'
-import { Bm25Index } from './sparse.ts'
-import { search, searchSemantic } from '../search.ts'
-import { saveSearchStats } from '../stats.ts'
-import { PersistencePath } from './types.ts'
 
-/** Default content set: code only. */
-export const DEFAULT_CONTENT: readonly ContentType[] = [ContentType.Code]
-/** All content types — used by the `--content all` CLI flag. */
-export const ALL_CONTENT: readonly ContentType[] = [ContentType.Code, ContentType.Docs, ContentType.Config]
-
-/** Timeout (ms) applied to `git clone` invocations. */
-export const GIT_CLONE_TIMEOUT_MS = Number.parseInt(process.env.CSP_CLONE_TIMEOUT ?? '60', 10) * 1000
-
-export interface CspIndexConstructorArgs {
-  model: Model
-  bm25Index: Bm25Index
-  semanticIndex: SelectableBasicBackend
-  chunks: Chunk[]
-  modelPath: string
-  root?: string | null
-  content?: ContentType | readonly ContentType[]
-}
-
-export interface FromPathOptions {
-  extensions?: readonly string[]
-  content?: ContentType | readonly ContentType[]
-  modelPath?: string | null
-}
-
-export interface FromGitOptions extends FromPathOptions {
-  ref?: string | null
-}
-
-export interface SearchInvocationOptions {
-  topK?: number
-  alpha?: number | null
-  filterLanguages?: readonly string[]
-  filterPaths?: readonly string[]
-  rerank?: boolean | null
-}
-
-export interface FindRelatedOptions {
-  topK?: number
-}
-
-/** Fast local code index with hybrid (semantic + BM25) search. */
+/**
+ * Hybrid (dense + BM25) code-search index.
+ *
+ * Placeholder — Unit 12 ships the authoritative implementation porting
+ * `semble.index.index.SembleIndex` (factories `fromPath`/`fromGit`, search /
+ * findRelated, save/load, stats).
+ */
 export class CspIndex {
-  readonly model: Model
-  readonly chunks: Chunk[]
-
-  private readonly _bm25Index: Bm25Index
-  private readonly _semanticIndex: SelectableBasicBackend
-  private readonly _modelPath: string
-  private readonly _root: string | null
-  private readonly _content: readonly ContentType[]
-  private readonly _fileSizes: Record<string, number>
-  private readonly _fileMapping: Record<string, number[]>
-  private readonly _languageMapping: Record<string, number[]>
-
-  constructor(args: CspIndexConstructorArgs) {
-    this.model = args.model
-    this.chunks = args.chunks
-    this._bm25Index = args.bm25Index
-    this._semanticIndex = args.semanticIndex
-    this._modelPath = args.modelPath
-    this._root = args.root ?? null
-    this._content = normalizeContent(args.content ?? DEFAULT_CONTENT)
-    this._fileSizes = this._root ? this._computeFileSizes(this._root) : {}
-    const mappings = this._populateMapping()
-    this._fileMapping = mappings.file
-    this._languageMapping = mappings.language
-  }
-
-  /** Aggregate index statistics. */
-  get stats(): IndexStats {
-    const languageCounts: Record<string, number> = {}
-    for (const chunk of this.chunks) {
-      if (chunk.language) {
-        languageCounts[chunk.language] = (languageCounts[chunk.language] ?? 0) + 1
-      }
-    }
-    return {
-      indexedFiles: Object.keys(this._fileMapping).length,
-      totalChunks: this.chunks.length,
-      languages: languageCounts,
-    }
-  }
-
-  /** Create and index a CspIndex from a local directory. */
-  static async fromPath(
-    path: string | URL,
-    options: FromPathOptions = {},
-  ): Promise<CspIndex> {
-    const resolved = await resolveDirectory(path)
-    const { model, modelPath } = await loadModel(options.modelPath)
-    const normalized = normalizeContent(options.content ?? DEFAULT_CONTENT)
-    const created = await createIndexFromPath(resolved, {
-      model,
-      ...(options.extensions !== undefined ? { extensions: options.extensions } : {}),
-      content: normalized,
-      displayRoot: resolved,
-    })
-    return new CspIndex({
-      model,
-      bm25Index: created.bm25Index,
-      semanticIndex: created.semanticIndex,
-      chunks: created.chunks,
-      modelPath,
-      root: resolved,
-      content: normalized,
-    })
-  }
-
-  /** Clone a git repository to a tmp dir, index it, then clean up the clone. */
-  static async fromGit(
-    url: string,
-    options: FromGitOptions = {},
-  ): Promise<CspIndex> {
-    const normalized = normalizeContent(options.content ?? DEFAULT_CONTENT)
-    const tmpDir = await mkdtemp(join(tmpdir(), 'csp-'))
-    try {
-      await runGitClone(url, tmpDir, options.ref ?? null)
-
-      const { model, modelPath } = await loadModel(options.modelPath)
-      const resolved = resolve(tmpDir)
-      const created = await createIndexFromPath(resolved, {
-        model,
-        ...(options.extensions !== undefined ? { extensions: options.extensions } : {}),
-        content: normalized,
-        displayRoot: resolved,
-      })
-      return new CspIndex({
-        model,
-        bm25Index: created.bm25Index,
-        semanticIndex: created.semanticIndex,
-        chunks: created.chunks,
-        modelPath,
-        root: resolved,
-        content: normalized,
-      })
-    }
-    finally {
-      // Best-effort cleanup. Swallow rm errors so they never mask the original
-      // exception (Node 22 `rm` can race against AV scanners on Windows). The
-      // tmp dir lives under the OS tmpdir which is purged by the OS anyway.
-      await rm(tmpDir, { recursive: true, force: true, maxRetries: 3 }).catch(() => {})
-    }
+  // Throw eagerly so an accidental `new CspIndex()` against the stub fails
+  // fast with a clear message, instead of looking like a working empty index.
+  constructor() {
+    throw new Error(
+      'CspIndex is a placeholder stub — Unit 12 (`feat/unit-12-index`) ships the real implementation.',
+    )
   }
 
-  /** Load a previously-saved index from disk. */
-  static async loadFromDisk(path: string): Promise<CspIndex> {
-    let exists = true
-    try {
-      await stat(path)
-    }
-    catch {
-      exists = false
-    }
-    if (!exists) throw new Error(`Index not found at ${path}`)
-
-    const persistencePaths = PersistencePath.fromPath(path)
-    const missing = persistencePaths.nonExisting()
-    if (missing.length > 0) {
-      throw new Error(`Index not found at ${path}. Missing: ${missing.join(', ')}`)
-    }
+  // Parameter lists are intentionally loose (`...unknown[]`); the barrel only
+  // needs the class to *exist* as a value export. Consumers reaching for
+  // `.fromPath()` etc. against this stub would be using it before Unit 12 has
+  // merged — a sequencing bug surfaced as a "not implemented" `Error`.
 
-    const bm25Index = Bm25Index.load(persistencePaths.bm25Index)
-    const semanticIndex = SelectableBasicBackend.load(persistencePaths.semanticIndex)
-    const metadataRaw = await readFile(persistencePaths.metadata, 'utf8')
-    const metadata = JSON.parse(metadataRaw) as {
-      root_path?: string | null
-      model_path?: string | null
-    }
-    const chunkRaw = await readFile(persistencePaths.chunks, 'utf8')
-    const chunkData = JSON.parse(chunkRaw) as Array<Record<string, unknown>>
-    const chunks = chunkData.map(chunkFromDict)
-
-    const { model, modelPath } = await loadModel(metadata.model_path ?? null)
-    return new CspIndex({
-      model,
-      bm25Index,
-      semanticIndex,
-      chunks,
-      modelPath,
-      root: metadata.root_path ?? null,
-    })
+  /** Placeholder — see Unit 12. */
+  static fromPath(..._args: unknown[]): Promise<CspIndex> {
+    return Promise.reject(new Error('CspIndex.fromPath: not implemented (Unit 12).'))
   }
 
-  /** Search the index and return the top-k most relevant chunks. */
-  search(query: string, options: SearchInvocationOptions = {}): SearchResult[] {
-    if (this.chunks.length === 0 || query.trim().length === 0) return []
-
-    const topK = options.topK ?? 10
-    if (topK <= 0) return []
-
-    const filterLanguages = options.filterLanguages
-    const filterPaths = options.filterPaths
-    const resolvedRerank = options.rerank ?? this._content.includes(ContentType.Code)
-    const selector = this._getSelectorVector(filterLanguages, filterPaths)
-    // Honor the user's filter when it matches zero chunks — bypass the
-    // ranking pipeline rather than falling back to an unfiltered search.
-    if (selector !== null && selector.length === 0) {
-      saveSearchStats([], CallType.Search, this._fileSizes)
-      return []
-    }
-
-    const results = search(
-      query,
-      this.model,
-      this._semanticIndex,
-      this._bm25Index,
-      this.chunks,
-      topK,
-      {
-        alpha: options.alpha ?? null,
-        ...(selector !== null ? { selector } : {}),
-        rerank: resolvedRerank,
-      },
-    )
-    saveSearchStats(results, CallType.Search, this._fileSizes)
-    return results
+  /** Placeholder — see Unit 12. */
+  static fromGit(..._args: unknown[]): Promise<CspIndex> {
+    return Promise.reject(new Error('CspIndex.fromGit: not implemented (Unit 12).'))
   }
 
-  /** Return chunks semantically similar to the given chunk or search result. */
-  findRelated(
-    source: Chunk | SearchResult,
-    options: FindRelatedOptions = {},
-  ): SearchResult[] {
-    const topK = options.topK ?? 5
-    const target = isSearchResult(source) ? source.chunk : source
-    const selector
-      = target.language
-        ? this._getSelectorVector([target.language], undefined)
-        : null
-    const results = searchSemantic(
-      target.content,
-      this.model,
-      this._semanticIndex,
-      this.chunks,
-      topK + 1,
-      selector,
-    )
-    const filtered = results
-      .filter(r => !sameChunk(r.chunk, target))
-      .slice(0, topK)
-    saveSearchStats(filtered, CallType.FindRelated, this._fileSizes)
-    return filtered
+  /** Placeholder — see Unit 12. */
+  static load(..._args: unknown[]): Promise<CspIndex> {
+    return Promise.reject(new Error('CspIndex.load: not implemented (Unit 12).'))
   }
 
-  /** Persist the index to disk under `path` (created if missing). */
-  async save(path: string): Promise<void> {
-    await mkdir(path, { recursive: true })
-    const persistencePaths = PersistencePath.fromPath(path)
-    this._bm25Index.save(persistencePaths.bm25Index)
-    this._semanticIndex.save(persistencePaths.semanticIndex)
-    const chunksAsDict = this.chunks.map(chunkToDict)
-    await writeFile(persistencePaths.chunks, JSON.stringify(chunksAsDict))
-    const metadata = {
-      root_path: this._root,
-      time: Date.now() / 1000,
-      model_path: this._modelPath,
-    }
-    await writeFile(persistencePaths.metadata, JSON.stringify(metadata))
+  /** Placeholder — see Unit 12. */
+  search(..._args: unknown[]): SearchResult[] {
+    throw new Error('CspIndex.search: not implemented (Unit 12).')
   }
 
-  private _populateMapping(): {
-    file: Record<string, number[]>
-    language: Record<string, number[]>
-  } {
-    const file: Record<string, number[]> = {}
-    const language: Record<string, number[]> = {}
-    for (let i = 0; i < this.chunks.length; i++) {
-      const chunk = this.chunks[i]!
-      if (chunk.language) {
-        const arr = language[chunk.language]
-        if (arr) arr.push(i)
-        else language[chunk.language] = [i]
-      }
-      const arr = file[chunk.filePath]
-      if (arr) arr.push(i)
-      else file[chunk.filePath] = [i]
-    }
-    return { file, language }
+  /** Placeholder — see Unit 12. */
+  findRelated(..._args: unknown[]): SearchResult[] {
+    throw new Error('CspIndex.findRelated: not implemented (Unit 12).')
   }
 
-  private _computeFileSizes(root: string): Record<string, number> {
-    const sizes: Record<string, number> = {}
-    for (const chunk of this.chunks) {
-      if (chunk.filePath in sizes) continue
-      try {
-        // Mirror Python's `root / chunk.file_path`: absolute paths win,
-        // relative paths resolve against `root`.
-        const abs = resolve(root, chunk.filePath)
-        // `statSync` returns the on-disk byte size — avoids reading the file
-        // (cheaper, especially for files up to MAX_FILE_BYTES = 1 MB) and
-        // matches Python's `len(read_text(...))` closely enough for the
-        // savings-tracking use case while reporting actual UTF-8 byte counts.
-        sizes[chunk.filePath] = statSync(abs).size
-      }
-      catch {
-        /* swallow */
-      }
-    }
-    return sizes
+  /** Placeholder — see Unit 12. */
+  save(..._args: unknown[]): Promise<void> {
+    return Promise.reject(new Error('CspIndex.save: not implemented (Unit 12).'))
   }
 
-  private _getSelectorVector(
-    filterLanguages?: readonly string[],
-    filterPaths?: readonly string[],
-  ): number[] | null {
-    // Distinguish "no filter requested" (return null → search everything)
-    // from "filter requested but matched nothing" (return [] → search nothing).
-    // Semble's Python parity check is `if selector` which conflates the two
-    // and falls back to unfiltered search on empty results — that is a latent
-    // correctness bug there. We diverge intentionally to honor user intent.
-    const hasLanguageFilter
-      = filterLanguages !== undefined && filterLanguages.length > 0
-    const hasPathFilter = filterPaths !== undefined && filterPaths.length > 0
-    if (!hasLanguageFilter && !hasPathFilter) return null
-
-    const out = new Set<number>()
-    if (filterLanguages) {
-      for (const language of filterLanguages) {
-        const ids = this._languageMapping[language]
-        if (ids) for (const i of ids) out.add(i)
-      }
-    }
-    if (filterPaths) {
-      for (const filename of filterPaths) {
-        const ids = this._fileMapping[filename]
-        if (ids) for (const i of ids) out.add(i)
-      }
-    }
-    return [...out].sort((a, b) => a - b)
+  /** Placeholder — see Unit 12. */
+  get stats(): IndexStats {
+    throw new Error('CspIndex.stats: not implemented (Unit 12).')
   }
-}
-
-function normalizeContent(
-  content: ContentType | readonly ContentType[],
-): readonly ContentType[] {
-  if (Array.isArray(content)) return content
-  return [content as ContentType]
-}
 
-function isSearchResult(value: Chunk | SearchResult): value is SearchResult {
-  return (value as SearchResult).chunk !== undefined
-    && typeof (value as SearchResult).score === 'number'
-}
-
-function sameChunk(a: Chunk, b: Chunk): boolean {
-  return (
-    a.filePath === b.filePath
-    && a.startLine === b.startLine
-    && a.endLine === b.endLine
-    && a.content === b.content
-  )
-}
-
-async function resolveDirectory(path: string | URL): Promise<string> {
-  const raw = path instanceof URL ? fileURLToPath(path) : path
-  let info
-  try {
-    info = await stat(raw)
-  }
-  catch {
-    throw new Error(`Path does not exist: ${raw}`)
-  }
-  if (!info.isDirectory()) {
-    throw new Error(`Path is not a directory: ${raw}`)
+  /** Placeholder — see Unit 12. */
+  get chunks(): readonly Chunk[] {
+    throw new Error('CspIndex.chunks: not implemented (Unit 12).')
   }
-  // Drop any trailing separator for consistency with semble's Path.resolve()
-  // — but preserve filesystem root paths (`/` on POSIX, `C:\` on Windows)
-  // since stripping their trailing sep would mutate the resolved location.
-  let resolved = resolve(raw)
-  const rootOfResolved = parsePath(resolved).root
-  if (resolved.length > rootOfResolved.length && resolved.endsWith(sep)) {
-    resolved = resolved.slice(0, -1)
-  }
-  return resolved
-}
-
-/**
- * Shell-out to `git clone --depth 1` into `tmpDir`.
- *
- * Uses `spawn` (not `execFile`) so stdin can be redirected to `/dev/null` —
- * this mirrors semble's `subprocess.run(..., stdin=subprocess.DEVNULL)` and
- * prevents a hung remote from blocking on a tty prompt.
- */
-async function runGitClone(url: string, tmpDir: string, ref: string | null): Promise<void> {
-  // `--` prevents `url` from being interpreted as a git option (e.g. `--upload-pack=...`).
-  const args = [
-    'clone',
-    '--depth',
-    '1',
-    ...(ref ? ['--branch', ref] : []),
-    '--',
-    url,
-    tmpDir,
-  ]
-  await new Promise<void>((resolvePromise, rejectPromise) => {
-    let child
-    try {
-      // stdin: 'ignore' mirrors Python's `subprocess.DEVNULL` so a stuck remote
-      // can't block on a tty prompt.
-      // stdout: 'ignore' avoids the OS pipe buffer filling and deadlocking
-      // `git clone` when verbose hooks/configs print large amounts of output.
-      // stderr: 'pipe' so we surface the error message on non-zero exit.
-      child = spawn('git', args, { stdio: ['ignore', 'ignore', 'pipe'] })
-    }
-    catch (err) {
-      const e = err as NodeJS.ErrnoException
-      if (e.code === 'ENOENT') {
-        rejectPromise(new Error('git is not installed or not on PATH'))
-        return
-      }
-      rejectPromise(err as Error)
-      return
-    }
-    let stderr = ''
-    let timedOut = false
-    const timer = setTimeout(() => {
-      timedOut = true
-      child.kill('SIGTERM')
-    }, GIT_CLONE_TIMEOUT_MS)
-    child.stderr?.setEncoding('utf8')
-    child.stderr?.on('data', (chunk: string) => {
-      stderr += chunk
-    })
-    child.on('error', (err: NodeJS.ErrnoException) => {
-      clearTimeout(timer)
-      if (err.code === 'ENOENT') {
-        rejectPromise(new Error('git is not installed or not on PATH'))
-        return
-      }
-      rejectPromise(err)
-    })
-    child.on('close', (code) => {
-      clearTimeout(timer)
-      if (timedOut) {
-        rejectPromise(new Error(
-          `git clone timed out for ${JSON.stringify(url)} (limit: ${GIT_CLONE_TIMEOUT_MS / 1000} s)`,
-        ))
-        return
-      }
-      if (code !== 0) {
-        rejectPromise(new Error(`git clone failed for ${JSON.stringify(url)}:\n${stderr.trim()}`))
-        return
-      }
-      resolvePromise()
-    })
-  })
 }
diff --git a/src/types.ts b/src/types.ts
index 5675bfa..740c4cb 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,29 +1,19 @@
-// Port of src/semble/types.py
+// TODO(unit-1): replace with the real port from `feat/unit-1-types`.
 //
-// Public field names are camelCase (not snake_case) — see ARCHITECTURE.md:
-// "Public field names are camelCase, not snake_case." The upstream Python
-// exposes `chunk.file_path` / `start_line` / `end_line`; the TS port exposes
-// `filePath` / `startLine` / `endLine`. This is load-bearing for the public
-// surface documented in README.md.
-
-/**
- * Call type for token-savings tracking.
- *
- * Port of `semble.types.CallType`. Values match the Python `str` enum so
- * serialised telemetry (`~/.csp/savings.jsonl`) stays compatible.
- */
-export const CallType = {
-  Search: 'search',
-  FindRelated: 'find_related',
-} as const
-export type CallType = (typeof CallType)[keyof typeof CallType]
+// This file is a *placeholder stub* so the public barrel (`src/index.ts`)
+// type-checks and `bun test src/index.test.ts` can import the package in
+// isolation. Unit 1 lands the real port of `src/semble/types.py`; when it
+// merges, this file is overwritten wholesale (see PR `feat/unit-1-types`).
+//
+// Keep the exported names and value/type duality of `ContentType` in lockstep
+// with Unit 1 — the barrel re-exports both forms.
 
 /**
  * Content type for indexing and search pipeline selection.
  *
- * Port of `semble.types.ContentType`. Values match the Python `str` enum
- * (`'code' | 'docs' | 'config'`) so CLI flags (`--content code`) and persisted
- * indices round-trip across the two implementations.
+ * Placeholder mirroring Unit 1's `const`-object enum. Values are the same
+ * lowercase strings as the upstream Python `str` enum so CLI flags and
+ * persisted indices round-trip.
  */
 export const ContentType = {
   Code: 'code',
@@ -32,18 +22,7 @@ export const ContentType = {
 } as const
 export type ContentType = (typeof ContentType)[keyof typeof ContentType]
 
-/**
- * A single indexable unit of code.
- *
- * Port of `semble.types.Chunk` (frozen dataclass). Fields are camelCase per
- * the public surface contract; use {@link chunkFromDict} to construct from
- * serialised data and {@link chunkToDict} to serialise.
- *
- * Treat instances as immutable — helpers do not mutate, and consumers should
- * not either. `readonly` makes the shape compile-time immutable; we don't
- * `Object.freeze` at construction time to avoid the runtime cost on hot paths
- * (large `Chunk[]` arrays during indexing).
- */
+/** Placeholder shape — Unit 1 ships the authoritative definition. */
 export interface Chunk {
   readonly content: string
   readonly filePath: string
@@ -52,172 +31,18 @@ export interface Chunk {
   readonly language?: string | undefined
 }
 
-/**
- * A single search result with score and source.
- *
- * Port of `semble.types.SearchResult`.
- */
+/** Placeholder shape — Unit 1 ships the authoritative definition. */
 export interface SearchResult {
   readonly chunk: Chunk
   readonly score: number
 }
 
-/**
- * Statistics about the current index state.
- *
- * Port of `semble.types.IndexStats`.
- */
+/** Placeholder shape — Unit 1 ships the authoritative definition. */
 export interface IndexStats {
   readonly indexedFiles: number
   readonly totalChunks: number
   readonly languages: Readonly<Record<string, number>>
 }
 
-/**
- * Flat row-major Float32 embedding matrix.
- *
- * Port of `semble.types.EmbeddingMatrix` (`npt.NDArray[np.float32]`).
- *
- * We use a single `Float32Array` (row-major) instead of `Float32Array[]`
- * because:
- *   1. Dense retrieval computes `embeddings @ query` as one contiguous BLAS-
- *      style sweep — a flat buffer keeps that hot loop cache-friendly and
- *      avoids per-row indirection.
- *   2. Persistence (semble pickles the numpy matrix) maps cleanly onto a
- *      single binary blob without per-row length headers.
- * The companion {@link EmbeddingShape} carries `(rows, dim)` since a flat
- * `Float32Array` has lost that information.
- */
+/** Placeholder alias — Unit 1 ships the authoritative definition. */
 export type EmbeddingMatrix = Float32Array
-
-/** Shape companion for a flat row-major {@link EmbeddingMatrix}. */
-export interface EmbeddingShape {
-  readonly rows: number
-  readonly dim: number
-}
-
-/**
- * Format a chunk's source location as `filePath:startLine-endLine`.
- *
- * Port of the `Chunk.location` `@property` in Python. Kept as a free function
- * because `Chunk` is a plain interface (no methods) in the TS port.
- */
-export function chunkLocation(chunk: Chunk): string {
-  return `${chunk.filePath}:${chunk.startLine}-${chunk.endLine}`
-}
-
-/**
- * Serialised form of a {@link Chunk}.
- *
- * `location` is included for consumer convenience (matches Python
- * `Chunk.to_dict`) and is reconstructed from the other fields, never trusted
- * on the way back in — see {@link chunkFromDict}.
- */
-export interface ChunkDict {
-  content: string
-  filePath: string
-  startLine: number
-  endLine: number
-  language: string | null
-  location: string
-}
-
-/**
- * Convert a {@link Chunk} to a plain serialisable object.
- *
- * Port of `Chunk.to_dict`. Includes the derived `location` field. Mirrors
- * Python's `dataclasses.asdict`, which represents `Optional[str] = None` as
- * literal `null` rather than omitting the key — keeping that shape preserves
- * JSON parity across the two implementations.
- */
-export function chunkToDict(chunk: Chunk): ChunkDict {
-  return {
-    content: chunk.content,
-    filePath: chunk.filePath,
-    startLine: chunk.startLine,
-    endLine: chunk.endLine,
-    language: chunk.language ?? null,
-    location: chunkLocation(chunk),
-  }
-}
-
-/** Input shape accepted by {@link chunkFromDict} — `location` is ignored. */
-export interface ChunkDictInput {
-  content: string
-  filePath: string
-  startLine: number
-  endLine: number
-  language?: string | null | undefined
-  location?: string | undefined
-}
-
-/**
- * Reconstruct a {@link Chunk} from a {@link ChunkDict}.
- *
- * Port of `Chunk.from_dict`. The `location` field, if present, is stripped
- * before construction (it's a derived value; trusting it on the way in would
- * let a malformed payload desynchronise it from the line range).
- *
- * This is a trust boundary: TypeScript's compile-time `ChunkDictInput` is
- * bypassed when parsing untrusted JSON (persisted indices, MCP payloads,
- * external callers). Validate at runtime so malformed input fails loudly
- * with a `TypeError` instead of producing a `Chunk` with `NaN` line numbers
- * or `undefined` fields that surface as confusing errors deeper in the
- * pipeline.
- */
-export function chunkFromDict(data: ChunkDictInput): Chunk {
-  if (data === null || typeof data !== 'object') {
-    throw new TypeError('chunkFromDict: data must be a non-null object')
-  }
-  const d = data as Record<string, unknown>
-  if (typeof d.content !== 'string'
-    || typeof d.filePath !== 'string'
-    || typeof d.startLine !== 'number'
-    || typeof d.endLine !== 'number'
-    || !Number.isFinite(d.startLine)
-    || !Number.isFinite(d.endLine)) {
-    throw new TypeError(
-      'chunkFromDict: missing or invalid required fields '
-      + '(content: string, filePath: string, startLine: finite number, endLine: finite number)',
-    )
-  }
-  if (d.language !== undefined && d.language !== null && typeof d.language !== 'string') {
-    throw new TypeError('chunkFromDict: language must be a string, null, or omitted')
-  }
-  // `exactOptionalPropertyTypes` distinguishes "language: undefined" from
-  // omitted; build the object conditionally so the resulting Chunk matches
-  // the `language?: string | undefined` signature exactly.
-  const language = d.language ?? undefined
-  return language === undefined
-    ? {
-        content: d.content,
-        filePath: d.filePath,
-        startLine: d.startLine,
-        endLine: d.endLine,
-      }
-    : {
-        content: d.content,
-        filePath: d.filePath,
-        startLine: d.startLine,
-        endLine: d.endLine,
-        language: language as string,
-      }
-}
-
-/** Serialised form of a {@link SearchResult}. */
-export interface SearchResultDict {
-  chunk: ChunkDict
-  score: number
-}
-
-/**
- * Convert a {@link SearchResult} to a plain serialisable object.
- *
- * Port of `SearchResult.to_dict`.
- */
-export function searchResultToDict(result: SearchResult): SearchResultDict {
-  return {
-    chunk: chunkToDict(result.chunk),
-    score: result.score,
-  }
-}
diff --git a/src/version.ts b/src/version.ts
new file mode 100644
index 0000000..88ac6c4
--- /dev/null
+++ b/src/version.ts
@@ -0,0 +1,10 @@
+// Port of src/semble/version.py.
+//
+// The Python upstream stores a triple (`(0, 2, 0)`) and joins it for the
+// string form. Here we expose a single literal because:
+//   * `package.json#version` is the source of truth for npm publishing.
+//   * Bun/tsdown don't read Python-style triples; reconstructing one would
+//     just be dead code.
+// A future integration PR will keep this in sync with `package.json#version`
+// (e.g. via a generated file or a build-time replacement).
+export const version = '0.0.0'