From 5ca6a055ecd3adb71c881e7378a360f65d460066 Mon Sep 17 00:00:00 2001 From: Minsu Lee Date: Thu, 18 Jun 2026 16:17:01 +0900 Subject: [PATCH 1/2] fix(chunking): enable AST chunking by wiring real ALL_LANGUAGES (#28) ALL_LANGUAGES in src/chunking/core.ts was an empty stub Set, so isSupportedLanguage() always returned false and chunk-source.ts never took the tree-sitter AST path -- every file silently fell back to line chunking. Replace the stub with an import of the real, populated set from src/indexing/files.ts. The dependency direction is one-way (indexing -> chunking; files.ts imports nothing), so no cycle is introduced -- confirmed by passing typecheck, the full test suite, and runtime CLI index/search. Also correct core.test.ts, which asserted the broken stub behavior (isSupportedLanguage('typescript') === false). It now expects true for known languages (typescript, python) and false for unknown ones. Closes #28 --- src/chunking/core.test.ts | 6 +++--- src/chunking/core.ts | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/chunking/core.test.ts b/src/chunking/core.test.ts index 5dcfdb4..9900fd3 100644 --- a/src/chunking/core.test.ts +++ b/src/chunking/core.test.ts @@ -20,9 +20,9 @@ describe('constants', () => { }) describe('isSupportedLanguage', () => { - it('returns false for unknown languages (Unit 4 stub)', () => { - expect(isSupportedLanguage('typescript')).toBe(false) - expect(isSupportedLanguage('python')).toBe(false) + it('returns true for known languages and false for unknown ones', () => { + expect(isSupportedLanguage('typescript')).toBe(true) + expect(isSupportedLanguage('python')).toBe(true) expect(isSupportedLanguage('not-a-real-language')).toBe(false) }) }) diff --git a/src/chunking/core.ts b/src/chunking/core.ts index 78da8a5..69e5ad0 100644 --- a/src/chunking/core.ts +++ b/src/chunking/core.ts @@ -8,10 +8,7 @@ // loads even when the package is not yet installed, falling back to the // line chunker in that case. 
-// Stub for ALL_LANGUAGES until Unit 4 (language detection) lands. -// Once `src/indexing/files.ts` exists, replace this with: -// import { ALL_LANGUAGES } from '../indexing/files.ts' -const ALL_LANGUAGES: ReadonlySet<string> = new Set() +import { ALL_LANGUAGES } from '../indexing/files.ts' export const RECURSION_DEPTH = 500 export const MIN_CHUNK_SIZE = 50 From 961088300e652df0614400a6664a8b9e046f96c3 Mon Sep 17 00:00:00 2001 From: Minsu Lee Date: Thu, 18 Jun 2026 17:05:20 +0900 Subject: [PATCH 2/2] =?UTF-8?q?refactor(languages):=20extract=20language?= =?UTF-8?q?=20tables=20to=20leaf=20module=20to=20break=20chunking=E2=86=94?= =?UTF-8?q?indexing=20cycle=20(#28)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move src/indexing/files.ts → src/languages.ts (a dependency-free leaf) and repoint chunking/core.ts, indexing/cache.ts, indexing/create.ts at it. Previously chunking/core.ts imported ALL_LANGUAGES from ../indexing/files.ts while indexing/create.ts imports ../chunking/chunk-source.ts, forming a package-level circular dependency (ADP violation, flagged by gemini-code-assist on #31). languages.ts has no internal imports, so both chunking and indexing now depend on a lower-level leaf and the directory cycle is gone. No behavior change: pure module relocation + import-path updates. --- src/chunking/core.ts | 2 +- src/indexing/cache.ts | 2 +- src/indexing/create.ts | 2 +- src/{indexing/files.test.ts => languages.test.ts} | 2 +- src/{indexing/files.ts => languages.ts} | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename src/{indexing/files.test.ts => languages.test.ts} (99%) rename src/{indexing/files.ts => languages.ts} (100%) diff --git a/src/chunking/core.ts b/src/chunking/core.ts index 69e5ad0..3c72114 100644 --- a/src/chunking/core.ts +++ b/src/chunking/core.ts @@ -8,7 +8,7 @@ // loads even when the package is not yet installed, falling back to the // line chunker in that case.
-import { ALL_LANGUAGES } from '../indexing/files.ts' +import { ALL_LANGUAGES } from '../languages.ts' export const RECURSION_DEPTH = 500 export const MIN_CHUNK_SIZE = 50 diff --git a/src/indexing/cache.ts b/src/indexing/cache.ts index bf20422..1b0bca2 100644 --- a/src/indexing/cache.ts +++ b/src/indexing/cache.ts @@ -20,10 +20,10 @@ import { chmodSync, existsSync, mkdirSync, readdirSync, realpathSync, rmSync } f import { readFile, stat } from 'node:fs/promises' import { homedir } from 'node:os' import { basename, dirname, join, normalize, relative } from 'node:path' +import { getExtensions } from '../languages.ts' import { isGitUrl } from '../utils.ts' import { MAX_FILE_BYTES } from './create.ts' import { walkFiles } from './file-walker.ts' -import { getExtensions } from './files.ts' import { CspIndex, DEFAULT_CONTENT, parseManifest } from './index.ts' /** Directory permissions for every cache directory (owner-only). NFR-003. */ diff --git a/src/indexing/create.ts b/src/indexing/create.ts index a503954..446e75c 100644 --- a/src/indexing/create.ts +++ b/src/indexing/create.ts @@ -5,11 +5,11 @@ import type { Model } from './dense.ts' import { readFileSync, statSync } from 'node:fs' import { relative } from 'node:path' import { chunkSource } from '../chunking/chunk-source.ts' +import { detectLanguage, getExtensions } from '../languages.ts' import { tokenize } from '../tokens.ts' import { ContentType } from '../types.ts' import { embedChunks, SelectableBasicBackend } from './dense.ts' import { walkFiles } from './file-walker.ts' -import { detectLanguage, getExtensions } from './files.ts' import { Bm25Index, enrichForBm25 } from './sparse.ts' /** 1 MB max file size to read and index. 
*/ diff --git a/src/indexing/files.test.ts b/src/languages.test.ts similarity index 99% rename from src/indexing/files.test.ts rename to src/languages.test.ts index 2090559..5a8c69c 100644 --- a/src/indexing/files.test.ts +++ b/src/languages.test.ts @@ -7,7 +7,7 @@ import { DOC_LANGUAGES, EXTENSION_TO_LANGUAGE, getExtensions, -} from './files.ts' +} from './languages.ts' describe('detectLanguage', () => { it('detects typescript from .ts', () => { diff --git a/src/indexing/files.ts b/src/languages.ts similarity index 100% rename from src/indexing/files.ts rename to src/languages.ts