Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 36 additions & 58 deletions src/indexing/index.ts
Original file line number Diff line number Diff line change
@@ -1,76 +1,54 @@
// TODO(unit-12): replace with the real CspIndex implementation.
//
// This file is a *placeholder stub* so the public barrel (`src/index.ts`)
// type-checks and `bun test src/index.test.ts` can import the package in
// isolation. Unit 12 lands the real port of `src/semble/index/index.py`;
// when it merges, this file is overwritten wholesale.
//
// The barrel only re-exports the *name* `CspIndex` — consumers don't
// instantiate it from this stub. Keeping the placeholder as a class (rather
// than a stand-in `const`) means the `typeof CspIndex === 'function'` check
// in `src/index.test.ts` is satisfied without a working implementation
// behind it.
// Port of src/semble/index/index.py
// Minimal stub — full implementation lands in the indexing units.

import type { Chunk, IndexStats, SearchResult } from '../types.ts'
import type { Chunk, ContentType, SearchResult } from '../types.ts'

/**
 * Options bag accepted as the second parameter of `CspIndex.fromPath`.
 * Every field is optional; `fromPath` defaults the whole bag to `{}`.
 */
export interface CspIndexLoadOptions {
// NOTE(review): presumably a filesystem path to the embedding model — confirm against the real loader.
modelPath?: string
// List of `ContentType` values. NOTE(review): presumably restricts which kinds of content get indexed — confirm.
content?: ContentType[]
}

/**
 * Options bag accepted as the second parameter of `CspIndex.fromGit`.
 * Inherits every field of `CspIndexLoadOptions` and adds `ref`.
 */
export interface CspIndexFromGitOptions extends CspIndexLoadOptions {
// NOTE(review): presumably the git ref (branch, tag or commit) to index — confirm.
ref?: string
}

/**
* Hybrid (dense + BM25) code-search index.
* Hybrid (dense + BM25) code search index.
*
* Placeholder — Unit 12 ships the authoritative implementation porting
* `semble.index.index.SembleIndex` (factories `fromPath`/`fromGit`, search /
* findRelated, save/load, stats).
* This is a stub for the MCP unit; the real implementation lands in the
* indexing units. Only the surface area used by the MCP server is declared.
*/
export class CspIndex {
// Throw eagerly so an accidental `new CspIndex()` against the stub fails
// fast with a clear message, instead of looking like a working empty index.
constructor() {
throw new Error(
'CspIndex is a placeholder stub — Unit 12 (`feat/unit-12-index`) ships the real implementation.',
)
}

// Method signatures are intentionally omitted; the barrel only needs the
// class to *exist* as a value export. Consumers reaching for `.fromPath()`
// etc. against this stub would be using it before Unit 12 has merged,
// which is a sequencing bug worth surfacing as a `TypeError` at call site.
readonly chunks: Chunk[]

/** Placeholder — see Unit 12. */
static fromPath(..._args: unknown[]): Promise<CspIndex> {
return Promise.reject(new Error('CspIndex.fromPath: not implemented (Unit 12).'))
constructor(chunks: Chunk[] = []) {
this.chunks = chunks
}

/** Placeholder — see Unit 12. */
static fromGit(..._args: unknown[]): Promise<CspIndex> {
return Promise.reject(new Error('CspIndex.fromGit: not implemented (Unit 12).'))
static async fromPath(
_path: string,
_options: CspIndexLoadOptions = {},
): Promise<CspIndex> {
throw new Error('CspIndex.fromPath: not yet implemented (stub)')
}

/** Placeholder — see Unit 12. */
static load(..._args: unknown[]): Promise<CspIndex> {
return Promise.reject(new Error('CspIndex.load: not implemented (Unit 12).'))
static async fromGit(
_url: string,
_options: CspIndexFromGitOptions = {},
): Promise<CspIndex> {
throw new Error('CspIndex.fromGit: not yet implemented (stub)')
}

/** Placeholder — see Unit 12. */
search(..._args: unknown[]): SearchResult[] {
throw new Error('CspIndex.search: not implemented (Unit 12).')
search(_query: string, _options: { topK?: number } = {}): SearchResult[] {
return []
}

/** Placeholder — see Unit 12. */
findRelated(..._args: unknown[]): SearchResult[] {
throw new Error('CspIndex.findRelated: not implemented (Unit 12).')
}

/** Placeholder — see Unit 12. */
save(..._args: unknown[]): Promise<void> {
return Promise.reject(new Error('CspIndex.save: not implemented (Unit 12).'))
}

/** Placeholder — see Unit 12. */
get stats(): IndexStats {
throw new Error('CspIndex.stats: not implemented (Unit 12).')
findRelated(_chunk: Chunk, _options: { topK?: number } = {}): SearchResult[] {
return []
}
}

/** Placeholder — see Unit 12. */
get chunks(): readonly Chunk[] {
throw new Error('CspIndex.chunks: not implemented (Unit 12).')
}
/**
 * Loader for the embedding model (stub).
 *
 * The declared result is a two-element tuple whose second element is a string
 * (presumably the model's on-disk path — confirm once the real loader lands).
 * This stub never resolves.
 *
 * @throws Error Always; the returned promise rejects with
 *   `loadModel: not yet implemented (stub)`.
 */
export async function loadModel(): Promise<[unknown, string]> {
  return Promise.reject(new Error('loadModel: not yet implemented (stub)'))
}
262 changes: 262 additions & 0 deletions src/mcp/server.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
import { beforeEach, describe, expect, it, mock } from 'bun:test'

// Shared state behind the mocked indexing module, so tests can steer
// CspIndex.fromPath/fromGit and loadModel without spinning up real embeddings.

// Factory for the index instances handed out by the default implementations.
let makeIndex: () => FakeIndex = () => new FakeIndex([])

// Swappable bodies for the static factories; individual tests replace these
// to simulate failures.
let fromPathImpl: () => Promise<unknown> = async () => makeIndex()
let fromGitImpl: () => Promise<unknown> = async () => makeIndex()

// Invocation counters for the static factories.
let fromPathCalls = 0
let fromGitCalls = 0

/** Shape of one chunk stored by the fake index. */
type FakeChunk = {
  content: string
  filePath: string
  startLine: number
  endLine: number
}

/** Shape of one hit returned by the fake query methods. */
type FakeHit = {
  chunk: FakeChunk
  score: number
  toDict: () => Record<string, unknown>
}

/**
 * Minimal in-memory stand-in for the index used by the server tests.
 *
 * It stores the chunk array it is given (an empty array by default) and
 * answers every query with an empty result list.
 */
class FakeIndex {
  constructor(readonly chunks: FakeChunk[] = []) {}

  /** Ignores the query and options; always yields no hits. */
  search(_q: string, _opts?: { topK?: number }): FakeHit[] {
    return []
  }

  /** Ignores the chunk and options; always yields no hits. */
  findRelated(_c: FakeChunk, _opts?: { topK?: number }): FakeHit[] {
    return []
  }
}

/**
 * Test double installed as `CspIndex` in the mocked indexing module.
 *
 * Each static factory bumps its call counter and then delegates to the
 * currently installed `fromPathImpl` / `fromGitImpl`, ignoring its arguments.
 */
class MockedCspIndex extends FakeIndex {
  /** Records the call, then defers to `fromPathImpl`. */
  static async fromPath(..._args: unknown[]): Promise<FakeIndex> {
    fromPathCalls += 1
    const pending = fromPathImpl() as Promise<FakeIndex>
    return pending
  }

  /** Records the call, then defers to `fromGitImpl`. */
  static async fromGit(..._args: unknown[]): Promise<FakeIndex> {
    fromGitCalls += 1
    const pending = fromGitImpl() as Promise<FakeIndex>
    return pending
  }
}

// Hand out instances of the mocked class from now on, so the `instanceof`
// assertions in the tests succeed.
makeIndex = () => new MockedCspIndex([])

// Swap the indexing module for the doubles: the mocked class stands in for
// CspIndex, and loadModel resolves to a fixed [model, path] pair.
await mock.module('../indexing/index.ts', () => {
  const loadModel = async (): Promise<[unknown, string]> => [null, '/tmp/fake-model']
  return { CspIndex: MockedCspIndex, loadModel }
})

// Import AFTER mocking so server.ts picks up the mocked module.
// Dynamic `import()` is used because static imports are hoisted and would be
// evaluated before the `mock.module` call above has run.
const { _internal, createServer, IndexCache } = await import('./server.ts')
// `ContentType.CODE` is passed to the `IndexCache` constructor in the caching test.
const { ContentType } = await import('../types.ts')
// Namespace handle on the indexing module; tests use `indexing.CspIndex` in
// their `toBeInstanceOf` assertions.
const indexing = await import('../indexing/index.ts')

// Reset the factory hooks and call counters before every test so that no test
// observes state left behind by a previous one.
beforeEach(() => {
  fromPathImpl = async () => makeIndex()
  fromGitImpl = async () => makeIndex()
  fromPathCalls = 0
  fromGitCalls = 0
})

// IndexCache behaviour: memoisation, de-duplication of concurrent lookups,
// eviction, the ten-entry LRU bound, git-vs-path dispatch and failure recovery.
describe('IndexCache', () => {
  it('caches results — second call returns the cached value', async () => {
    const source = '/tmp/some-repo'
    const indexCache = new IndexCache({ content: [ContentType.CODE] })

    const initial = await indexCache.get(source)
    const repeated = await indexCache.get(source)

    expect(repeated).toBe(initial)
    expect(fromPathCalls).toBe(1)
  })

  it('deduplicates concurrent get() for the same source', async () => {
    const indexCache = new IndexCache()

    // Both lookups are started before either one is awaited.
    const inFlight = [indexCache.get('/tmp/dedup-repo'), indexCache.get('/tmp/dedup-repo')]
    const [left, right] = await Promise.all(inFlight)

    expect(left).toBe(right)
    expect(fromPathCalls).toBe(1)
  })

  it('evict() removes the cached entry so the next get() rebuilds', async () => {
    const source = '/tmp/repo-to-evict'
    const indexCache = new IndexCache()

    await indexCache.get(source)
    expect(fromPathCalls).toBe(1)

    await indexCache.evict(source)

    await indexCache.get(source)
    expect(fromPathCalls).toBe(2)
  })

  it('LRU: the 11th distinct source evicts the oldest', async () => {
    const indexCache = new IndexCache()

    // Fill the cache sequentially with ten distinct sources.
    let filled = 0
    while (filled < 10) {
      await indexCache.get(`/tmp/lru-${filled}`)
      filled += 1
    }
    expect(indexCache.size).toBe(10)

    await indexCache.get('/tmp/lru-10')
    expect(indexCache.size).toBe(10)

    // /tmp/lru-0 was the oldest entry, so asking for it again must rebuild it.
    const callsBeforeRefetch = fromPathCalls
    await indexCache.get('/tmp/lru-0')
    expect(fromPathCalls).toBe(callsBeforeRefetch + 1)
  })

  it('treats git URLs differently from local paths', async () => {
    const indexCache = new IndexCache()

    await indexCache.get('https://github.com/org/repo')
    expect(fromGitCalls).toBe(1)
    expect(fromPathCalls).toBe(0)

    await indexCache.get('/tmp/local-path')
    expect(fromPathCalls).toBe(1)
  })

  it('evict() awaitably blocks until the cache entry is gone', async () => {
    const source = '/tmp/await-evict'
    const indexCache = new IndexCache()

    await indexCache.get(source)
    expect(indexCache.size).toBe(1)

    await indexCache.evict(source)
    expect(indexCache.size).toBe(0)
  })

  it('failed get() does not poison the cache entry', async () => {
    const source = '/tmp/will-fail'
    fromPathImpl = () => Promise.reject(new Error('boom'))

    const indexCache = new IndexCache()
    await expect(indexCache.get(source)).rejects.toThrow('boom')

    // With a working builder restored, the same source is retried.
    fromPathImpl = async () => makeIndex()
    await expect(indexCache.get(source)).resolves.toBeInstanceOf(indexing.CspIndex)
  })
})

// _internal.getIndex: scheme allow-listing, default-source fallback and
// wrapping of errors raised while building an index.
describe('getIndex (safety layer)', () => {
  // Sources whose scheme must be refused, paired with their test titles.
  const refusedSources: Array<[string, string]> = [
    ['rejects ssh:// git URLs', 'ssh://git@github.com/org/repo.git'],
    ['rejects git:// git URLs', 'git://github.com/org/repo.git'],
    ['rejects file:// pseudo-URLs', 'file:///tmp/whatever'],
  ]

  for (const [title, source] of refusedSources) {
    it(title, async () => {
      const indexCache = new IndexCache()
      const attempt = _internal.getIndex(source, undefined, indexCache)
      await expect(attempt).rejects.toThrow(/Only https:\/\/, http:\/\//)
    })
  }

  it('rejects when repo and defaultSource are both undefined', async () => {
    const indexCache = new IndexCache()
    const attempt = _internal.getIndex(undefined, undefined, indexCache)
    await expect(attempt).rejects.toThrow(/No repo specified/)
  })

  it('falls back to defaultSource when repo is undefined', async () => {
    const indexCache = new IndexCache()

    const resolved = await _internal.getIndex(undefined, '/tmp/default-repo', indexCache)

    expect(resolved).toBeInstanceOf(indexing.CspIndex)
    expect(fromPathCalls).toBe(1)
  })

  it('accepts https:// git URLs', async () => {
    const indexCache = new IndexCache()

    const resolved = await _internal.getIndex('https://github.com/org/repo', undefined, indexCache)

    expect(resolved).toBeInstanceOf(indexing.CspIndex)
    expect(fromGitCalls).toBe(1)
  })

  it('wraps underlying index errors in a descriptive message', async () => {
    fromPathImpl = () => Promise.reject(new Error('disk full'))

    const indexCache = new IndexCache()
    const attempt = _internal.getIndex('/tmp/bad', undefined, indexCache)

    await expect(attempt).rejects.toThrow(/Failed to index .*disk full/)
  })
})

// createServer: checks which tools are registered and their titles, plus the
// string payloads the `search` and `find_related` handlers produce for empty
// results, safety errors and a missing chunk.
describe('createServer', () => {
  it('returns a server object exposing `search` and `find_related` tools', async () => {
    const indexCache = new IndexCache()
    const server = await createServer(indexCache, '/tmp/default')

    expect(server.tools.has('search')).toBe(true)
    expect(server.tools.has('find_related')).toBe(true)

    const searchTool = server.tools.get('search')!
    expect(searchTool.title).toBe(
      'Search a codebase with a natural-language or code query.',
    )

    const findRelatedTool = server.tools.get('find_related')!
    expect(findRelatedTool.title).toBe(
      'Find code chunks semantically similar to a specific location in a file.',
    )
  })

  it('`search` handler returns "No results" JSON when the index yields nothing', async () => {
    const indexCache = new IndexCache()
    const server = await createServer(indexCache, '/tmp/default')
    const searchTool = server.tools.get('search')!

    const payload = await searchTool.handler({ query: 'foo' })

    expect(JSON.parse(payload)).toEqual({ error: 'No results found.' })
  })

  it('`search` handler surfaces safety errors as plain strings', async () => {
    const indexCache = new IndexCache()
    // No defaultSource is configured and the call below passes no repo either.
    const server = await createServer(indexCache)
    const searchTool = server.tools.get('search')!

    const payload = await searchTool.handler({ query: 'foo' })

    expect(payload).toMatch(/No repo specified/)
  })

  it('`search` handler rejects ssh:// git URLs as a plain-string error', async () => {
    const indexCache = new IndexCache()
    const server = await createServer(indexCache)
    const searchTool = server.tools.get('search')!

    const payload = await searchTool.handler({
      query: 'foo',
      repo: 'ssh://git@github.com/org/repo',
    })

    expect(payload).toMatch(/Only https:\/\/, http:\/\//)
  })

  it('`find_related` handler returns a helpful message when the chunk is missing', async () => {
    const indexCache = new IndexCache()
    const server = await createServer(indexCache, '/tmp/default')
    const findRelatedTool = server.tools.get('find_related')!

    const payload = await findRelatedTool.handler({ file_path: 'nope.ts', line: 42 })

    // The dot is escaped so the pattern matches the literal file name only;
    // an unescaped `.` would also accept any other character in that position.
    expect(payload).toMatch(/No chunk found at nope\.ts:42/)
  })
})
Loading