-
Notifications
You must be signed in to change notification settings - Fork 0
feat(utils): port isGitUrl/resolveChunk/formatResults from semble #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| // Port of src/semble/utils.py tests | ||
| import { describe, expect, it } from 'bun:test' | ||
| import type { Chunk, SearchResult } from './utils.ts' | ||
| import { formatResults, isGitUrl, resolveChunk } from './utils.ts' | ||
|
|
||
/**
 * Build a `Chunk` fixture for the tests in this file.
 *
 * The defaults describe a chunk with content `'x'` covering lines 1..10 of
 * `a.ts`; any field present in `overrides` replaces the matching default.
 *
 * @param overrides - Fields to override on the default fixture.
 * @returns A fresh `Chunk` object on every call.
 */
function makeChunk(overrides: Partial<Chunk> = {}): Chunk {
  return {
    content: 'x',
    filePath: 'a.ts',
    startLine: 1,
    endLine: 10,
    // Spread last so caller-supplied fields win over the defaults above.
    ...overrides,
  }
}
|
|
||
// isGitUrl: every supported remote form is accepted; local paths and
// ambiguous inputs are rejected. Cases are table-driven so each input sits
// next to the test-name fragment that describes it.
describe('isGitUrl', () => {
  // [test-name suffix, input] pairs that must be classified as git URLs.
  const accepted: ReadonlyArray<readonly [string, string]> = [
    ['https URLs', 'https://github.com/foo/bar'],
    ['http URLs', 'http://example.com/foo/bar.git'],
    ['ssh:// URLs', 'ssh://git@github.com/foo/bar.git'],
    ['git:// URLs', 'git://github.com/foo/bar.git'],
    ['git+ssh:// URLs', 'git+ssh://git@github.com/foo/bar.git'],
    ['file:// URLs', 'file:///path/to/repo'],
    ['scp-style git URLs', 'git@github.com:foo/bar.git'],
    ['scp-style git URLs with dots/dashes', 'git-user.1@my-host.example.com:foo/bar'],
  ]

  // [test-name suffix, input] pairs that must NOT be classified as git URLs.
  const rejected: ReadonlyArray<readonly [string, string]> = [
    ['relative local paths', './local/path'],
    ['absolute local paths', '/abs/path'],
    ['bare names', 'some-repo'],
    // user@host:/abs/path is ambiguous; semble's regex excludes it via (?!/).
    ['scp-like input with a slash after the colon (treated as path)', 'user@host:/abs/path'],
    ['empty string', ''],
  ]

  for (const [label, input] of accepted) {
    it(`returns true for ${label}`, () => {
      expect(isGitUrl(input)).toBe(true)
    })
  }

  for (const [label, input] of rejected) {
    it(`returns false for ${label}`, () => {
      expect(isGitUrl(input)).toBe(false)
    })
  }
})
|
|
||
// resolveChunk: a line strictly before a chunk's endLine resolves to that
// chunk at once; a line equal to endLine is only a fallback candidate.
describe('resolveChunk', () => {
  it('returns the inner chunk when line is at the boundary between adjacent chunks', () => {
    // chunkA covers 1..10, chunkB covers 10..20. line=10 is chunkA's end
    // boundary but lies strictly inside chunkB, so chunkB must win.
    const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' })
    const chunkB = makeChunk({ startLine: 10, endLine: 20, content: 'B' })
    const result = resolveChunk([chunkA, chunkB], 'a.ts', 10)
    expect(result).toBe(chunkB)
  })

  it('returns the chunk when line is on its endLine and no inner match exists (fallback)', () => {
    const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' })
    const result = resolveChunk([chunkA], 'a.ts', 10)
    expect(result).toBe(chunkA)
  })

  it('returns the chunk when line is strictly inside it', () => {
    const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' })
    expect(resolveChunk([chunkA], 'a.ts', 5)).toBe(chunkA)
  })

  it('returns the chunk when line equals startLine (strict inner match)', () => {
    const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' })
    expect(resolveChunk([chunkA], 'a.ts', 1)).toBe(chunkA)
  })

  it('returns null when line is outside any chunk', () => {
    const chunkA = makeChunk({ startLine: 1, endLine: 10, content: 'A' })
    expect(resolveChunk([chunkA], 'a.ts', 11)).toBeNull()
  })

  it('returns null when filePath does not match', () => {
    const chunkA = makeChunk({ startLine: 1, endLine: 10, filePath: 'a.ts' })
    expect(resolveChunk([chunkA], 'b.ts', 5)).toBeNull()
  })

  it('returns null for empty chunk list', () => {
    expect(resolveChunk([], 'a.ts', 1)).toBeNull()
  })

  it('ignores chunks from other files when matching', () => {
    // Same line range in both files; only the filePath distinguishes them.
    const other = makeChunk({ startLine: 1, endLine: 10, filePath: 'b.ts', content: 'B' })
    const wanted = makeChunk({ startLine: 1, endLine: 10, filePath: 'a.ts', content: 'A' })
    expect(resolveChunk([other, wanted], 'a.ts', 5)).toBe(wanted)
  })

  it('keeps the first fallback when no strict inner match is found across multiple end-boundary candidates', () => {
    // Both chunks end on line 10, so neither is a strict inner match; the
    // first one encountered is kept as the fallback.
    const c1 = makeChunk({ startLine: 1, endLine: 10, content: 'c1' })
    const c2 = makeChunk({ startLine: 10, endLine: 10, content: 'c2' })
    expect(resolveChunk([c1, c2], 'a.ts', 10)).toBe(c1)
  })
})
|
|
||
// formatResults: the output is `{ query, results }`, where each entry of
// `results` is whatever the corresponding SearchResult's toDict() returned.
describe('formatResults', () => {
  it('returns the expected shape', () => {
    // The dict shape is owned by toDict(); formatResults must pass it through
    // untouched, snake_case keys included.
    const chunkDict = {
      content: 'x',
      file_path: 'a.ts',
      start_line: 1,
      end_line: 5,
      language: null,
      location: 'a.ts:1-5',
    }
    const result: SearchResult = {
      chunk: makeChunk({ startLine: 1, endLine: 5 }),
      score: 0.42,
      toDict: () => ({ chunk: chunkDict, score: 0.42 }),
    }
    const out = formatResults('hello', [result])
    expect(out).toEqual({
      query: 'hello',
      results: [{ chunk: chunkDict, score: 0.42 }],
    })
  })

  it('handles empty results', () => {
    expect(formatResults('q', [])).toEqual({ query: 'q', results: [] })
  })

  it('preserves order of results', () => {
    // Distinct tags make any reordering visible in the output.
    const r1: SearchResult = {
      chunk: makeChunk(),
      score: 1,
      toDict: () => ({ tag: 'first' }),
    }
    const r2: SearchResult = {
      chunk: makeChunk(),
      score: 0.5,
      toDict: () => ({ tag: 'second' }),
    }
    const out = formatResults('q', [r1, r2])
    expect(out.results).toEqual([{ tag: 'first' }, { tag: 'second' }])
  })
})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| // Port of src/semble/utils.py | ||
|
|
||
// Stopgap structural types until ./types.ts lands.
// Mirror semble.types.Chunk / SearchResult with camelCase field names per
// the @pleaseai/csp public-API conventions.

/** A span of lines taken from a single file. */
export interface Chunk {
  /** Text of the chunk. */
  content: string
  /** Path of the file the chunk belongs to; `resolveChunk` compares it by exact string equality. */
  filePath: string
  /** First line covered by the chunk (inclusive, as `resolveChunk` treats it). */
  startLine: number
  /** Last line covered by the chunk (inclusive, as `resolveChunk` treats it). */
  endLine: number
  /** Language of the chunk, if known. NOTE(review): the set of accepted values is not defined here; confirm against semble.types.Chunk. */
  language?: string | null
}
|
|
||
/** A chunk together with its score and a serialiser for output. */
export interface SearchResult {
  /** The chunk this result refers to. */
  chunk: Chunk
  /** Score attached to the chunk. NOTE(review): scale and ordering are not defined in this file. */
  score: number
  /** Return the plain-object form of this result; `formatResults` places the returned value in its `results` array as-is. */
  toDict: () => Record<string, unknown>
}
|
|
||
// URL scheme prefixes that mark a path as a git remote. They are matched with
// `startsWith`, so the comparison is case-sensitive.
const GIT_URL_SCHEMES = [
  'https://',
  'http://',
  'ssh://',
  'git://',
  'git+ssh://',
  'file://',
] as const

// scp-style git URL, e.g. `user@host:repo` (but not `user@host:/abs/path`):
// a `user@host` pair made of word characters, dots and dashes, then a colon
// that is not immediately followed by a slash.
const SCP_GIT_URL_RE = /^[\w.-]+@[\w.-]+:(?!\/)/

/**
 * Return true if `path` looks like a remote git URL rather than a local path.
 *
 * @param path - Candidate repository location.
 * @returns `true` when `path` starts with one of `GIT_URL_SCHEMES` or matches
 *   the scp-style `user@host:repo` form; `false` otherwise, including for the
 *   empty string.
 */
export function isGitUrl(path: string): boolean {
  return (
    GIT_URL_SCHEMES.some(scheme => path.startsWith(scheme))
    || SCP_GIT_URL_RE.test(path)
  )
}
|
|
||
/**
 * Return the chunk containing `line` in `filePath`, or null.
 *
 * Mirrors semble.utils.resolve_chunk: a strict inner match (`line < endLine`)
 * wins immediately; a boundary match (`line === endLine`) is kept only as a
 * fallback so end-of-file lines still resolve. When several chunks match only
 * on their boundary, the first one in `chunks` is returned.
 *
 * @param chunks - Chunks to search, in priority order; never mutated.
 * @param filePath - File to look in; compared to `chunk.filePath` with `===`.
 * @param line - Line number to locate.
 * @returns The matching chunk (the same object that was passed in), or `null`
 *   when no chunk of `filePath` covers `line`.
 */
export function resolveChunk(
  chunks: readonly Chunk[],
  filePath: string,
  line: number,
): Chunk | null {
  let fallback: Chunk | null = null
  for (const chunk of chunks) {
    const coversLine
      = chunk.filePath === filePath
        && chunk.startLine <= line
        && line <= chunk.endLine
    if (!coversLine)
      continue
    // Strictly before the end: unambiguous owner, stop searching.
    if (line < chunk.endLine)
      return chunk
    // line === endLine: boundary; keep as fallback for end-of-file chunks.
    if (fallback === null)
      fallback = chunk
  }
  return fallback
}
|
amondnet marked this conversation as resolved.
|
||
|
|
||
/**
 * Render SearchResult objects as a JSONable object.
 *
 * @param query - The query string, echoed back unchanged under `query`.
 * @param results - Results to render; never mutated.
 * @returns `{ query, results }`, where `results` holds each input's
 *   `toDict()` output in the same order as the input.
 */
export function formatResults(
  query: string,
  results: readonly SearchResult[],
): { query: string, results: Record<string, unknown>[] } {
  return {
    query,
    results: results.map(result => result.toDict()),
  }
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.