|
| 1 | +import { describe, expect, it } from 'vitest'; |
| 2 | +import { GenericAnalyzer } from '../src/analyzers/generic/index'; |
| 3 | +import { MAX_AST_CHUNK_FILE_LINES } from '../src/utils/ast-chunker'; |
| 4 | + |
| 5 | +// --------------------------------------------------------------------------- |
| 6 | +// Fixtures |
| 7 | +// --------------------------------------------------------------------------- |
| 8 | + |
// In-memory TypeScript source handed to analyzer.analyze() as the content of
// '/virtual/user-service.ts' by the tests in this file.
//
// Symbols it declares, which the tests look up by name:
//   - class UserService (extends EventEmitter) with a constructor and the
//     methods getById, updateUser and init
//   - interfaces User and Database
//   - the standalone function createUserService
//   - the constant MAX_RETRIES, which nothing else in the fixture references
//
// The trailing .trim() removes the newline that follows the opening backtick
// (and the one before the closing backtick), so line 1 of the fixture is the
// `import` statement.
| 9 | +const TYPESCRIPT_FIXTURE = ` |
| 10 | +import { EventEmitter } from 'events'; |
| 11 | +
|
| 12 | +const MAX_RETRIES = 3; |
| 13 | +
|
| 14 | +export class UserService extends EventEmitter { |
| 15 | + private users: Map<string, User> = new Map(); |
| 16 | +
|
| 17 | + constructor(private readonly db: Database) { |
| 18 | + super(); |
| 19 | + this.init(); |
| 20 | + } |
| 21 | +
|
| 22 | + async getById(id: string): Promise<User | null> { |
| 23 | + if (!id) { |
| 24 | + throw new Error('ID required'); |
| 25 | + } |
| 26 | + const cached = this.users.get(id); |
| 27 | + if (cached) return cached; |
| 28 | + const user = await this.db.findUser(id); |
| 29 | + if (user) { |
| 30 | + this.users.set(id, user); |
| 31 | + } |
| 32 | + return user; |
| 33 | + } |
| 34 | +
|
| 35 | + async updateUser(id: string, data: Partial<User>): Promise<User> { |
| 36 | + const user = await this.getById(id); |
| 37 | + if (!user) { |
| 38 | + throw new Error(\`User \${id} not found\`); |
| 39 | + } |
| 40 | + const updated = { ...user, ...data }; |
| 41 | + this.users.set(id, updated); |
| 42 | + this.emit('user:updated', updated); |
| 43 | + return updated; |
| 44 | + } |
| 45 | +
|
| 46 | + private init(): void { |
| 47 | + console.log('UserService initialized'); |
| 48 | + } |
| 49 | +} |
| 50 | +
|
| 51 | +interface User { |
| 52 | + id: string; |
| 53 | + name: string; |
| 54 | + email: string; |
| 55 | +} |
| 56 | +
|
| 57 | +interface Database { |
| 58 | + findUser(id: string): Promise<User | null>; |
| 59 | +} |
| 60 | +
|
| 61 | +export function createUserService(db: Database): UserService { |
| 62 | + return new UserService(db); |
| 63 | +} |
| 64 | +`.trim(); |
| 65 | + |
| 66 | +// --------------------------------------------------------------------------- |
| 67 | +// Tests |
| 68 | +// --------------------------------------------------------------------------- |
| 69 | + |
// Single GenericAnalyzer instance, constructed with no arguments and reused by
// every test in this file.
| 70 | +const analyzer = new GenericAnalyzer(); |
| 71 | + |
// Integration tests that drive analyzer.analyze() end to end and inspect the
// returned chunks and metadata. Each test feeds an in-memory source string under
// a virtual path; the file extension of that path selects the language.
describe('AST Chunker Integration', () => {
  // Test 1: Supported language, normal file — AST chunks with scope prefixes
  it('produces AST-aligned chunks with scope prefixes for a normal TypeScript file', async () => {
    const result = await analyzer.analyze('/virtual/user-service.ts', TYPESCRIPT_FIXTURE);

    expect(result.metadata.chunkStrategy).toBe('ast-aligned');
    expect(result.metadata.symbolAware).toBe(true);

    // Should have symbol-aware chunks
    const symbolChunks = result.chunks.filter((c) => c.metadata?.symbolAware === true);
    expect(symbolChunks.length).toBeGreaterThan(0);

    // Key symbols from the fixture must each appear in at least one symbol name
    const names = symbolChunks.map((c) => c.metadata.symbolName);
    for (const expectedSymbol of ['getById', 'updateUser', 'createUserService']) {
      expect(names.some((n) => n?.includes(expectedSymbol))).toBe(true);
    }

    // Every symbol chunk should have a scope prefix (starts with //).
    // Matching on the content (rather than a boolean) shows the offending
    // chunk text in the failure output.
    for (const chunk of symbolChunks) {
      expect(chunk.content).toMatch(/^\/\//);
    }
  });

  // Test 2: Oversized file — falls back to line chunks
  it('falls back to line-based chunking for oversized files (>10K lines)', async () => {
    // One header comment plus enough one-line declarations to exceed
    // MAX_AST_CHUNK_FILE_LINES by 100 (declarations are numbered from 1).
    const declarationCount = MAX_AST_CHUNK_FILE_LINES + 100;
    const bigContent = [
      '// Large generated file',
      ...Array.from(
        { length: declarationCount },
        (_, index) => `export const var_${index + 1} = ${index + 1};`
      )
    ].join('\n');

    const result = await analyzer.analyze('/virtual/huge-file.ts', bigContent);

    // Chunks should be produced (via line/component fallback)
    expect(result.chunks.length).toBeGreaterThan(0);

    // Should NOT be ast-aligned due to file ceiling. The other tests read the
    // strategy from result.metadata, so check it there as well as per chunk;
    // a chunk-only check passes vacuously if chunks never carry the field.
    expect(result.metadata.chunkStrategy).not.toBe('ast-aligned');
    const hasAstAligned = result.chunks.some((c) => c.metadata?.chunkStrategy === 'ast-aligned');
    expect(hasAstAligned).toBe(false);
  });

  // Test 3: Parse error simulation — fallback, no crash
  it('falls back gracefully on files with syntax errors', async () => {
    // Deliberately malformed TypeScript: unbalanced braces and stray backslashes
    const badContent = [
      'export class Broken {',
      ' method() {',
      ' const x = {{{{{;', // severe syntax error
      ' return \\\\\\\\;',
      ' }',
      ' another() {',
      ' return 42;',
      ' }',
      '}'
    ].join('\n');

    // Should not throw (a rejection here fails the test)
    const result = await analyzer.analyze('/virtual/broken.ts', badContent);

    // Chunks should still be produced (via fallback)
    expect(result.chunks.length).toBeGreaterThan(0);
  });

  // Test 4: Unsupported language — regex/line fallback
  it('produces chunks via fallback for unsupported languages (.rb)', async () => {
    const rubyContent = [
      'class Calculator',
      ' def add(a, b)',
      ' a + b',
      ' end',
      '',
      ' def subtract(a, b)',
      ' a - b',
      ' end',
      'end',
      '',
      'def standalone_function(x)',
      ' x * 2',
      'end'
    ].join('\n');

    const result = await analyzer.analyze('/virtual/calculator.rb', rubyContent);

    // Chunks produced
    expect(result.chunks.length).toBeGreaterThan(0);

    // Should NOT be ast-aligned (Ruby has no grammar)
    expect(result.metadata.chunkStrategy).toBe('line-or-component');
    expect(result.metadata.symbolAware).toBeUndefined();

    // No chunk should have AST-related metadata
    for (const chunk of result.chunks) {
      expect(chunk.metadata?.symbolAware).not.toBe(true);
    }
  });

  // Test 5: Scope prefix correctness — nested class > method format
  it('generates correct scope prefix format for nested symbols', async () => {
    const result = await analyzer.analyze('/virtual/user-service.ts', TYPESCRIPT_FIXTURE);

    const symbolChunks = result.chunks.filter((c) => c.metadata?.symbolAware === true);

    // Find a method chunk inside UserService. The lookup uses includes(), the
    // same matching rule Test 1 asserts on, and the chunk is required to
    // exist: guarding the assertions with `if (chunk)` would let this test
    // pass without checking anything when the lookup misses.
    const getByIdChunk = symbolChunks.find((c) => c.metadata.symbolName?.includes('getById'));
    expect(getByIdChunk).toBeDefined();
    // Should have prefix format: // UserService > getById :: (...)
    const getByIdFirstLine = getByIdChunk?.content.split('\n')[0];
    expect(getByIdFirstLine).toMatch(/\/\/\s*UserService\s*>\s*getById\s*::/);

    // Find standalone function chunk (also required to exist)
    const createChunk = symbolChunks.find((c) =>
      c.metadata.symbolName?.includes('createUserService')
    );
    expect(createChunk).toBeDefined();
    // Should have prefix format: // createUserService :: (...)
    const createFirstLine = createChunk?.content.split('\n')[0];
    expect(createFirstLine).toMatch(/\/\/\s*createUserService\s*::/);
    // Should NOT have parent path separator
    expect(createFirstLine).not.toMatch(/>/);
  });

  // Test 6: Full coverage verification — chunks cover the file with small
  // structural gaps only where container headers/footers are below the
  // 2-non-blank-line threshold.
  it('AST chunks cover the file with at most small structural gaps', async () => {
    const result = await analyzer.analyze('/virtual/user-service.ts', TYPESCRIPT_FIXTURE);

    // Precondition: the coverage checks below are only meaningful for
    // AST-aligned output, so fail immediately if another strategy was used.
    expect(result.metadata.chunkStrategy).toBe('ast-aligned');

    const sorted = [...result.chunks].sort((a, b) => a.startLine - b.startLine);
    const allLines = TYPESCRIPT_FIXTURE.split('\n');

    // Collect all line numbers covered by chunks (startLine/endLine are
    // treated as 1-based and inclusive)
    const coveredLines = new Set<number>();
    for (const chunk of sorted) {
      for (let line = chunk.startLine; line <= chunk.endLine; line++) {
        coveredLines.add(line);
      }
    }

    // Source lines that no chunk covers, paired with their 1-based line number
    const uncoveredLines = allLines
      .map((text, index) => ({ text, lineNumber: index + 1 }))
      .filter(({ lineNumber }) => !coveredLines.has(lineNumber));

    // Uncovered lines should be small structural fragments (class opening/closing braces, etc.)
    // Allow up to 15% uncovered for container header/footer gaps
    const uncoveredPct = (uncoveredLines.length / allLines.length) * 100;
    expect(uncoveredPct).toBeLessThan(15);

    // Every uncovered line should be structurally trivial: shorter than 60
    // characters once trimmed. Blank lines and lone '}' / '};' lines satisfy
    // this automatically, so they need no separate check.
    for (const { text } of uncoveredLines) {
      expect(text.trim().length).toBeLessThan(60);
    }

    // Verify no overlapping line ranges
    for (let i = 1; i < sorted.length; i++) {
      expect(sorted[i].startLine).toBeGreaterThan(sorted[i - 1].endLine);
    }

    // Content from chunks (minus scope prefixes) should contain all significant source lines.
    // A line is dropped as a scope prefix only when it looks like `// ... :: ...`
    // AND does not occur verbatim in the fixture.
    const scopePrefixPattern = /^\/\/\s*.+\s*::\s*.+/;
    const joined = sorted
      .flatMap((chunk) => chunk.content.split('\n'))
      .filter((line) => !(scopePrefixPattern.test(line) && !TYPESCRIPT_FIXTURE.includes(line)))
      .join('\n');

    // All important function/class names must be present in reconstructed content
    // Note: 'class UserService' may be in a dropped header (<= 2 non-blank lines)
    // but the methods and standalone functions must be present
    expect(joined).toContain('async getById');
    expect(joined).toContain('async updateUser');
    expect(joined).toContain('function createUserService');
  });
});
0 commit comments