Skip to content

Commit 3dbd43e

Browse files
committed
feat(05-02): add scope-aware prefix generation to AST chunks
- Add generateScopePrefix() producing '// path > to > symbol :: signature' format
- Extract signature hints from function params/return types, class/interface labels
- Integrate prefix prepending into processNode for all symbol chunks
- Add includeScopePrefix option to ASTChunkOptions (default true)
- Add MAX_AST_CHUNK_FILE_SIZE (500KB) and MAX_AST_CHUNK_FILE_LINES (10K) constants
- All 189 existing tests pass with no regressions
1 parent 68a2d6d commit 3dbd43e

File tree

1 file changed

+121
-34
lines changed

1 file changed

+121
-34
lines changed

src/utils/ast-chunker.ts

Lines changed: 121 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,80 @@ export interface ASTChunkOptions {
2828
language: string;
2929
framework?: string;
3030
componentType?: string;
31+
includeScopePrefix?: boolean;
3132
}
3233

3334
// Default lower/upper chunk-size bounds, expressed in lines.
// `as const` keeps both values as readonly literal types.
// NOTE(review): how these bounds are applied is not visible in this excerpt — confirm in the chunker.
export const DEFAULT_AST_CHUNK_OPTIONS = {
3435
minChunkLines: 10,
3536
maxChunkLines: 150
3637
} as const;
3738

39+
// File ceiling constants: beyond these, AST chunking is skipped in favor of line chunks.
// NOTE(review): the comparison site is not visible here — presumably the size limit is
// checked against the file's byte/character length and the line limit against its
// line count; confirm at the caller.
40+
export const MAX_AST_CHUNK_FILE_SIZE = 500_000; // 500KB
41+
export const MAX_AST_CHUNK_FILE_LINES = 10_000; // 10K lines
42+
43+
// ---------------------------------------------------------------------------
44+
// Scope Prefix Generation
45+
// ---------------------------------------------------------------------------
46+
47+
/**
 * Build the one-line comment header that is placed in front of a symbol chunk.
 *
 * Shape of the result: `// <scope path> :: <signature hint>`
 *
 * - The scope path lists the names of all `ancestors` (outermost first)
 *   followed by the name of `node`, separated by " > ".
 * - The signature hint is whatever `extractSignatureHint` returns for the
 *   node's own symbol.
 *
 * Examples:
 * - Top-level class:      `// UserService :: class`
 * - Method inside class:  `// UserService > getData :: (id: string)`
 * - Standalone constant:  `// MAX_RETRIES :: const`
 *
 * @param node - The symbol the chunk was produced from.
 * @param ancestors - Enclosing symbols, ordered from outermost to innermost.
 * @returns The prefix line, without a trailing newline.
 */
export function generateScopePrefix(node: SymbolNode, ancestors: SymbolNode[]): string {
  // Walk the chain outermost → innermost, ending with the node itself.
  const scopePath = [...ancestors, node].map((entry) => entry.symbol.name).join(' > ');

  // The hint is derived from the node's own symbol only, never from ancestors.
  const signatureHint = extractSignatureHint(node.symbol);

  return '// ' + scopePath + ' :: ' + signatureHint;
}
68+
69+
/** Longest parameter list (in characters) emitted in a hint before truncation. */
const MAX_HINT_PARAMS_LENGTH = 80;
/** Longest return type (in characters) emitted in a hint before truncation. */
const MAX_HINT_RETURN_LENGTH = 40;

/** Shorten `text` to at most `max` characters, marking the cut with "...". */
function truncateHint(text: string, max: number): string {
  return text.length > max ? text.slice(0, max - 3) + '...' : text;
}

/** Return the index of the `)` that balances the `(` at `openIndex`, or -1 if unbalanced. */
function findClosingParen(text: string, openIndex: number): number {
  let depth = 0;
  for (let i = openIndex; i < text.length; i++) {
    const ch = text.charAt(i);
    if (ch === '(') {
      depth++;
    } else if (ch === ')') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Read a `: ReturnType` annotation starting at `start` (the index just past the
 * closing parenthesis of the parameter list). Returns '' when none is present.
 */
function readReturnType(text: string, start: number): string {
  const isInlineSpace = (ch: string): boolean => ch === ' ' || ch === '\t';

  let i = start;
  while (i < text.length && isInlineSpace(text.charAt(i))) i++;
  if (text.charAt(i) !== ':') return '';
  i++;
  // Only skip spaces/tabs here: a newline right after the colon (e.g. Python's
  // `def f(a):`) means there is no return type on this line.
  while (i < text.length && isInlineSpace(text.charAt(i))) i++;

  let depth = 0; // nesting inside <...>, (...), [...] and {...}
  let out = '';
  for (; i < text.length; i++) {
    const ch = text.charAt(i);

    if (ch === '=' && text.charAt(i + 1) === '>') {
      // At depth 0 this is the arrow that starts an arrow-function body.
      if (depth === 0) break;
      // Nested (e.g. `Promise<() => void>`): keep it, and do not let the `>`
      // be mistaken for a closing angle bracket.
      out += '=>';
      i++;
      continue;
    }

    if (depth === 0 && (ch === '\n' || ch === '\r' || ch === ';' || ch === '=')) break;

    if (ch === '{') {
      // A `{` that follows a complete type opens the function body; a `{` at the
      // very start (or after `|` / `&`) opens an object type literal.
      const soFar = out.trimEnd();
      if (depth === 0 && soFar !== '' && !/[|&]$/.test(soFar)) break;
      depth++;
    } else if (ch === '<' || ch === '(' || ch === '[') {
      depth++;
    } else if (ch === '>' || ch === ')' || ch === ']' || ch === '}') {
      if (depth === 0) break; // stray closer: not part of the return type
      depth--;
    }

    out += ch;
  }

  return out.replace(/\s+/g, ' ').trim();
}

/**
 * Extract a short, single-line signature hint from a symbol's content.
 *
 * - Type-like kinds (`class`, `interface`, `type`, `enum`, `struct`, `trait`)
 *   return the kind name itself.
 * - `function` / `method` return `(params)` or `(params): ReturnType`, taken
 *   from the first parenthesised group in `sym.content`. Nested parentheses
 *   and generic return types (e.g. `Promise<User>`) are kept intact, internal
 *   whitespace/newlines are collapsed to single spaces, params are capped at
 *   80 characters and the return type at 40 (both marked with "...").
 *   If no balanced parameter list is found, the hint is `function`.
 * - Every other kind returns `const`.
 *
 * This is a lightweight text scan, not a parser: parentheses inside string
 * literals or a function type used directly as a return type may still be
 * misread.
 *
 * @param sym - Symbol whose `kind` and `content` are inspected.
 * @returns The hint text, never containing a line break.
 */
function extractSignatureHint(sym: TreeSitterSymbol): string {
  const kind = sym.kind;

  switch (kind) {
    case 'class':
    case 'interface':
    case 'type':
    case 'enum':
    case 'struct':
    case 'trait':
      return kind;
    default:
      break;
  }

  // For functions/methods, extract the parameter list and optional return type.
  if (kind === 'function' || kind === 'method') {
    const content = sym.content;
    const open = content.indexOf('(');
    const close = open === -1 ? -1 : findClosingParen(content, open);
    if (close === -1) return 'function';

    // Collapse whitespace so multi-line parameter lists stay on the prefix line.
    const params = truncateHint(
      content.slice(open + 1, close).replace(/\s+/g, ' ').trim(),
      MAX_HINT_PARAMS_LENGTH
    );
    const returnType = readReturnType(content, close + 1);

    return returnType
      ? `(${params}): ${truncateHint(returnType, MAX_HINT_RETURN_LENGTH)}`
      : `(${params})`;
  }

  // Constants/variables
  return 'const';
}
104+
38105
// ---------------------------------------------------------------------------
39106
// 1. buildSymbolTree
40107
// ---------------------------------------------------------------------------
@@ -120,13 +187,15 @@ export function generateASTChunks(
120187
const chunks: CodeChunk[] = [];
121188
let cursor = 1; // 1-based line cursor
122189

190+
const shouldPrefix = options.includeScopePrefix !== false;
191+
123192
for (const root of roots) {
124193
// Gap before this root
125194
if (root.symbol.startLine > cursor) {
126195
chunks.push(makeFillerChunk(lines, cursor, root.symbol.startLine - 1, options));
127196
}
128197
// Process the root symbol (recurse for containers)
129-
chunks.push(...processNode(root, lines, options, null));
198+
chunks.push(...processNode(root, lines, options, null, [], shouldPrefix));
130199
cursor = root.symbol.endLine + 1;
131200
}
132201

@@ -143,14 +212,24 @@ function processNode(
143212
node: SymbolNode,
144213
lines: string[],
145214
options: ASTChunkOptions,
146-
parentName: string | null
215+
parentName: string | null,
216+
ancestors: SymbolNode[],
217+
shouldPrefix: boolean
147218
): CodeChunk[] {
148219
const sym = node.symbol;
149220
const symbolPath = parentName ? [parentName, sym.name] : [sym.name];
150221

151222
if (node.children.length === 0) {
152223
// Leaf symbol → single chunk
153-
return [makeSymbolChunk(sym, lines, options, symbolPath, parentName)];
224+
const chunk = makeSymbolChunk(sym, lines, options, symbolPath, parentName);
225+
if (shouldPrefix) {
226+
const prefix = generateScopePrefix(node, ancestors);
227+
chunk.content = prefix + '\n' + chunk.content;
228+
}
229+
if (chunk.metadata) {
230+
(chunk.metadata as Record<string, unknown>).symbolPath = symbolPath;
231+
}
232+
return [chunk];
154233
}
155234

156235
// Container symbol — split into header, children, footer
@@ -163,22 +242,25 @@ function processNode(
163242
const headerLines = extractLines(lines, sym.startLine, headerEnd);
164243
const nonBlank = headerLines.filter((l) => l.trim().length > 0).length;
165244
if (nonBlank > 2) {
166-
chunks.push(
167-
makeSymbolChunk(
168-
{
169-
...sym,
170-
name: `${sym.name}:header`,
171-
startLine: sym.startLine,
172-
endLine: headerEnd,
173-
content: headerLines.join('\n')
174-
},
175-
lines,
176-
options,
177-
symbolPath,
178-
parentName,
179-
true // use provided content
180-
)
245+
const headerChunk = makeSymbolChunk(
246+
{
247+
...sym,
248+
name: `${sym.name}:header`,
249+
startLine: sym.startLine,
250+
endLine: headerEnd,
251+
content: headerLines.join('\n')
252+
},
253+
lines,
254+
options,
255+
symbolPath,
256+
parentName,
257+
true // use provided content
181258
);
259+
if (shouldPrefix) {
260+
const prefix = generateScopePrefix(node, ancestors);
261+
headerChunk.content = `${prefix}\n${headerChunk.content}`;
262+
}
263+
chunks.push(headerChunk);
182264
}
183265
}
184266

@@ -195,7 +277,9 @@ function processNode(
195277
chunks.push(makeFillerChunk(lines, gapStart, gapEnd, options));
196278
}
197279
}
198-
chunks.push(...processNode(child, lines, options, sym.name));
280+
chunks.push(
281+
...processNode(child, lines, options, sym.name, [...ancestors, node], shouldPrefix)
282+
);
199283
childCursor = child.symbol.endLine + 1;
200284
}
201285

@@ -206,22 +290,25 @@ function processNode(
206290
const footerLines = extractLines(lines, footerStart, sym.endLine);
207291
const nonBlank = footerLines.filter((l) => l.trim().length > 0).length;
208292
if (nonBlank > 2) {
209-
chunks.push(
210-
makeSymbolChunk(
211-
{
212-
...sym,
213-
name: `${sym.name}:footer`,
214-
startLine: footerStart,
215-
endLine: sym.endLine,
216-
content: footerLines.join('\n')
217-
},
218-
lines,
219-
options,
220-
symbolPath,
221-
parentName,
222-
true
223-
)
293+
const footerChunk = makeSymbolChunk(
294+
{
295+
...sym,
296+
name: `${sym.name}:footer`,
297+
startLine: footerStart,
298+
endLine: sym.endLine,
299+
content: footerLines.join('\n')
300+
},
301+
lines,
302+
options,
303+
symbolPath,
304+
parentName,
305+
true
224306
);
307+
if (shouldPrefix) {
308+
const prefix = generateScopePrefix(node, ancestors);
309+
footerChunk.content = `${prefix}\n${footerChunk.content}`;
310+
}
311+
chunks.push(footerChunk);
225312
}
226313
}
227314

0 commit comments

Comments
 (0)