Skip to content

Commit 336f7f7

Browse files
committed
perf(linter/plugins): faster conversion of span to Location (#20507)
Speed up `computeLoc`, which converts a `start` + `end` offset pair to a `Location` of the form `{ start: { line: ?, column: ? }, end: { line: ?, column: ? } }`. `computeLoc` is only called with `start` and `end` pairs from AST nodes, tokens, and comments, which are all produced by Oxc's parser, so it's guaranteed that `start <= end` (`start === end` is only possible for a `Program` node with no directives or statements). Use this invariant for 2 optimizations: 1. Fast path for the common case where `start` and `end` are on the same line. 2. Reduce the range of lines which are searched when looking for `end` to only the lines after the line `start` is on. `end` can't be on an earlier line, so there's no point searching them.
1 parent f064f80 commit 336f7f7

File tree

1 file changed

+68
-20
lines changed

1 file changed

+68
-20
lines changed

apps/oxlint/src-js/plugins/location.ts

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -148,29 +148,14 @@ export function getLineColumnFromOffset(offset: number): LineColumn {
148148
// This also decodes `sourceText` if it wasn't already.
149149
if (lines.length === 0) initLines();
150150
debugAssertIsNonNull(sourceText);
151+
debugAssertLinesIsInitialized();
151152

152153
if (offset > sourceText.length) {
153154
throw new RangeError(
154155
`Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`,
155156
);
156157
}
157158

158-
const lineCol: LineColumn = { line: 0, column: 0 };
159-
populateLineColumn(offset, lineCol);
160-
return lineCol;
161-
}
162-
163-
/**
164-
* Populate an existing `LineColumn` object from a source text offset.
165-
*
166-
* Caller must ensure `lineStartIndices` is initialized before calling this function.
167-
*
168-
* @param offset - The index of a character in a file.
169-
* @param out - `LineColumn` object to populate.
170-
*/
171-
function populateLineColumn(offset: number, out: LineColumn): void {
172-
debugAssertLinesIsInitialized();
173-
174159
// Find first line that starts *after* `offset`, via binary search of `lineStartIndices`.
175160
// `lineStartIndices` is sorted and `lineStartIndices[0]` is always 0.
176161
//
@@ -190,8 +175,10 @@ function populateLineColumn(offset: number, out: LineColumn): void {
190175
}
191176
} while (low < high);
192177

193-
out.line = low; // 1-indexed line number
194-
out.column = offset - lineStartIndices[low - 1]; // Offset from start of the line
178+
return {
179+
line: low, // 1-indexed line number
180+
column: offset - lineStartIndices[low - 1], // Offset from start of the line
181+
};
195182
}
196183

197184
/**
@@ -332,7 +319,13 @@ const LOC_DESCRIPTOR: PropertyDescriptor = {
332319
* @returns Location
333320
*/
334321
export function computeLoc(start: number, end: number): Location {
322+
// All AST nodes, tokens and comments have `start < end`, with only one exception:
323+
// `Program` node can have `start === end` if it has no directives or statements - either 0-length file,
324+
// or purely comments and/or whitespace and/or hashbang. But `start > end` is impossible.
325+
debugAssert(start <= end, "`start` must be <= `end`");
326+
335327
if (lines.length === 0) initLines();
328+
debugAssertLinesIsInitialized();
336329

337330
// Reuse a cached `Location` object if available, otherwise create a new one.
338331
// Note: The comparison `activeLocationsCount < cachedLocations.length` must be this way around
@@ -348,8 +341,63 @@ export function computeLoc(start: number, end: number): Location {
348341

349342
activeLocationsCount++;
350343

351-
populateLineColumn(start, loc.start);
352-
populateLineColumn(end, loc.end);
344+
const linesLen = lineStartIndices.length;
345+
346+
// Find first line that starts *after* `start`, via binary search of `lineStartIndices`.
347+
// `lineStartIndices` is sorted and `lineStartIndices[0]` is always 0.
348+
//
349+
// After the loop, `line` is the index of the first line whose start is *past* `start`.
350+
// This is also the 1-indexed line number of the line containing `start`.
351+
// e.g. if `start` is on the 3rd line, `line` = 3, and `lineStartIndices[2]` is that line's start.
352+
// `do...while` is safe because `lineStartIndices` always has at least one entry, so `line < high` at start of loop.
353+
let line = 0,
354+
high = linesLen,
355+
mid: number;
356+
do {
357+
mid = (line + high) >>> 1;
358+
if (start < lineStartIndices[mid]) {
359+
high = mid;
360+
} else {
361+
line = mid + 1;
362+
}
363+
} while (line < high);
364+
365+
const lineStart = lineStartIndices[line - 1];
366+
367+
const locStart = loc.start;
368+
locStart.line = line;
369+
locStart.column = start - lineStart;
370+
371+
// Fast path: If `end` is on the same line as `start`, skip the second binary search.
372+
// Most tokens (and many small AST nodes) are on a single line, so this is the common case.
373+
// `line` indexes the *next* line's start in `lineStartIndices`.
374+
// If we're on the last line, or `end` is before the next line's start, `end` is on the same line as `start`.
375+
const locEnd = loc.end;
376+
if (line === linesLen || end < lineStartIndices[line]) {
377+
locEnd.line = line;
378+
locEnd.column = end - lineStart;
379+
} else {
380+
// `end` is on a later line than `start`.
381+
//
382+
// Find first line that starts *after* `end`, via binary search of `lineStartIndices`.
383+
// Start search from the line after the one containing `start`, to narrow the search range.
384+
//
385+
// After the loop, `line` is the index of the first line whose start is *past* `end`.
386+
// This is also the 1-indexed line number of the line containing `end`.
387+
line++;
388+
high = linesLen;
389+
while (line < high) {
390+
mid = (line + high) >>> 1;
391+
if (end < lineStartIndices[mid]) {
392+
high = mid;
393+
} else {
394+
line = mid + 1;
395+
}
396+
}
397+
398+
locEnd.line = line;
399+
locEnd.column = end - lineStartIndices[line - 1];
400+
}
353401

354402
return loc;
355403
}

0 commit comments

Comments
 (0)