Skip to content

Commit 59c318e

Browse files
committed
Deploy PR #1288 preview
1 parent 53c3e2c commit 59c318e

File tree

13 files changed

+55087
-0
lines changed

13 files changed

+55087
-0
lines changed

docs-preview-1288/assets/main.js

Lines changed: 740 additions & 0 deletions
Large diffs are not rendered by default.

docs-preview-1288/assets/search-data.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
/**
 * Reports whether a string contains a character treated as a word boundary:
 * an ASCII uppercase letter, one of `-`, `_`, `/`, `.`, or any whitespace.
 *
 * @param {string} char - Character to classify (normally a single character).
 * @returns {boolean} True when a boundary character is present.
 */
const isWordBoundary = (char) => /[A-Z\-_\/.\s]/.test(char);
3+
4+
/**
 * Reports whether two characters are both uppercase and are different letters.
 *
 * A character counts as uppercase here when lower-casing changes it. Note that,
 * despite the name, a lowercase-to-uppercase pair such as ("a", "B") yields
 * false: both characters must be uppercase.
 *
 * @param {string} prev - The earlier character.
 * @param {string} curr - The later character.
 * @returns {boolean} True when both are uppercase and differ case-insensitively.
 */
const isCaseTransition = (prev, curr) => {
  const prevLower = prev.toLowerCase();
  const currLower = curr.toLowerCase();
  const prevIsUpper = prevLower !== prev;
  const currIsUpper = currLower !== curr;

  return prevIsUpper && currIsUpper && prevLower !== currLower;
};
11+
12+
/**
 * Finds the tightest in-order (subsequence) occurrence of `query` in `target`.
 *
 * "Tightest" means the smallest total number of skipped characters between
 * consecutive matched positions, which equals the smallest distance between
 * the first and last matched character. When several matches are equally
 * tight, the one with the earliest positions wins.
 *
 * For a fixed first position, matching every following query character at its
 * leftmost possible index reaches the earliest possible end, so trying each
 * candidate start greedily is enough to find the optimum. Comparison is by
 * UTF-16 code unit and is case-sensitive; callers lower-case beforehand.
 *
 * @param {string} query - Characters to find, in order.
 * @param {string} target - Text to search in.
 * @returns {{positions: number[], consecutive: number, score: number} | null}
 *   Matched indices into `target`, `consecutive` (1 plus the number of adjacent
 *   position pairs), and the score from `calculateMatchScore`; null when either
 *   string is empty or `query` is not a subsequence of `target`.
 */
const findBestSubsequenceMatch = (query, target) => {
  const n = query.length;
  const m = target.length;

  if (n === 0 || m === 0) return null;

  let best = null;
  let bestSpan = Infinity;

  scan: for (
    let start = target.indexOf(query[0]);
    start !== -1;
    start = target.indexOf(query[0], start + 1)
  ) {
    const candidate = [start];
    let cursor = start;

    for (let q = 1; q < n; q++) {
      cursor = target.indexOf(query[q], cursor + 1);
      // The rest of the query does not occur after this start, so it cannot
      // occur after any later start either.
      if (cursor === -1) break scan;
      // Already as wide as the best match: it cannot be strictly tighter.
      if (cursor - start >= bestSpan) continue scan;
      candidate.push(cursor);
    }

    // Reaching this point means the candidate is strictly tighter than `best`.
    best = candidate;
    bestSpan = cursor - start;

    // A fully contiguous match cannot be improved on.
    if (bestSpan === n - 1) break;
  }

  if (best === null) return null;

  let consecutive = 1;
  for (let i = 1; i < best.length; i++) {
    if (best[i] === best[i - 1] + 1) {
      consecutive++;
    }
  }

  return {
    positions: best,
    consecutive,
    score: calculateMatchScore(query, target, best, consecutive),
  };
};
79+
80+
/**
 * Scores a subsequence match on a 0..1 scale.
 *
 * Starting from 1.0 the score gains up to 0.5 for matching near the start of
 * `target`, loses up to 0.3 per gap (scaled by gap length over target length),
 * gains up to 0.3 for the share of consecutive characters, and gains 0.05 per
 * matched boundary character (the first match always counts) plus 0.03 per
 * `isCaseTransition` pair. The running total is capped at 1.0, then reduced by
 * up to 0.2 when the number of positions differs from the query length.
 *
 * NOTE(review): the start bonus, 0.5 * (m - first) / m, is always larger than
 * the total gap penalty, which is at most 0.3 * (m - first - n) / m. For any
 * complete match (positions.length === query.length) the result therefore
 * appears to saturate at exactly 1.0. Confirm whether the base of 1.0 is
 * intended before relying on this score for ranking or thresholds.
 *
 * @param {string} query - The searched string; only its length is used.
 * @param {string} target - The string the positions index into.
 * @param {number[]} positions - Increasing indices of matched characters.
 * @param {number} consecutive - Consecutive-character count from the caller.
 * @returns {number} Score in [0, 1]; 0 when `positions` is empty.
 */
const calculateMatchScore = (query, target, positions, consecutive) => {
  const n = positions.length;
  const m = target.length;

  if (n === 0) return 0;

  let score = 1.0;

  // Earlier first match => larger bonus.
  const startBonus = (m - positions[0]) / m;
  score += startBonus * 0.5;

  // Penalise skipped characters between neighbouring matches.
  let gapPenalty = 0;
  for (let i = 1; i < n; i++) {
    const gap = positions[i] - positions[i - 1] - 1;
    if (gap > 0) {
      gapPenalty += Math.min(gap / m, 1.0) * 0.3;
    }
  }
  score -= gapPenalty;

  score += (consecutive / n) * 0.3;

  let boundaryBonus = 0;
  for (let i = 0; i < n; i++) {
    const char = target[positions[i]];
    if (i === 0 || isWordBoundary(char)) {
      boundaryBonus += 0.05;
    }
    if (i > 0 && isCaseTransition(target[positions[i - 1]], char)) {
      boundaryBonus += 0.03;
    }
  }
  score = Math.min(1.0, score + boundaryBonus);

  const lengthPenalty = Math.abs(query.length - n) / Math.max(query.length, m);
  score -= lengthPenalty * 0.2;

  return Math.max(0, Math.min(1.0, score));
};
123+
124+
/**
 * Case-insensitive fuzzy comparison of `query` against `target`.
 *
 * - Equal strings score 1.0.
 * - If `target` contains `query` as a substring, the score is between 0.8 and
 *   1.0, growing with the share of `target` that `query` covers.
 * - Otherwise the score comes from `findBestSubsequenceMatch`, capped at 1.0.
 *
 * @param {string} query - Text being searched for.
 * @param {string} target - Text being searched in.
 * @returns {number | null} Score in (0, 1], or null when either argument is
 *   not a string, either is empty, or no in-order match exists.
 */
const fuzzyMatch = (query, target) => {
  // Non-string input cannot match; return null rather than throwing.
  if (typeof query !== "string" || typeof target !== "string") return null;

  const lowerQuery = query.toLowerCase();
  const lowerTarget = target.toLowerCase();

  if (lowerQuery.length === 0 || lowerTarget.length === 0) return null;

  if (lowerTarget === lowerQuery) return 1.0;

  if (lowerTarget.includes(lowerQuery)) {
    const ratio = lowerQuery.length / lowerTarget.length;
    return 0.8 + ratio * 0.2;
  }

  const match = findBestSubsequenceMatch(lowerQuery, lowerTarget);
  return match ? Math.min(1.0, match.score) : null;
};
147+
148+
/**
 * Splits text into lower-cased word tokens (letters, digits, `_`, `-`) and
 * keeps only those longer than two characters. Duplicates are preserved.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Tokens in order of appearance.
 */
const extractSearchTerms = (text) => {
  const words = text.toLowerCase().match(/\b[a-zA-Z0-9_-]+\b/g) || [];
  return words.filter((word) => word.length > 2);
};

/**
 * Reads the document list from a search payload. Accepts `documents` as a
 * JSON string or an array, or `transferables` as a JSON string.
 *
 * @param {object} data - The `data` field of a "search" message.
 * @returns {*} The document list (expected to be an array), or [] when no
 *   supported field is present.
 * @throws {SyntaxError} When a string field is not valid JSON.
 */
const readSearchDocuments = (data) => {
  if (typeof data.documents === "string") return JSON.parse(data.documents);
  if (Array.isArray(data.documents)) return data.documents;
  if (typeof data.transferables === "string") {
    return JSON.parse(data.transferables);
  }
  return [];
};

/**
 * Computes the page score of one document: fuzzy title match (x100), fuzzy
 * content match (x30), plus per-term substring hits (title: 20 when the title
 * equals the term, otherwise 10; content: 2).
 *
 * @param {object} doc - Document; non-string `title`/`content` count as empty.
 * @param {string} rawQuery - Lower-cased full query.
 * @param {string[]} searchTerms - Tokens extracted from the query.
 * @param {boolean} useFuzzySearch - Whether fuzzy matching is enabled.
 * @returns {number} The accumulated score.
 */
const scoreDocument = (doc, rawQuery, searchTerms, useFuzzySearch) => {
  const lowerTitle =
    doc && typeof doc.title === "string" ? doc.title.toLowerCase() : "";
  const lowerContent =
    doc && typeof doc.content === "string" ? doc.content.toLowerCase() : "";

  let pageScore = 0;

  if (useFuzzySearch) {
    const fuzzyTitleScore = fuzzyMatch(rawQuery, lowerTitle);
    if (fuzzyTitleScore !== null) {
      pageScore += fuzzyTitleScore * 100;
    }

    const fuzzyContentScore = fuzzyMatch(rawQuery, lowerContent);
    if (fuzzyContentScore !== null) {
      pageScore += fuzzyContentScore * 30;
    }
  }

  // Token-based exact matching
  searchTerms.forEach((term) => {
    if (lowerTitle.includes(term)) {
      pageScore += lowerTitle === term ? 20 : 10;
    }
    if (lowerContent.includes(term)) {
      pageScore += 2;
    }
  });

  return pageScore;
};

/**
 * Returns the anchors of a document whose text matches the query, either by
 * fuzzy score (at least 0.4) or by containing any search term.
 *
 * @param {object} doc - Document whose `anchors` array is inspected.
 * @param {string} rawQuery - Lower-cased full query.
 * @param {string[]} searchTerms - Tokens extracted from the query.
 * @param {boolean} useFuzzySearch - Whether fuzzy matching is enabled.
 * @returns {object[]} Matching anchor objects in their original order.
 */
const findMatchingAnchors = (doc, rawQuery, searchTerms, useFuzzySearch) => {
  if (!doc || !Array.isArray(doc.anchors)) return [];

  return doc.anchors.filter((anchor) => {
    // Skip anchors without usable text instead of failing the whole search.
    if (!anchor || typeof anchor.text !== "string" || anchor.text === "") {
      return false;
    }

    const anchorText = anchor.text.toLowerCase();

    if (useFuzzySearch) {
      const fuzzyScore = fuzzyMatch(rawQuery, anchorText);
      if (fuzzyScore !== null && fuzzyScore >= 0.4) return true;
    }

    return searchTerms.some((term) => anchorText.includes(term));
  });
};

/**
 * Worker message handler. Supported message types:
 *
 * - "tokenize": `data` is a string; replies with type "tokens" and the unique
 *   tokens longer than two characters.
 * - "search": `data` is `{ query, limit = 10, documents | transferables }`;
 *   replies with type "results" and up to `limit` entries of
 *   `{ doc, pageScore, matchingAnchors }` whose score exceeds 5, best first.
 *
 * Every reply echoes the request's `messageId`. Exceptions are reported as
 * `{ messageId, type: "error", error }`. Other message types get no reply.
 */
self.onmessage = function (e) {
  const { messageId, type, data } = e.data;

  const respond = (responseType, payload) => {
    self.postMessage({ messageId, type: responseType, data: payload });
  };

  const respondError = (error) => {
    self.postMessage({
      messageId,
      type: "error",
      error: (error && error.message) || String(error),
    });
  };

  try {
    if (type === "tokenize") {
      const text = typeof data === "string" ? data : "";
      respond("tokens", Array.from(new Set(extractSearchTerms(text))));
    } else if (type === "search") {
      const { query, limit = 10 } = data;

      if (!query || typeof query !== "string") {
        respond("results", []);
        return;
      }

      const rawQuery = query.toLowerCase();
      const searchTerms = extractSearchTerms(query);

      const documents = readSearchDocuments(data);
      if (!Array.isArray(documents) || documents.length === 0) {
        respond("results", []);
        return;
      }

      const useFuzzySearch = rawQuery.length >= 3;

      // Nothing to match with: no usable terms and the query is too short
      // for fuzzy matching.
      if (searchTerms.length === 0 && !useFuzzySearch) {
        respond("results", []);
        return;
      }

      // First pass: score every page and keep the best ones.
      const results = documents
        .map((doc) => ({
          doc,
          pageScore: scoreDocument(doc, rawQuery, searchTerms, useFuzzySearch),
          matchingAnchors: [],
        }))
        .filter((match) => match.pageScore > 5)
        .sort((a, b) => b.pageScore - a.pageScore)
        .slice(0, limit);

      // Second pass: anchors are only needed for pages that are returned.
      results.forEach((match) => {
        match.matchingAnchors = findMatchingAnchors(
          match.doc,
          rawQuery,
          searchTerms,
          useFuzzySearch,
        );
      });

      respond("results", results);
    }
  } catch (error) {
    respondError(error);
  }
};

0 commit comments

Comments
 (0)