Skip to content

Commit eec17ae

Browse files
danez and doowb committed
Merge commit from fork
* fix: add option for maximum nested extglobs with default depth of 2
  Also find other risky globs and treat them as literals
* Update test/options.maxExtglobRecursion.js
  Co-authored-by: Brian Woodward <brian.woodward@gmail.com>
* Fix bug for single character nested star-only extglobs
* Move constant to constants.js and set to 0 by default
---------
Co-authored-by: Brian Woodward <brian.woodward@gmail.com>
# Conflicts:
#   .verb.md
#   README.md
1 parent 78f8ca4 commit eec17ae

File tree

5 files changed

+455
-2
lines changed

5 files changed

+455
-2
lines changed

.verb.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ The following options may be used with the main `picomatch()` function or any of
108108
| `literalBrackets` | `boolean` | `undefined` | When `true`, brackets in the glob pattern will be escaped so that only literal brackets will be matched. |
109109
| `lookbehinds` | `boolean` | `true` | Support regex positive and negative lookbehinds. Note that you must be using Node 8.1.10 or higher to enable regex lookbehinds. |
110110
| `matchBase` | `boolean` | `false` | Alias for `basename` |
111-
| `maxLength` | `boolean` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
111+
| `maxLength` | `number` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
112+
| `maxExtglobRecursion` | `number\|boolean` | `0` | Limit nested quantified extglobs and other risky repeated extglob forms. When the limit is exceeded, the extglob is treated as a literal string instead of being compiled to regex. Set to `false` to disable this safeguard. |
112113
| `nobrace` | `boolean` | `false` | Disable brace matching, so that `{a,b}` and `{1..3}` would be treated as literal characters. |
113114
| `nobracket` | `boolean` | `undefined` | Disable matching with regex brackets. |
114115
| `nocase` | `boolean` | `false` | Make matching case-insensitive. Equivalent to the regex `i` flag. Note that this option is overridden by the `flags` option. |
@@ -324,6 +325,13 @@ console.log(pm.isMatch('foo.bar', '!(foo).!(bar)')); // false
324325

325326
// supports nested extglobs
326327
console.log(pm.isMatch('foo.bar', '!(!(foo)).!(!(bar))')); // true
328+
329+
// risky quantified extglobs are treated literally by default
330+
console.log(pm.makeRe('+(a|aa)'));
331+
//=> /^(?:\+\(a\|aa\))$/
332+
333+
// increase the limit to allow a small amount of nested quantified extglobs
334+
console.log(pm.isMatch('aaa', '+(+(a))', { maxExtglobRecursion: 1 })); // true
327335
```
328336

329337
#### POSIX brackets

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ The following options may be used with the main `picomatch()` function or any of
318318
| `keepQuotes` | `boolean` | `false` | Retain quotes in the generated regex, since quotes may also be used as an alternative to backslashes. |
319319
| `literalBrackets` | `boolean` | `undefined` | When `true`, brackets in the glob pattern will be escaped so that only literal brackets will be matched. |
320320
| `matchBase` | `boolean` | `false` | Alias for `basename` |
321-
| `maxLength` | `boolean` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
321+
| `maxLength` | `number` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
322+
| `maxExtglobRecursion` | `number\|boolean` | `0` | Limit nested quantified extglobs and other risky repeated extglob forms. When the limit is exceeded, the extglob is treated as a literal string instead of being compiled to regex. Set to `false` to disable this safeguard. |
322323
| `nobrace` | `boolean` | `false` | Disable brace matching, so that `{a,b}` and `{1..3}` would be treated as literal characters. |
323324
| `nobracket` | `boolean` | `undefined` | Disable matching with regex brackets. |
324325
| `nocase` | `boolean` | `false` | Make matching case-insensitive. Equivalent to the regex `i` flag. Note that this option is overridden by the `flags` option. |
@@ -533,6 +534,13 @@ console.log(pm.isMatch('foo.bar', '!(foo).!(bar)')); // false
533534
534535
// supports nested extglobs
535536
console.log(pm.isMatch('foo.bar', '!(!(foo)).!(!(bar))')); // true
537+
538+
// risky quantified extglobs are treated literally by default
539+
console.log(pm.makeRe('+(a|aa)'));
540+
//=> /^(?:\+\(a\|aa\))$/
541+
542+
// increase the limit to allow a small amount of nested quantified extglobs
543+
console.log(pm.isMatch('aaa', '+(+(a))', { maxExtglobRecursion: 1 })); // true
536544
```
537545
538546
#### POSIX brackets

lib/constants.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ const path = require('path');
44
const WIN_SLASH = '\\\\/';
55
const WIN_NO_SLASH = `[^${WIN_SLASH}]`;
66

7+
// Fallback for `options.maxExtglobRecursion` when the caller does not supply a
// number: the nesting depth of repeated (`+(...)` / `*(...)`) extglobs allowed
// inside a repeated extglob before the parser treats the pattern as risky.
const DEFAULT_MAX_EXTGLOB_RECURSION = 0;
8+
79
/**
810
* Posix glob regex
911
*/
@@ -84,6 +86,7 @@ const POSIX_REGEX_SOURCE = {
8486
};
8587

8688
module.exports = {
89+
DEFAULT_MAX_EXTGLOB_RECURSION,
8790
MAX_LENGTH: 1024 * 64,
8891
POSIX_REGEX_SOURCE,
8992

lib/parse.js

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,277 @@ const syntaxError = (type, char) => {
4545
return `Missing ${type}: "${char}" - use "\\\\${char}" to match literal characters`;
4646
};
4747

48+
/**
 * Split an extglob body on top-level `|` separators.
 *
 * A `|` only separates branches when it is not escaped with a backslash,
 * not inside double quotes, not inside `[...]` and not inside `(...)`.
 * Every character of the input (including backslashes and quotes) is kept
 * in the returned branches; only the separating pipes are dropped.
 *
 * @param {String} input Extglob body, e.g. `a|(b|c)|d`.
 * @return {Array<String>} Branches in order; always at least one entry
 *   (an empty input yields `['']`).
 */

const splitTopLevel = input => {
  const parts = [];
  let current = '';
  let bracketDepth = 0;
  let parenDepth = 0;
  let quoted = false;
  let escaped = false;

  for (const ch of input) {
    // The character following a backslash is always taken verbatim.
    if (escaped === true) {
      current += ch;
      escaped = false;
      continue;
    }

    if (ch === '\\') {
      current += ch;
      escaped = true;
      continue;
    }

    if (ch === '"') {
      quoted = !quoted;
      current += ch;
      continue;
    }

    if (quoted === false) {
      if (ch === '[') {
        bracketDepth++;
      } else if (ch === ']' && bracketDepth > 0) {
        bracketDepth--;
      } else if (bracketDepth === 0) {
        // Parens and pipes are only structural outside of brackets.
        if (ch === '(') {
          parenDepth++;
        } else if (ch === ')' && parenDepth > 0) {
          parenDepth--;
        } else if (ch === '|' && parenDepth === 0) {
          parts.push(current);
          current = '';
          continue;
        }
      }
    }

    current += ch;
  }

  parts.push(current);
  return parts;
};
99+
100+
/**
 * Check whether a branch contains no unescaped glob/extglob syntax.
 *
 * Any of `? * + @ ! ( ) [ ] { }` that is not preceded by a backslash makes
 * the branch non-plain. Escaped characters are skipped entirely.
 *
 * @param {String} branch
 * @return {Boolean} `true` when the branch only contains literal characters.
 */

const isPlainBranch = branch => {
  // Built once per call instead of once per character.
  const special = /[?*+@!()[\]{}]/;
  let escaped = false;

  for (const ch of branch) {
    if (escaped === true) {
      escaped = false;
      continue;
    }

    if (ch === '\\') {
      escaped = true;
      continue;
    }

    if (special.test(ch)) {
      return false;
    }
  }

  return true;
};
121+
122+
/**
 * Reduce a branch to the literal string it matches, when it is simple
 * enough to do so.
 *
 * The branch is trimmed, a single wrapping `@(...)` whose content has no
 * backslash, paren, bracket, brace or pipe is unwrapped, and backslash
 * escapes are removed from the result.
 *
 * @param {String} branch
 * @return {String|undefined} The literal text, or `undefined` when the
 *   branch still contains unescaped glob syntax.
 */

const normalizeSimpleBranch = branch => {
  let value = branch.trim();

  // One pass is enough: the unwrapped content cannot contain parens, so it
  // can never match this `@(...)` shape a second time.
  if (/^@\([^\\()[\]{}|]+\)$/.test(value)) {
    value = value.slice(2, -1);
  }

  if (!isPlainBranch(value)) {
    return;
  }

  return value.replace(/\\(.)/g, '$1');
};
141+
142+
/**
 * Detect alternations such as `a|aa` where two branches are runs of the
 * same single character. One such branch is always a prefix of the other,
 * which makes the alternation ambiguous when it is repeated.
 *
 * Branches are compared after `normalizeSimpleBranch`; branches that are
 * not simple literals (or normalize to an empty string) are ignored.
 * Characters are compared by code point so that characters outside the
 * BMP (e.g. emoji) are handled like any other character.
 *
 * @param {Array<String>} branches
 * @return {Boolean}
 */

const hasRepeatedCharPrefixOverlap = branches => {
  const isRunOf = (value, char) => {
    for (const ch of value) {
      if (ch !== char) {
        return false;
      }
    }
    return true;
  };

  const values = branches.map(branch => normalizeSimpleBranch(branch)).filter(Boolean);

  for (let i = 0; i < values.length; i++) {
    // First code point of the candidate branch.
    const [char] = values[i];

    if (!isRunOf(values[i], char)) {
      continue;
    }

    for (let j = i + 1; j < values.length; j++) {
      // Two runs of the same character always overlap as prefixes.
      if (isRunOf(values[j], char)) {
        return true;
      }
    }
  }

  return false;
};
163+
164+
/**
 * Parse a leading repeated extglob (`+(...)` or `*(...)`) from `pattern`.
 *
 * The closing paren is located while skipping backslash-escaped
 * characters, double-quoted sections and `[...]` bracket expressions.
 *
 * @param {String} pattern Text that should start with `+(` or `*(`.
 * @param {Boolean} [requireEnd=true] When `true`, the extglob must span the
 *   whole pattern; when `false`, trailing text after the closing paren is
 *   allowed.
 * @return {{type: String, body: String, end: Number}|undefined} The
 *   quantifier character, the text between the outer parens, and the index
 *   of the closing paren; `undefined` when the pattern does not start with
 *   a repeated extglob, is unterminated, or has trailing text while
 *   `requireEnd` is `true`.
 */

const parseRepeatedExtglob = (pattern, requireEnd = true) => {
  const type = pattern[0];

  if ((type !== '+' && type !== '*') || pattern[1] !== '(') {
    return;
  }

  let bracketDepth = 0;
  let parenDepth = 0;
  let quoted = false;
  let escaped = false;

  // Start at the opening paren so it is counted as depth 1.
  for (let i = 1; i < pattern.length; i++) {
    const ch = pattern[i];

    if (escaped === true) {
      escaped = false;
      continue;
    }

    if (ch === '\\') {
      escaped = true;
      continue;
    }

    if (ch === '"') {
      quoted = !quoted;
      continue;
    }

    if (quoted === true) {
      continue;
    }

    if (ch === '[') {
      bracketDepth++;
      continue;
    }

    if (ch === ']' && bracketDepth > 0) {
      bracketDepth--;
      continue;
    }

    // Parens inside a bracket expression are literal.
    if (bracketDepth > 0) {
      continue;
    }

    if (ch === '(') {
      parenDepth++;
      continue;
    }

    if (ch === ')') {
      parenDepth--;

      if (parenDepth === 0) {
        if (requireEnd === true && i !== pattern.length - 1) {
          return;
        }

        return {
          type,
          body: pattern.slice(2, i),
          end: i
        };
      }
    }
  }
};
232+
233+
/**
 * Build a compact regex source for a pattern that consists solely of
 * consecutive `*(x)` extglobs, each wrapping exactly one literal character.
 *
 * `*(a)` becomes `a*`, and `*(a)*(b)` becomes `[ab]*`.
 *
 * @param {String} pattern
 * @return {String|undefined} Regex source, or `undefined` when the pattern
 *   is empty or is not purely a sequence of single-character `*(...)`
 *   extglobs.
 */

const getStarExtglobSequenceOutput = pattern => {
  const chars = [];
  let index = 0;

  while (index < pattern.length) {
    // Trailing text is allowed here; it is consumed by the next iteration.
    const match = parseRepeatedExtglob(pattern.slice(index), false);

    if (!match || match.type !== '*') {
      return;
    }

    const branches = splitTopLevel(match.body).map(branch => branch.trim());
    if (branches.length !== 1) {
      return;
    }

    const branch = normalizeSimpleBranch(branches[0]);
    if (!branch || branch.length !== 1) {
      return;
    }

    chars.push(branch);
    // `match.end` is relative to the slice; step past the closing paren.
    index += match.end + 1;
  }

  if (chars.length < 1) {
    return;
  }

  // NOTE(review): a branch of `\\` normalizes to a single backslash; this
  // relies on utils.escapeRegex escaping it — confirm against utils.
  const source = chars.length === 1
    ? utils.escapeRegex(chars[0])
    : `[${chars.map(ch => utils.escapeRegex(ch)).join('')}]`;

  return `${source}*`;
};
268+
269+
/**
 * Count how many repeated extglobs (`+(...)` / `*(...)`) are directly
 * nested inside one another, starting from the outermost.
 *
 * At each level the (trimmed) text must be exactly one repeated extglob;
 * counting stops at the first level that is not.
 *
 * @param {String} pattern
 * @return {Number} Nesting depth; `0` when `pattern` itself is not a
 *   repeated extglob.
 */

const repeatedExtglobRecursion = pattern => {
  let depth = 0;
  let match = parseRepeatedExtglob(pattern.trim());

  while (match) {
    depth++;
    match = parseRepeatedExtglob(match.body.trim());
  }

  return depth;
};
282+
283+
/**
 * Decide whether the body of a repeated extglob is risky to compile to a
 * regex.
 *
 * A body is reported as risky when:
 *  - it has several branches and one of them is empty, consists only of
 *    `*` / `?`, or two branches are runs of the same character; or
 *  - a branch is a sequence of single-character `*(x)` extglobs (a
 *    simplified `safeOutput` regex source is returned alongside); or
 *  - a branch nests repeated extglobs deeper than the configured limit.
 *
 * @param {String} body Text between the parens of the extglob.
 * @param {Object} options
 * @param {Number|Boolean} [options.maxExtglobRecursion] Nesting limit.
 *   `false` disables the analysis. Any non-number, and `NaN`, falls back
 *   to `constants.DEFAULT_MAX_EXTGLOB_RECURSION`.
 * @return {{risky: Boolean, safeOutput: (String|undefined)}}
 */

const analyzeRepeatedExtglob = (body, options) => {
  const limit = options.maxExtglobRecursion;

  if (limit === false) {
    return { risky: false };
  }

  // `typeof NaN` is 'number', but `depth > NaN` is always false, which
  // would silently switch the depth check off; treat NaN as "not set".
  const max = typeof limit === 'number' && !Number.isNaN(limit)
    ? limit
    : constants.DEFAULT_MAX_EXTGLOB_RECURSION;

  const branches = splitTopLevel(body).map(branch => branch.trim());

  if (branches.length > 1) {
    const hasDegenerateBranch = branches.some(branch => branch === '' || /^[*?]+$/.test(branch));

    if (hasDegenerateBranch || hasRepeatedCharPrefixOverlap(branches)) {
      return { risky: true };
    }
  }

  for (const branch of branches) {
    const safeOutput = getStarExtglobSequenceOutput(branch);
    if (safeOutput) {
      return { risky: true, safeOutput };
    }

    if (repeatedExtglobRecursion(branch) > max) {
      return { risky: true };
    }
  }

  return { risky: false };
};
318+
48319
/**
49320
* Parse the given input string.
50321
* @param {String} input
@@ -226,6 +497,8 @@ const parse = (input, options) => {
226497
token.prev = prev;
227498
token.parens = state.parens;
228499
token.output = state.output;
500+
token.startIndex = state.index;
501+
token.tokensIndex = tokens.length;
229502
const output = (opts.capture ? '(' : '') + token.open;
230503

231504
increment('parens');
@@ -235,6 +508,34 @@ const parse = (input, options) => {
235508
};
236509

237510
const extglobClose = token => {
511+
const literal = input.slice(token.startIndex, state.index + 1);
512+
const body = input.slice(token.startIndex + 2, state.index);
513+
const analysis = analyzeRepeatedExtglob(body, opts);
514+
515+
if ((token.type === 'plus' || token.type === 'star') && analysis.risky) {
516+
const safeOutput = analysis.safeOutput
517+
? (token.output ? '' : ONE_CHAR) + (opts.capture ? `(${analysis.safeOutput})` : analysis.safeOutput)
518+
: undefined;
519+
const open = tokens[token.tokensIndex];
520+
521+
open.type = 'text';
522+
open.value = literal;
523+
open.output = safeOutput || utils.escapeRegex(literal);
524+
525+
for (let i = token.tokensIndex + 1; i < tokens.length; i++) {
526+
tokens[i].value = '';
527+
tokens[i].output = '';
528+
delete tokens[i].suffix;
529+
}
530+
531+
state.output = token.output + open.output;
532+
state.backtrack = true;
533+
534+
push({ type: 'paren', extglob: true, value, output: '' });
535+
decrement('parens');
536+
return;
537+
}
538+
238539
let output = token.close + (opts.capture ? ')' : '');
239540
let rest;
240541

0 commit comments

Comments
 (0)