diff --git a/src/Common/src/CoreLib/System/Collections/Generic/ValueListBuilder.cs b/src/Common/src/CoreLib/System/Collections/Generic/ValueListBuilder.cs index 72da4a9e197f..aea6052f030a 100644 --- a/src/Common/src/CoreLib/System/Collections/Generic/ValueListBuilder.cs +++ b/src/Common/src/CoreLib/System/Collections/Generic/ValueListBuilder.cs @@ -21,7 +21,16 @@ public ValueListBuilder(Span initialSpan) _pos = 0; } - public int Length => _pos; + public int Length + { + get => _pos; + set + { + Debug.Assert(value >= 0); + Debug.Assert(value <= _span.Length); + _pos = value; + } + } public ref T this[int index] { diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 77158bf5ca1a..4e392d8ab235 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -112,16 +112,16 @@ private Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, bool internalMatchTimeout = matchTimeout; // Cache handling. Try to look up this regex in the cache. - string cultureKey = (options & RegexOptions.CultureInvariant) != 0 ? - CultureInfo.InvariantCulture.ToString() : - CultureInfo.CurrentCulture.ToString(); - var key = new CachedCodeEntryKey(options, cultureKey, pattern); + CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? 
+ CultureInfo.InvariantCulture : + CultureInfo.CurrentCulture; + var key = new CachedCodeEntryKey(options, culture.ToString(), pattern); CachedCodeEntry cached = GetCachedCode(key, false); if (cached == null) { // Parse the input - RegexTree tree = RegexParser.Parse(pattern, roptions); + RegexTree tree = RegexParser.Parse(pattern, roptions, culture); // Extract the relevant information capnames = tree.CapNames; diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs index e44cb58986c3..a2daac6ce453 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs @@ -13,8 +13,6 @@ using System.Diagnostics; using System.Globalization; -using System.IO; -using System.Text; namespace System.Text.RegularExpressions { @@ -39,20 +37,7 @@ public RegexBoyerMoore(string pattern, bool caseInsensitive, bool rightToLeft, C // Sorry, you just can't use Boyer-Moore to find an empty pattern. // We're doing this for your own protection. (Really, for speed.) Debug.Assert(pattern.Length != 0, "RegexBoyerMoore called with an empty string. This is bad for perf"); - - if (caseInsensitive) - { - pattern = string.Create(pattern.Length, (pattern, culture), (span, state) => - { - // We do the ToLower character by character for consistency. With surrogate chars, doing - // a ToLower on the entire string could actually change the surrogate pair. This is more correct - // linguistically, but since Regex doesn't support surrogates, it's more important to be - // consistent. 
- TextInfo textInfo = state.culture.TextInfo; - for (int i = 0; i < state.pattern.Length; i++) - span[i] = textInfo.ToLower(state.pattern[i]); - }); - } + Debug.Assert(!caseInsensitive || pattern.ToLower(culture) == pattern, "RegexBoyerMoore called with a pattern which is not lowercased with caseInsensitive true."); Pattern = pattern; RightToLeft = rightToLeft; @@ -229,17 +214,7 @@ private bool MatchPattern(string text, int index) return false; } - TextInfo textinfo = _culture.TextInfo; - for (int i = 0; i < Pattern.Length; i++) - { - Debug.Assert(textinfo.ToLower(Pattern[i]) == Pattern[i], "pattern should be converted to lower case in constructor!"); - if (textinfo.ToLower(text[index + i]) != Pattern[i]) - { - return false; - } - } - - return true; + return (0 == string.Compare(Pattern, 0, text, index, Pattern.Length, CaseInsensitive, _culture)); } else { diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 547660f06419..d9c7500c045d 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -12,15 +12,15 @@ using System.Collections; using System.Collections.Generic; +using System.Diagnostics; using System.Globalization; -using System.IO; -using System.Text; namespace System.Text.RegularExpressions { - internal sealed class RegexParser + internal ref struct RegexParser { private const int EscapeMaxBufferSize = 256; + private const int OptionStackDefaultSize = 32; private const int MaxValueDiv10 = int.MaxValue / 10; private const int MaxValueMod10 = int.MaxValue % 10; @@ -30,9 +30,9 @@ internal sealed class RegexParser private RegexNode _concatenation; private RegexNode _unit; - private string _pattern; + private readonly string _pattern; private int _currentPos; - private CultureInfo _culture; + 
private readonly CultureInfo _culture; private int _autocap; private int _capcount; @@ -46,62 +46,76 @@ internal sealed class RegexParser private List _capnamelist; private RegexOptions _options; - private List _optionsStack; + private ValueListBuilder _optionsStack; - private bool _ignoreNextParen = false; + private bool _ignoreNextParen; // flag to skip capturing a parentheses group - /* - * This static call constructs a RegexTree from a regular expression - * pattern string and an option string. - * - * The method creates, drives, and drops a parser instance. - */ - public static RegexTree Parse(string re, RegexOptions op) + private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable capnames, Span optionSpan) { - RegexParser p; - RegexNode root; - string[] capnamelist; - - p = new RegexParser((op & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture); - - p._options = op; + Debug.Assert(pattern != null, "Pattern must be set"); + Debug.Assert(culture != null, "Culture must be set"); - p.SetPattern(re); - p.CountCaptures(); - p.Reset(op); - root = p.ScanRegex(); + _pattern = pattern; + _options = options; + _culture = culture; + _caps = caps; + _capsize = capsize; + _capnames = capnames; - if (p._capnamelist == null) - capnamelist = null; - else - capnamelist = p._capnamelist.ToArray(); + _optionsStack = new ValueListBuilder(optionSpan); + _stack = default; + _group = default; + _alternation = default; + _concatenation = default; + _unit = default; + _currentPos = 0; + _autocap = default; + _capcount = default; + _captop = default; + _capnumlist = default; + _capnamelist = default; + _ignoreNextParen = false; + } - return new RegexTree(root, p._caps, p._capnumlist, p._captop, p._capnames, capnamelist, op); + private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Span optionSpan) + : this(pattern, options, culture, new Hashtable(), default, 
null, optionSpan) + { } - /* - * This static call constructs a flat concatenation node given - * a replacement pattern. - */ - public static RegexReplacement ParseReplacement(string rep, Hashtable caps, int capsize, Hashtable capnames, RegexOptions op) + public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture) { - RegexParser p; - RegexNode root; + Span optionSpan = stackalloc RegexOptions[OptionStackDefaultSize]; + var parser = new RegexParser(pattern, options, culture, optionSpan); + + parser.CountCaptures(); + parser.Reset(options); + RegexNode root = parser.ScanRegex(); + string[] capnamelist = parser._capnamelist?.ToArray(); + var tree = new RegexTree(root, parser._caps, parser._capnumlist, parser._captop, parser._capnames, capnamelist, options); + parser.Dispose(); - p = new RegexParser((op & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture); + return tree; + } - p._options = op; + /// + /// This static call constructs a flat concatenation node given a replacement pattern. + /// + public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames) + { + CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? 
CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; + Span optionSpan = stackalloc RegexOptions[OptionStackDefaultSize]; + var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, optionSpan); - p.NoteCaptures(caps, capsize, capnames); - p.SetPattern(rep); - root = p.ScanReplacement(); + RegexNode root = parser.ScanReplacement(); + var regexReplacement = new RegexReplacement(pattern, root, caps); + parser.Dispose(); - return new RegexReplacement(rep, root, caps); + return regexReplacement; } - /* - * Escapes all metacharacters (including |,(,),[,{,|,^,$,*,+,?,\, spaces and #) - */ + /// + /// Escapes all metacharacters (including |,(,),[,{,|,^,$,*,+,?,\, spaces and #) + /// public static string Escape(string input) { for (int i = 0; i < input.Length; i++) @@ -128,9 +142,8 @@ private static string EscapeImpl(string input, int i) new ValueStringBuilder(input.Length + 200); char ch = input[i]; - int lastpos; - vsb.Append(input.AsSpan(0, i)); + do { vsb.Append('\\'); @@ -151,7 +164,7 @@ private static string EscapeImpl(string input, int i) } vsb.Append(ch); i++; - lastpos = i; + int lastpos = i; while (i < input.Length) { @@ -168,9 +181,9 @@ private static string EscapeImpl(string input, int i) return vsb.ToString(); } - /* - * Unescapes all metacharacters (including (,),[,],{,},|,^,$,*,+,?,\, spaces and #) - */ + /// + /// Unescapes all metacharacters (including (,),[,],{,},|,^,$,*,+,?,\, spaces and #) + /// public static string Unescape(string input) { for (int i = 0; i < input.Length; i++) @@ -186,9 +199,8 @@ public static string Unescape(string input) private static string UnescapeImpl(string input, int i) { - RegexParser p = new RegexParser(CultureInfo.InvariantCulture); - int lastpos; - p.SetPattern(input); + Span optionSpan = stackalloc RegexOptions[OptionStackDefaultSize]; + var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, optionSpan); // In the worst case the escaped string has the same length. 
// For small inputs we use stack allocation. @@ -201,59 +213,43 @@ private static string UnescapeImpl(string input, int i) do { i++; - p.Textto(i); + parser.Textto(i); if (i < input.Length) - vsb.Append(p.ScanCharEscape()); - i = p.Textpos(); - lastpos = i; + vsb.Append(parser.ScanCharEscape()); + i = parser.Textpos(); + int lastpos = i; while (i < input.Length && input[i] != '\\') i++; vsb.Append(input.AsSpan(lastpos, i - lastpos)); } while (i < input.Length); - return vsb.ToString(); - } + parser.Dispose(); - /* - * Private constructor. - */ - private RegexParser(CultureInfo culture) - { - _culture = culture; - _optionsStack = new List(); - _caps = new Hashtable(); - } - - /* - * Drops a string into the pattern buffer. - */ - private void SetPattern(string Re) - { - if (Re == null) - Re = string.Empty; - _pattern = Re; - _currentPos = 0; + return vsb.ToString(); } - /* - * Resets parsing to the beginning of the pattern. - */ - private void Reset(RegexOptions topopts) + /// + /// Resets parsing to the beginning of the pattern. + /// + private void Reset(RegexOptions options) { _currentPos = 0; _autocap = 1; _ignoreNextParen = false; - - if (_optionsStack.Count > 0) - _optionsStack.RemoveRange(0, _optionsStack.Count - 1); - - _options = topopts; + _optionsStack.Length = 0; + _options = options; _stack = null; } + public void Dispose() + { + _optionsStack.Dispose(); + } + /* * The main parsing function. */ + private RegexNode ScanRegex() { char ch = '@'; // nonspecial ch, means at beginning @@ -273,10 +269,10 @@ private RegexNode ScanRegex() // move past all of the normal characters. We'll stop when we hit some kind of control character, // or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace. 
if (UseOptionX()) - while (CharsRight() > 0 && (!IsStopperX(ch = RightChar()) || ch == '{' && !IsTrueQuantifier())) + while (CharsRight() > 0 && (!IsStopperX(ch = RightChar()) || (ch == '{' && !IsTrueQuantifier()))) MoveRight(); else - while (CharsRight() > 0 && (!IsSpecial(ch = RightChar()) || ch == '{' && !IsTrueQuantifier())) + while (CharsRight() > 0 && (!IsSpecial(ch = RightChar()) || (ch == '{' && !IsTrueQuantifier()))) MoveRight(); int endpos = Textpos(); @@ -353,6 +349,9 @@ private RegexNode ScanRegex() break; case '\\': + if (CharsRight() == 0) + throw MakeException(RegexParseError.IllegalEndEscape, SR.IllegalEndEscape); + AddUnitNode(ScanBackslash(scanOnly: false)); break; @@ -489,18 +488,15 @@ private RegexNode ScanRegex() */ private RegexNode ScanReplacement() { - int c; - int startpos; - _concatenation = new RegexNode(RegexNode.Concatenate, _options); for (; ;) { - c = CharsRight(); + int c = CharsRight(); if (c == 0) break; - startpos = Textpos(); + int startpos = Textpos(); while (c > 0 && RightChar() != '$') { @@ -622,18 +618,14 @@ private RegexCharClass ScanCharClass(bool caseInsensitive, bool scanOnly) // It currently doesn't do anything other than skip the whole thing! if (CharsRight() > 0 && RightChar() == ':' && !inRange) { - string name; int savePos = Textpos(); MoveRight(); - name = ScanCapname(); if (CharsRight() < 2 || RightCharMoveRight() != ':' || RightCharMoveRight() != ']') Textto(savePos); - // else lookup name (nyi) } } - if (inRange) { inRange = false; @@ -707,11 +699,6 @@ private RegexCharClass ScanCharClass(bool caseInsensitive, bool scanOnly) */ private RegexNode ScanGroupOpen() { - char ch = '\0'; - int NodeType; - char close = '>'; - - // just return a RegexNode if we have: // 1. "(" followed by nothing // 2. "(x" where x != ? 
@@ -734,6 +721,9 @@ private RegexNode ScanGroupOpen() if (CharsRight() == 0) break; + int NodeType; + char close = '>'; + char ch; switch (ch = RightCharMoveRight()) { case ':': @@ -1002,6 +992,7 @@ private void ScanBlank() RightChar(1) != '?' || RightChar() != '(') return; + // skip comment (?# ...) while (CharsRight() > 0 && RightChar() != ')') MoveRight(); if (CharsRight() == 0) @@ -1017,12 +1008,9 @@ private void ScanBlank() */ private RegexNode ScanBackslash(bool scanOnly) { - char ch; - RegexCharClass cc; - - if (CharsRight() == 0) - throw MakeException(RegexParseError.IllegalEndEscape, SR.IllegalEndEscape); + Debug.Assert(CharsRight() > 0, "The current reading position must not be at the end of the pattern"); + char ch; switch (ch = RightChar()) { case 'b': @@ -1089,7 +1077,7 @@ private RegexNode ScanBackslash(bool scanOnly) MoveRight(); if (scanOnly) return null; - cc = new RegexCharClass(); + var cc = new RegexCharClass(); cc.AddCategoryFromName(ParseProperty(), (ch != 'p'), UseOptionI(), _pattern, _currentPos); if (UseOptionI()) cc.AddLowercase(_culture); @@ -1109,13 +1097,10 @@ private RegexNode ScanBasicBackslash(bool scanOnly) if (CharsRight() == 0) throw MakeException(RegexParseError.IllegalEndEscape, SR.IllegalEndEscape); - char ch; - bool angled = false; + int backpos = Textpos(); char close = '\0'; - int backpos; - - backpos = Textpos(); - ch = RightChar(); + bool angled = false; + char ch = RightChar(); // allow \k instead of \, which is now deprecated @@ -1378,13 +1363,10 @@ private string ScanCapname() */ private char ScanOctal() { + // Consume octal chars only up to 3 digits and value 0377 + int c = 3; int d; int i; - int c; - - // Consume octal chars only up to 3 digits and value 0377 - - c = 3; if (c > CharsRight()) c = CharsRight(); @@ -1432,11 +1414,9 @@ private int ScanDecimal() */ private char ScanHex(int c) { - int i; + int i = 0; int d; - i = 0; - if (CharsRight() >= c) { for (; c > 0 && ((d = HexDigit(RightCharMoveRight())) >= 0); c -= 
1) @@ -1476,12 +1456,10 @@ private static int HexDigit(char ch) */ private char ScanControl() { - char ch; - - if (CharsRight() <= 0) + if (CharsRight() == 0) throw MakeException(RegexParseError.MissingControl, SR.MissingControl); - ch = RightCharMoveRight(); + char ch = RightCharMoveRight(); // \ca interpreted as \cA @@ -1497,12 +1475,11 @@ private char ScanControl() /* * Returns true for options allowed only at the top level */ - private bool IsOnlyTopOption(RegexOptions option) + private bool IsOnlyTopOption(RegexOptions options) { - return (option == RegexOptions.RightToLeft - || option == RegexOptions.CultureInvariant - || option == RegexOptions.ECMAScript - ); + return options == RegexOptions.RightToLeft || + options == RegexOptions.CultureInvariant || + options == RegexOptions.ECMAScript; } /* @@ -1510,13 +1487,9 @@ private bool IsOnlyTopOption(RegexOptions option) */ private void ScanOptions() { - char ch; - bool off; - RegexOptions option; - - for (off = false; CharsRight() > 0; MoveRight()) + for (bool off = false; CharsRight() > 0; MoveRight()) { - ch = RightChar(); + char ch = RightChar(); if (ch == '-') { @@ -1528,14 +1501,14 @@ private void ScanOptions() } else { - option = OptionFromCode(ch); - if (option == 0 || IsOnlyTopOption(option)) + RegexOptions options = OptionFromCode(ch); + if (options == 0 || IsOnlyTopOption(options)) return; if (off) - _options &= ~option; + _options &= ~options; else - _options |= option; + _options |= options; } } } @@ -1545,9 +1518,7 @@ private void ScanOptions() */ private char ScanCharEscape() { - char ch; - - ch = RightCharMoveRight(); + char ch = RightCharMoveRight(); if (ch >= '0' && ch <= '7') { @@ -1595,6 +1566,7 @@ private string ParseProperty() { throw MakeException(RegexParseError.IncompleteSlashP, SR.IncompleteSlashP); } + char ch = RightCharMoveRight(); if (ch != '{') { @@ -1683,8 +1655,6 @@ private static RegexOptions OptionFromCode(char ch) */ private void CountCaptures() { - char ch; - NoteCaptureSlot(0, 
0); _autocap = 1; @@ -1692,7 +1662,7 @@ private void CountCaptures() while (CharsRight() > 0) { int pos = Textpos(); - ch = RightCharMoveRight(); + char ch = RightCharMoveRight(); switch (ch) { case '\\': @@ -1720,6 +1690,7 @@ private void CountCaptures() case '(': if (CharsRight() >= 2 && RightChar(1) == '#' && RightChar() == '?') { + // we have a comment (?# MoveLeft(); ScanBlank(); } @@ -1777,6 +1748,9 @@ private void CountCaptures() } else { + // Simple (unnamed) capture group. + // Add unnamed parentheses if ExplicitCapture is not set + // and the next parenthesis is not ignored. if (!UseOptionN() && !_ignoreNextParen) NoteCaptureSlot(_autocap++, pos); } @@ -1804,10 +1778,7 @@ private void NoteCaptureSlot(int i, int pos) if (_captop <= i) { - if (i == int.MaxValue) - _captop = i; - else - _captop = i + 1; + _captop = i == int.MaxValue ? i : i + 1; } } } @@ -1830,16 +1801,6 @@ private void NoteCaptureName(string name, int pos) } } - /* - * For when all the used captures are known: note them all at once - */ - private void NoteCaptures(Hashtable caps, int capsize, Hashtable capnames) - { - _caps = caps; - _capsize = capsize; - _capnames = capnames; - } - /* * Assigns unused slot numbers to the capture names */ @@ -1901,7 +1862,7 @@ private void AssignNameSlots() for (int i = 0; i < _capcount; i++) { - int j = (_capnumlist == null) ? i : (int)_capnumlist[i]; + int j = (_capnumlist == null) ? 
i : _capnumlist[i]; if (next == j) { @@ -2041,22 +2002,28 @@ private static bool IsQuantifier(char ch) private bool IsTrueQuantifier() { - int nChars = CharsRight(); - if (nChars == 0) - return false; + Debug.Assert(CharsRight() > 0, "The current reading position must not be at the end of the pattern"); + int startpos = Textpos(); char ch = CharAt(startpos); if (ch != '{') return ch <= '{' && s_category[ch] >= Q; + int pos = startpos; + int nChars = CharsRight(); while (--nChars > 0 && (ch = CharAt(++pos)) >= '0' && ch <= '9') ; + if (nChars == 0 || pos - startpos == 1) return false; + if (ch == '}') return true; + if (ch != ',') return false; + while (--nChars > 0 && (ch = CharAt(++pos)) >= '0' && ch <= '9') ; + return nChars > 0 && ch == '}'; } @@ -2082,28 +2049,17 @@ private static bool IsMetachar(char ch) */ private void AddConcatenate(int pos, int cch, bool isReplacement) { - RegexNode node; - if (cch == 0) return; + RegexNode node; if (cch > 1) { string str; if (UseOptionI() && !isReplacement) { str = string.Create(cch, (_pattern, _culture, pos, cch), (span, state) => - { - ReadOnlySpan input = state._pattern.AsSpan(pos, cch); - - // We do the ToLower character by character for consistency. With surrogate chars, doing - // a ToLower on the entire string could actually change the surrogate pair. This is more correct - // linguistically, but since Regex doesn't support surrogates, it's more important to be - // consistent. 
- TextInfo textInfo = state._culture.TextInfo; - for (int i = 0; i < input.Length; i++) - span[i] = textInfo.ToLower(input[i]); - }); + state._pattern.AsSpan(state.pos, state.cch).ToLower(span, state._culture)); } else { @@ -2294,7 +2250,7 @@ private void AddGroup() */ private void PushOptions() { - _optionsStack.Add(_options); + _optionsStack.Append(_options); } /* @@ -2302,8 +2258,7 @@ private void PushOptions() */ private void PopOptions() { - _options = _optionsStack[_optionsStack.Count - 1]; - _optionsStack.RemoveAt(_optionsStack.Count - 1); + _options = _optionsStack.Pop(); } /* @@ -2311,15 +2266,15 @@ private void PopOptions() */ private bool EmptyOptionsStack() { - return (_optionsStack.Count == 0); + return _optionsStack.Length == 0; } /* - * Pops the option stack, but keeps the current options unchanged. + * Pops the options stack, but keeps the current options unchanged. */ private void PopKeepOptions() { - _optionsStack.RemoveAt(_optionsStack.Count - 1); + _optionsStack.Length--; } /* diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs index 5200341df9b8..1c6f126a6390 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs @@ -94,7 +94,7 @@ public static RegexReplacement GetOrCreate(WeakReference replR if (!replRef.TryGetTarget(out repl) || !repl.Pattern.Equals(replacement)) { - repl = RegexParser.ParseReplacement(replacement, caps, capsize, capnames, roptions); + repl = RegexParser.ParseReplacement(replacement, roptions, caps, capsize, capnames); replRef.SetTarget(repl); } diff --git a/src/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs b/src/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs index c1ade0a8c3cb..faab74c3eba5 100644 --- 
a/src/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs +++ b/src/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs @@ -759,5 +759,22 @@ public void GroupsBasic() return SuccessExitCode; }).Dispose(); } + + [Fact] + public void Synchronized_NullGroup_Throws() + { + AssertExtensions.Throws("inner", () => Group.Synchronized(null)); + } + + [Theory] + [InlineData(@"(cat)([\v]*)(dog)", "cat\v\v\vdog")] + [InlineData("abc", "def")] // no match + public void Synchronized_ValidGroup_Success(string pattern, string input) + { + Match match = Regex.Match(input, pattern); + + Group synchronizedGroup = Group.Synchronized(match.Groups[0]); + Assert.NotNull(synchronizedGroup); + } } } diff --git a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index edad94f24f6b..e9ae205347b5 100644 --- a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -76,7 +76,11 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "[^0-9]+(?>[0-9]+)3", "abc123", RegexOptions.None, 0, 6, false, string.Empty }; // Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z" - yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.None, 0, 17, true, "aaaasdfajsdlfjzzz" }; + yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.IgnoreCase, 0, 17, true, "aaaasdfajsdlfjzzz" }; + yield return new object[] { @"\Aaaaaa\w+zzz\Z", "aaaa", RegexOptions.IgnoreCase, 0, 4, false, string.Empty }; + yield return new object[] { @"\Aaaaaa\w+zzz\Z", "aaaa", RegexOptions.RightToLeft, 0, 4, false, string.Empty }; + yield return new object[] { @"\Aaaaaa\w+zzzzz\Z", "aaaa", RegexOptions.RightToLeft, 0, 4, false, string.Empty }; + yield return new object[] { @"\Aaaaaa\w+zzz\Z", "aaaa", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 0, 4, false, string.Empty }; // 
Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z" yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzza", RegexOptions.None, 0, 18, false, string.Empty }; @@ -97,7 +101,7 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "(abbc)(?(1)111|222)", "abbc222", RegexOptions.None, 0, 7, false, string.Empty }; // "x" option. Removes unescaped whitespace from the pattern: Actual - " ([^/]+) ","x" - yield return new object[] { " ((.)+) ", "abc", RegexOptions.IgnorePatternWhitespace, 0, 3, true, "abc" }; + yield return new object[] { " ((.)+) #comment ", "abc", RegexOptions.IgnorePatternWhitespace, 0, 3, true, "abc" }; // "x" option. Removes unescaped whitespace from the pattern. : Actual - "\x20([^/]+)\x20","x" yield return new object[] { "\x20([^/]+)\x20\x20\x20\x20\x20\x20\x20", " abc ", RegexOptions.IgnorePatternWhitespace, 0, 10, true, " abc " }; @@ -109,13 +113,13 @@ public static IEnumerable Match_Basic_TestData() } // Turning off case insensitive option in mid-pattern : Actual - "aaa(?-i:match this)bbb", "i" - yield return new object[] { "aaa(?-i:match this)bbb", "AaAmatch thisBBb", RegexOptions.IgnoreCase, 0, 16, true, "AaAmatch thisBBb" }; + yield return new object[] { "aAa(?-i:match this)bbb", "AaAmatch thisBBb", RegexOptions.IgnoreCase, 0, 16, true, "AaAmatch thisBBb" }; // Turning on/off all the options at once : Actual - "aaa(?imnsx-imnsx:match this)bbb", "i" - yield return new object[] { "aaa(?-i:match this)bbb", "AaAmatcH thisBBb", RegexOptions.IgnoreCase, 0, 16, false, string.Empty }; + yield return new object[] { "aaa(?imnsx-imnsx:match this)bbb", "AaAmatcH thisBBb", RegexOptions.IgnoreCase, 0, 16, false, string.Empty }; // Actual - "aaa(?#ignore this completely)bbb" - yield return new object[] { "aaa(?#ignore this completely)bbb", "aaabbb", RegexOptions.None, 0, 6, true, "aaabbb" }; + yield return new object[] { "aAa(?#ignore this completely)bbb", "aAabbb", RegexOptions.None, 0, 6, true, 
"aAabbb" }; // Trying empty string: Actual "[a-z0-9]+", "" yield return new object[] { "[a-z0-9]+", "", RegexOptions.None, 0, 0, false, string.Empty }; @@ -291,6 +295,9 @@ public static IEnumerable Match_Basic_TestData() // \c if (!PlatformDetection.IsFullFramework) // missing fix for #26501 yield return new object[] { @"(cat)(\c[*)(dog)", "asdlkcat\u00FFdogiwod", RegexOptions.None, 0, 15, false, string.Empty }; + + // Surrogate pairs split up into UTF-16 code units. + yield return new object[] { @"(\uD82F[\uDCA0-\uDCA3])", "\uD82F\uDCA2", RegexOptions.CultureInvariant, 0, 2, true, "\uD82F\uDCA2" }; } [Theory] @@ -631,6 +638,24 @@ public static IEnumerable Match_Advanced_TestData() new CaptureData("aaa", 0, 3) } }; + + // RightToLeft with anchor + yield return new object[] + { + "^aaa", "aaabbb", RegexOptions.RightToLeft, 3, 3, + new CaptureData[] + { + new CaptureData("aaa", 0, 3) + } + }; + yield return new object[] + { + "bbb$", "aaabbb", RegexOptions.RightToLeft, 0, 3, + new CaptureData[] + { + new CaptureData("bbb", 0, 3) + } + }; } [Theory] diff --git a/src/System.Text.RegularExpressions/tests/RegexParserTests.cs b/src/System.Text.RegularExpressions/tests/RegexParserTests.cs index de3a50015eef..15ee898d399b 100644 --- a/src/System.Text.RegularExpressions/tests/RegexParserTests.cs +++ b/src/System.Text.RegularExpressions/tests/RegexParserTests.cs @@ -862,6 +862,8 @@ public void Parse(string pattern, RegexOptions options, object errorObj) [InlineData("a{2147483648,}", RegexOptions.None, RegexParseError.CaptureGroupOutOfRange)] [InlineData("a{0,2147483647}", RegexOptions.None, null)] [InlineData("a{0,2147483648}", RegexOptions.None, RegexParseError.CaptureGroupOutOfRange)] + // Surrogate pair which is parsed as [char,char-char,char] as we operate on UTF-16 code units. 
+ [InlineData("[\uD82F\uDCA0-\uD82F\uDCA3]", RegexOptions.IgnoreCase, RegexParseError.ReversedCharRange)] public void Parse_NotNetFramework(string pattern, RegexOptions options, object error) { Parse(pattern, options, error);