From ec2f26968905103fbc8bbade828eb59432b99449 Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Tue, 1 Mar 2022 12:26:48 -0800 Subject: [PATCH 1/2] Adding Count(ReadOnlySpan) Overloads --- .../ref/System.Text.RegularExpressions.cs | 4 + .../Text/RegularExpressions/Regex.Count.cs | 53 ++++ .../System/Text/RegularExpressions/Regex.cs | 233 ++++++++++-------- .../FunctionalTests/Regex.Count.Tests.cs | 16 ++ 4 files changed, 208 insertions(+), 98 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index dd6956d11dc7ff..d856f1cbfabafa 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -162,9 +162,13 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila [System.ObsoleteAttribute("Regex.CompileToAssembly is obsolete and not supported. Use the RegexGeneratorAttribute with the regular expression source generator instead.", DiagnosticId = "SYSLIB0036", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")] public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompilationInfo[] regexinfos, System.Reflection.AssemblyName assemblyname, System.Reflection.Emit.CustomAttributeBuilder[]? attributes, string? 
resourceFile) { } public int Count(string input) { throw null; } + public int Count(System.ReadOnlySpan input) { throw null; } public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } + public static int Count(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } + public static int Count(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } + public static int Count(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } public static string Escape(string str) { throw null; } public string[] GetGroupNames() { throw null; } public int[] GetGroupNumbers() { throw null; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs index 18c08ee3242e2d..5347a170ca1aea 100644 --- 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs @@ -29,6 +29,24 @@ public int Count(string input) return count; } + /// + /// Searches an input span for all occurrences of a regular expression and returns the number of matches. + /// + /// The span to search for a match. + /// The number of matches. + public int Count(ReadOnlySpan input) + { + int count = 0; + + Run(input, 0, ref count, static (ref int count, Match match) => + { + count++; + return true; + }, reuseMatchObject: true); + + return count; + } + /// Searches an input string for all occurrences of a regular expression and returns the number of matches. /// The string to search for a match. /// The regular expression pattern to match. @@ -60,5 +78,40 @@ public static int Count(string input, [StringSyntax(StringSyntaxAttribute.Regex, /// A regular expression parsing error occurred. public static int Count(string input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) => RegexCache.GetOrAdd(pattern, options, matchTimeout).Count(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns the number of matches. + /// + /// The span to search for a match. + /// The regular expression pattern to match. + /// The number of matches. + /// A regular expression parsing error occurred. + public static int Count(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) => + RegexCache.GetOrAdd(pattern).Count(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns the number of matches. + /// + /// The span to search for a match. + /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that specify options for matching. + /// The number of matches. 
+ /// is not a valid bitwise combination of RegexOptions values. + /// A regular expression parsing error occurred. + public static int Count(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) => + RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).Count(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns the number of matches. + /// + /// The span to search for a match. + /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that specify options for matching. + /// A time-out interval, or to indicate that the method should not time out. + /// The number of matches. + /// is not a valid bitwise combination of RegexOptions values, or is negative, zero, or greater than approximately 24 days. + /// A regular expression parsing error occurred. + public static int Count(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) => + RegexCache.GetOrAdd(pattern, options, matchTimeout).Count(input); } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 57ee7df1c6914f..75e5395d6c5e5b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -392,7 +392,7 @@ protected void InitializeReferences() runner.runtextpos += RightToLeft ? -1 : 1; } - return InternalPerformScan(quick, input, beginning, runner, span, returnNullIfQuick: true); + return RunInternal(quick, input, beginning, runner, span, returnNullIfQuick: true); } finally { @@ -401,46 +401,6 @@ protected void InitializeReferences() } } - private static Match? 
InternalPerformScan(bool quick, string input, int beginning, RegexRunner runner, ReadOnlySpan span, bool returnNullIfQuick) - { - runner.Scan(span); - - Match? match = runner.runmatch; - Debug.Assert(match is not null); - - // If we got a match, do some cleanup and return it, or return null if quick is true; - if (match.FoundMatch) - { - if (!quick) - { - // We're about to return the Match object. Store the input into it and remove it from the runner. - match.Text = input; - runner.runmatch = null; - } - else if (returnNullIfQuick) - { - match.Text = null; - return null; - } - - match.Tidy(runner.runtextpos); - - // If the passed in beginning was not 0 then we need to adjust the offsets on the match object. - if (beginning != 0) - { - match.AddBeginningToIndex(beginning); - } - - return match; - } - - // We failed to match, so we will return Match.Empty which means we can reuse runmatch object. - // We do however need to clear its Text in case it was set, so as to not keep it alive in some cache. - runner.runmatch!.Text = null; - - return RegularExpressions.Match.Empty; - } - internal Match? Run(ReadOnlySpan input, int startat) { // startat parameter is always either 0 or input.Length since public API for IsMatch doesn't have an overload @@ -465,6 +425,10 @@ protected void InitializeReferences() } } + /// + /// Performs the matching against a given string input, which will call the passed in for every + /// match that is found, along with the relevant . + /// internal void Run(string input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) { Debug.Assert((uint)startat <= (uint)input.Length); @@ -472,86 +436,159 @@ internal void Run(string input, int startat, ref TState state, MatchCall RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); try { - runner.InitializeTimeout(internalMatchTimeout); + // For the string overload, we need to set runtext before starting the match attempts. 
runner.runtext = input; - int runtextpos = startat; - while (true) - { - runner.InitializeForScan(this, input, startat, false); - runner.runtextpos = runtextpos; + RunInternal(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject); + } + finally + { + runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. + _runner = runner; + } + } + + /// + /// Performs the matching against a given span input, which will call the passed in for every + /// match that is found, along with the relevant . + /// + internal void Run(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) + { + Debug.Assert((uint)startat <= (uint)input.Length); + + RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); + try + { + RunInternal(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject); + } + finally + { + _runner = runner; + } + } + + /// + /// Performs the internal loop which will search for matches on the given , and will call the for + /// each match. If is set to , then we may need to do additional cleanup which ensures that + /// we are not keeping a reference to the input text alive in a cache. + /// + private void RunInternal(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject) + { + runner.InitializeTimeout(internalMatchTimeout); + int runtextpos = startat; + while (true) + { + runner.InitializeForScan(this, input, startat, false); + runner.runtextpos = runtextpos; - int stoppos = RightToLeft ? 0 : input.Length; + int stoppos = RightToLeft ? 0 : input.Length; - Match? match = InternalPerformScan(reuseMatchObject, input, 0, runner, input, returnNullIfQuick: false); - Debug.Assert(match is not null); + // We get the Match by calling Scan. 
'input' parameter is used to set the Match text which is only relevant if we are using the Run string + // overload, as APIs that call the span overload (like Count) don't require match.Text to be set, so we pass null in that case. + Match? match = RunInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false); + Debug.Assert(match is not null); - // if we got a match, then call the callback function with the match and prepare for next iteration. - if (match.Success) + // if we got a match, then call the callback function with the match and prepare for next iteration. + if (match.Success) + { + if (!reuseMatchObject) { - if (!reuseMatchObject) - { - // We're not reusing match objects, so null out our field reference to the instance. - // It'll be recreated the next time one is needed. - runner.runmatch = null; - } + // We're not reusing match objects, so null out our field reference to the instance. + // It'll be recreated the next time one is needed. + runner.runmatch = null; + } - if (!callback(ref state, match)) - { - // If the callback returns false, we're done. + if (!callback(ref state, match)) + { + // If the callback returns false, we're done. - if (reuseMatchObject) - { - // We're reusing the single match instance, so clear out its text as well. - // We don't do this if we're not reusing instances, as in that case we're - // dropping the whole reference to the match, and we no longer own the instance - // having handed it out to the callback. - match.Text = null; - } - return; + if (usingStringOverload && reuseMatchObject) + { + // We're reusing the single match instance and we were called via the string overload + // which would have set the match's text, so clear it out as well. + // We don't do this if we're not reusing instances, as in that case we're + // dropping the whole reference to the match, and we no longer own the instance + // having handed it out to the callback. 
+ match.Text = null; } + return; + } - // Now that we've matched successfully, update the starting position to reflect - // the current position, just as Match.NextMatch() would pass in _textpos as textstart. - runtextpos = startat = runner.runtextpos; + // Now that we've matched successfully, update the starting position to reflect + // the current position, just as Match.NextMatch() would pass in _textpos as textstart. + runtextpos = startat = runner.runtextpos; - // Reset state for another iteration. - runner.runtrackpos = runner.runtrack!.Length; - runner.runstackpos = runner.runstack!.Length; - runner.runcrawlpos = runner.runcrawl!.Length; + // Reset state for another iteration. + runner.runtrackpos = runner.runtrack!.Length; + runner.runstackpos = runner.runstack!.Length; + runner.runcrawlpos = runner.runcrawl!.Length; - if (match.Length == 0) + if (match.Length == 0) + { + if (runner.runtextpos == stoppos) { - if (runner.runtextpos == stoppos) + if (usingStringOverload && reuseMatchObject) { - if (reuseMatchObject) - { - // See above comment. - match.Text = null; - } - return; + // See above comment. + match.Text = null; } - - runtextpos += RightToLeft ? -1 : 1; + return; } - // Loop around to perform next match from where we left off. - continue; + runtextpos += RightToLeft ? -1 : 1; } - else + + // Loop around to perform next match from where we left off. + continue; + } + else + { + // We failed to match at this position. If we're at the stopping point, we're done. + if (runner.runtextpos == stoppos) { - // We failed to match at this position. If we're at the stopping point, we're done. - if (runner.runtextpos == stoppos) - { - return; - } + return; } } } - finally + } + + private static Match? RunInternal(bool quick, string? input, int beginning, RegexRunner runner, ReadOnlySpan span, bool returnNullIfQuick) + { + runner.Scan(span); + + Match? 
match = runner.runmatch; + Debug.Assert(match is not null); + + // If we got a match, do some cleanup and return it, or return null if quick is true; + if (match.FoundMatch) { - runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. - _runner = runner; + if (!quick) + { + // We're about to return the Match object. Store the input into it and remove it from the runner. + match.Text = input; + runner.runmatch = null; + } + else if (returnNullIfQuick) + { + match.Text = null; + return null; + } + + match.Tidy(runner.runtextpos); + + // If the passed in beginning was not 0 then we need to adjust the offsets on the match object. + if (beginning != 0) + { + match.AddBeginningToIndex(beginning); + } + + return match; } + + // We failed to match, so we will return Match.Empty which means we can reuse runmatch object. + // We do however need to clear its Text in case it was set, so as to not keep it alive in some cache. + runner.runmatch!.Text = null; + + return RegularExpressions.Match.Empty; } /// Creates a new runner instance. 
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs index 272cbb143d69d4..619d676570388e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs @@ -16,11 +16,14 @@ public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, { Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); Assert.Equal(expectedCount, r.Count(input)); + Assert.Equal(expectedCount, r.Count(input.AsSpan())); Assert.Equal(r.Count(input), r.Matches(input).Count); + Assert.Equal(r.Count(input.AsSpan()), r.Matches(input).Count); if (options == RegexOptions.None && engine == RegexEngine.Interpreter) { Assert.Equal(expectedCount, Regex.Count(input, pattern)); + Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern)); } switch (engine) @@ -30,7 +33,9 @@ public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, case RegexEngine.NonBacktracking: RegexOptions engineOptions = RegexHelpers.OptionsFromEngine(engine); Assert.Equal(expectedCount, Regex.Count(input, pattern, options | engineOptions)); + Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, options | engineOptions)); Assert.Equal(expectedCount, Regex.Count(input, pattern, options | engineOptions, Regex.InfiniteMatchTimeout)); + Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, options | engineOptions, Regex.InfiniteMatchTimeout)); break; } } @@ -69,22 +74,31 @@ public void Count_InvalidArguments_Throws() // pattern is null AssertExtensions.Throws("pattern", () => Regex.Count("input", null)); + AssertExtensions.Throws("pattern", () => Regex.Count("input".AsSpan(), null)); AssertExtensions.Throws("pattern", () => Regex.Count("input", null, RegexOptions.None)); + 
AssertExtensions.Throws("pattern", () => Regex.Count("input".AsSpan(), null, RegexOptions.None)); AssertExtensions.Throws("pattern", () => Regex.Count("input", null, RegexOptions.None, TimeSpan.FromMilliseconds(1))); + AssertExtensions.Throws("pattern", () => Regex.Count("input".AsSpan(), null, RegexOptions.None, TimeSpan.FromMilliseconds(1))); // pattern is invalid #pragma warning disable RE0001 // invalid regex pattern AssertExtensions.Throws(() => Regex.Count("input", @"[abc")); + AssertExtensions.Throws(() => Regex.Count("input".AsSpan(), @"[abc")); AssertExtensions.Throws(() => Regex.Count("input", @"[abc", RegexOptions.None)); + AssertExtensions.Throws(() => Regex.Count("input".AsSpan(), @"[abc", RegexOptions.None)); AssertExtensions.Throws(() => Regex.Count("input", @"[abc", RegexOptions.None, TimeSpan.FromMilliseconds(1))); + AssertExtensions.Throws(() => Regex.Count("input".AsSpan(), @"[abc", RegexOptions.None, TimeSpan.FromMilliseconds(1))); #pragma warning restore RE0001 // options is invalid AssertExtensions.Throws("options", () => Regex.Count("input", @"[abc]", (RegexOptions)(-1))); + AssertExtensions.Throws("options", () => Regex.Count("input".AsSpan(), @"[abc]", (RegexOptions)(-1))); AssertExtensions.Throws("options", () => Regex.Count("input", @"[abc]", (RegexOptions)(-1), TimeSpan.FromMilliseconds(1))); + AssertExtensions.Throws("options", () => Regex.Count("input".AsSpan(), @"[abc]", (RegexOptions)(-1), TimeSpan.FromMilliseconds(1))); // matchTimeout is invalid AssertExtensions.Throws("matchTimeout", () => Regex.Count("input", @"[abc]", RegexOptions.None, TimeSpan.FromMilliseconds(-2))); + AssertExtensions.Throws("matchTimeout", () => Regex.Count("input".AsSpan(), @"[abc]", RegexOptions.None, TimeSpan.FromMilliseconds(-2))); } [Theory] @@ -104,6 +118,7 @@ public async Task Count_Timeout_ThrowsAfterTooLongExecution(RegexEngine engine) Stopwatch sw = Stopwatch.StartNew(); Assert.Throws(() => r.Count(Input)); + Assert.Throws(() => 
r.Count(Input.AsSpan())); Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout switch (engine) @@ -112,6 +127,7 @@ public async Task Count_Timeout_ThrowsAfterTooLongExecution(RegexEngine engine) case RegexEngine.Compiled: sw = Stopwatch.StartNew(); Assert.Throws(() => Regex.Count(Input, Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1))); + Assert.Throws(() => Regex.Count(Input.AsSpan(), Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1))); Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout break; } From 772746819156fbfc14574ec7ecf435829363a70d Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Wed, 2 Mar 2022 20:30:49 -0800 Subject: [PATCH 2/2] Address PR Feedback --- .../System/Text/RegularExpressions/Match.cs | 2 +- .../RegularExpressions/MatchCollection.cs | 2 +- .../Text/RegularExpressions/Regex.Count.cs | 4 +- .../Text/RegularExpressions/Regex.Match.cs | 12 +++--- .../Text/RegularExpressions/Regex.Replace.cs | 4 +- .../Text/RegularExpressions/Regex.Split.cs | 4 +- .../System/Text/RegularExpressions/Regex.cs | 40 +++++++++---------- .../RegularExpressions/RegexReplacement.cs | 4 +- 8 files changed, 34 insertions(+), 38 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs index 8ae239aac9b3df..adf727a4bfb095 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs @@ -106,7 +106,7 @@ public Match NextMatch() Regex? r = _regex; Debug.Assert(Text != null); return r != null ? - r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! 
: + r.RunSingleMatch(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! : this; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs index 0f6414d1aeea8a..39007829a8907c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs @@ -89,7 +89,7 @@ public virtual Match this[int i] Match match; do { - match = _regex.Run(false, _prevlen, _input, 0, _input.Length, _startat)!; + match = _regex.RunSingleMatch(false, _prevlen, _input, 0, _input.Length, _startat)!; if (!match.Success) { _done = true; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs index 5347a170ca1aea..1fecf8efc0c167 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs @@ -20,7 +20,7 @@ public int Count(string input) int count = 0; - Run(input, 0, ref count, static (ref int count, Match match) => + RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) => { count++; return true; @@ -38,7 +38,7 @@ public int Count(ReadOnlySpan input) { int count = 0; - Run(input, 0, ref count, static (ref int count, Match match) => + RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) => { count++; return true; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs index 
3e0adde46b93ab..fe0a2cdc1ca4f2 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs @@ -77,7 +77,7 @@ public bool IsMatch(string input) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); } - return Run(quick: true, -1, input, 0, input.Length, RightToLeft ? input.Length : 0) is null; + return RunSingleMatch(quick: true, -1, input, 0, input.Length, RightToLeft ? input.Length : 0) is null; } /// @@ -87,7 +87,7 @@ public bool IsMatch(string input) /// if the regular expression finds a match; otherwise, . /// A time-out ocurred. public bool IsMatch(ReadOnlySpan input) => - Run(input, RightToLeft ? input.Length : 0) is null; + RunSingleMatch(input, RightToLeft ? input.Length : 0) is null; /// /// Searches the input string for one or more matches using the previous pattern and options, @@ -100,7 +100,7 @@ public bool IsMatch(string input, int startat) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); } - return Run(quick: true, -1, input, 0, input.Length, startat) is null; + return RunSingleMatch(quick: true, -1, input, 0, input.Length, startat) is null; } /// @@ -132,7 +132,7 @@ public Match Match(string input) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); } - return Run(quick: false, -1, input, 0, input.Length, RightToLeft ? input.Length : 0)!; + return RunSingleMatch(quick: false, -1, input, 0, input.Length, RightToLeft ? 
input.Length : 0)!; } /// @@ -146,7 +146,7 @@ public Match Match(string input, int startat) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); } - return Run(quick: false, -1, input, 0, input.Length, startat)!; + return RunSingleMatch(quick: false, -1, input, 0, input.Length, startat)!; } /// @@ -159,7 +159,7 @@ public Match Match(string input, int beginning, int length) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); } - return Run(quick: false, -1, input, beginning, length, RightToLeft ? beginning + length : beginning)!; + return RunSingleMatch(quick: false, -1, input, beginning, length, RightToLeft ? beginning + length : beginning)!; } /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs index c1c8111cf3dd8e..b29317f6ef3877 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs @@ -176,7 +176,7 @@ private static string Replace(MatchEvaluator evaluator, Regex regex, string inpu if (!regex.RightToLeft) { - regex.Run(input, startat, ref state, static (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, int prevat, string input, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, static (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, int prevat, string input, int count) state, Match match) => { state.segments.Add(state.input.AsMemory(state.prevat, match.Index - state.prevat)); state.prevat = match.Index + match.Length; @@ -195,7 +195,7 @@ private static string Replace(MatchEvaluator evaluator, Regex regex, string inpu { state.prevat = input.Length; - regex.Run(input, startat, ref state, static (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, 
int prevat, string input, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, static (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, int prevat, string input, int count) state, Match match) => { state.segments.Add(state.input.AsMemory(match.Index + match.Length, state.prevat - match.Index - match.Length)); state.prevat = match.Index; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs index 327099750f6f60..359f48e8b89ccf 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs @@ -90,7 +90,7 @@ private static string[] Split(Regex regex, string input, int count, int startat) if (!regex.RightToLeft) { - regex.Run(input, startat, ref state, static (ref (List results, int prevat, string input, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, static (ref (List results, int prevat, string input, int count) state, Match match) => { state.results.Add(state.input.Substring(state.prevat, match.Index - state.prevat)); state.prevat = match.Index + match.Length; @@ -118,7 +118,7 @@ private static string[] Split(Regex regex, string input, int count, int startat) { state.prevat = input.Length; - regex.Run(input, startat, ref state, static (ref (List results, int prevat, string input, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, static (ref (List results, int prevat, string input, int count) state, Match match) => { state.results.Add(state.input.Substring(match.Index + match.Length, state.prevat - match.Index - match.Length)); state.prevat = match.Index; diff --git 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 75e5395d6c5e5b..e66734ad85406a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -359,8 +359,8 @@ protected void InitializeReferences() // assemblies generated by Regex.CompileToAssembly calling it. } - /// Internal worker called by the public APIs - internal Match? Run(bool quick, int prevlen, string input, int beginning, int length, int startat) + /// Internal worker which will scan the passed in string for a match. Used by public APIs. + internal Match? RunSingleMatch(bool quick, int prevlen, string input, int beginning, int length, int startat) { if ((uint)startat > (uint)input.Length) { @@ -392,7 +392,7 @@ protected void InitializeReferences() runner.runtextpos += RightToLeft ? -1 : 1; } - return RunInternal(quick, input, beginning, runner, span, returnNullIfQuick: true); + return ScanInternal(quick, input, beginning, runner, span, returnNullIfQuick: true); } finally { @@ -401,7 +401,8 @@ protected void InitializeReferences() } } - internal Match? Run(ReadOnlySpan input, int startat) + /// Internal worker which will scan the passed in span for a match. Used by public APIs. + internal Match? RunSingleMatch(ReadOnlySpan input, int startat) { // startat parameter is always either 0 or input.Length since public API for IsMatch doesn't have an overload // that takes in startat. @@ -425,11 +426,8 @@ protected void InitializeReferences() } } - /// - /// Performs the matching against a given string input, which will call the passed in for every - /// match that is found, along with the relevant . 
- /// - internal void Run(string input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) + /// <summary>Internal worker which will scan the passed in string for all matches, and will call <paramref name="callback"/> for each match found.</summary> + internal void RunAllMatchesWithCallback(string input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) { Debug.Assert((uint)startat <= (uint)input.Length); @@ -438,7 +436,7 @@ internal void Run(string input, int startat, ref TState state, MatchCall { // For the string overload, we need to set runtext before starting the match attempts. runner.runtext = input; - RunInternal(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject); + RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject); } finally { @@ -447,18 +445,15 @@ internal void Run(string input, int startat, ref TState state, MatchCall } } - /// - /// Performs the matching against a given span input, which will call the passed in for every - /// match that is found, along with the relevant . - /// - internal void Run(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) + /// <summary>Internal worker which will scan the passed in span for all matches, and will call <paramref name="callback"/> for each match found.</summary> + internal void RunAllMatchesWithCallback(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) { Debug.Assert((uint)startat <= (uint)input.Length); RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? 
CreateRunner(); try { - RunInternal(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject); + RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject); } finally { @@ -467,11 +462,11 @@ internal void Run(ReadOnlySpan input, int startat, ref TState stat } /// <summary> - /// Performs the internal loop which will search for matches on the given , and will call the for - /// each match. If is set to , then we may need to do additional cleanup which ensures that - /// we are not keeping a reference to the input text alive in a cache. + /// Helper method used by <see cref="RunAllMatchesWithCallback{TState}(string, int, ref TState, MatchCallback{TState}, bool)"/> and + /// <see cref="RunAllMatchesWithCallback{TState}(ReadOnlySpan{char}, int, ref TState, MatchCallback{TState}, bool)"/> which loops to find + /// all matches on the passed in <paramref name="input"/> and calls <paramref name="callback"/> for each match found. /// </summary> - private void RunInternal(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject) + private void RunAllMatchesWithCallbackHelper(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject) { runner.InitializeTimeout(internalMatchTimeout); int runtextpos = startat; @@ -484,7 +479,7 @@ private void RunInternal(ReadOnlySpan input, int startat, ref TSta // We get the Match by calling Scan. 'input' parameter is used to set the Match text which is only relevant if we are using the Run string // overload, as APIs that call the span overload (like Count) don't require match.Text to be set, so we pass null in that case. - Match? match = RunInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false); + Match? match = ScanInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false); Debug.Assert(match is not null); // if we got a match, then call the callback function with the match and prepare for next iteration. 
@@ -551,7 +546,8 @@ private void RunInternal(ReadOnlySpan input, int startat, ref TSta } } - private static Match? RunInternal(bool quick, string? input, int beginning, RegexRunner runner, ReadOnlySpan span, bool returnNullIfQuick) + /// Helper method used by RunSingleMatch and RunAllMatchesWithCallback which calls runner.Scan to find a match on the passed in span. + private static Match? ScanInternal(bool quick, string? input, int beginning, RegexRunner runner, ReadOnlySpan span, bool returnNullIfQuick) { runner.Scan(span); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs index 2027763780d2c1..52fed822d1e123 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs @@ -214,7 +214,7 @@ public string Replace(Regex regex, string input, int count, int startat) if (!regex.RightToLeft) { - regex.Run(input, startat, ref state, (ref (RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, (ref (RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => { state.segments.Add(state.inputMemory.Slice(state.prevat, match.Index - state.prevat)); state.prevat = match.Index + match.Length; @@ -233,7 +233,7 @@ public string Replace(Regex regex, string input, int count, int startat) { state.prevat = input.Length; - regex.Run(input, startat, ref state, (ref (RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => + regex.RunAllMatchesWithCallback(input, startat, ref state, (ref 
(RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => { state.segments.Add(state.inputMemory.Slice(match.Index + match.Length, state.prevat - match.Index - match.Length)); state.prevat = match.Index;