From 7eb6a14ea7482d8492674509f89d8047275cf641 Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Thu, 28 May 2026 18:12:40 +0200 Subject: [PATCH 1/5] perf: use get_unchecked for TwoWaySearcher --- library/core/src/str/pattern.rs | 44 ++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 25202ffd67313..d2a65813bc8b4 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1494,7 +1494,15 @@ impl TwoWaySearcher { let start = if long_period { self.crit_pos } else { cmp::max(self.crit_pos, self.memory) }; for i in start..needle.len() { - if needle[i] != haystack[self.position + i] { + // SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)` + // check returned `Some`, so `self.position + needle_last < haystack.len()`. + // Since `i < needle.len()` implies `i <= needle_last`, we have + // `self.position + i < haystack.len()`. + // Every path that mutates `self.position` below either returns or re-enters `'search`, + // which re-runs the check before reaching the loop again. + // `i < needle.len()` also guarantees `needle.get_unchecked(i)` is safe. + if unsafe { *needle.get_unchecked(i) != *haystack.get_unchecked(self.position + i) } + { self.position += i - self.crit_pos + 1; if !long_period { self.memory = 0; @@ -1506,7 +1514,15 @@ impl TwoWaySearcher { // See if the left part of the needle matches let start = if long_period { 0 } else { self.memory }; for i in (start..self.crit_pos).rev() { - if needle[i] != haystack[self.position + i] { + // SAFETY: `crit_pos < needle.len()` by construction, so `i < needle.len()` and + // `needle.get_unchecked(i)` is safe. 
+ // The same `self.position + i < haystack.len()` argument as the right-part + // loop applies: `haystack.get(self.position + needle_last)` at the + // top of `'search` established the bound for this iteration, and every mutation + // of `self.position` is followed by `continue 'search` (which re-runs the check) + // or a `return` on match. + if unsafe { *needle.get_unchecked(i) != *haystack.get_unchecked(self.position + i) } + { self.position += self.period; if !long_period { self.memory = needle.len() - self.period; @@ -1581,7 +1597,18 @@ impl TwoWaySearcher { cmp::min(self.crit_pos_back, self.memory_back) }; for i in (0..crit).rev() { - if needle[i] != haystack[self.end - needle.len() + i] { + // SAFETY: + // - `i < crit <= crit_pos_back <= needle.len()`, so `needle.get_unchecked(i)` is safe. + // - On every iteration of `'search`, `haystack.get(self.end.wrapping_sub(needle.len()))` + // returned `Some`, so `self.end >= needle.len()` and `self.end - needle.len() < haystack.len()`. + // Since `self.end <= haystack.len()` and `i < needle.len()`, we have + // `self.end - needle.len() + i < self.end <= haystack.len()`, so + // `haystack.get_unchecked(self.end - needle.len() + i)` is safe. + // - The path that mutates `self.end` either re-enters `'search`, which re-runs the checks + // before reaching this loop again, or returns on match,so the invariant holds. + if unsafe { + *needle.get_unchecked(i) != *haystack.get_unchecked(self.end - needle.len() + i) + } { self.end -= self.crit_pos_back - i; if !long_period { self.memory_back = needle.len(); @@ -1593,7 +1620,16 @@ impl TwoWaySearcher { // See if the right part of the needle matches let needle_end = if long_period { needle.len() } else { self.memory_back }; for i in self.crit_pos_back..needle_end { - if needle[i] != haystack[self.end - needle.len() + i] { + // SAFETY: `needle_end <= needle.len()`, so `i < needle.len()` and + // `needle.get_unchecked(i)` is safe. 
+ // The same `self.end - needle.len() + i < haystack.len()` argument as the + // left-part loop applies: the `haystack.get(self.end.wrapping_sub(needle.len()))` + // check at the top of `'search` established the bound for this iteration, and + // every mutation of `self.end` is followed by `continue 'search` (which re-runs + // the check) or a `return` (which exits before any further unsafe access). + if unsafe { + *needle.get_unchecked(i) != *haystack.get_unchecked(self.end - needle.len() + i) + } { self.end -= self.period; if !long_period { self.memory_back = self.period; From f28bb560d9097af6bfca2620d00d67addb512fef Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Thu, 28 May 2026 18:12:51 +0200 Subject: [PATCH 2/5] perf: use get_unchecked only for haystack accesses --- library/core/src/str/pattern.rs | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index d2a65813bc8b4..032c0ce0b4e03 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1500,9 +1500,7 @@ impl TwoWaySearcher { // `self.position + i < haystack.len()`. // Every path that mutates `self.position` below either returns or re-enters `'search`, // which re-runs the check before reaching the loop again. - // `i < needle.len()` also guarantees `needle.get_unchecked(i)` is safe. - if unsafe { *needle.get_unchecked(i) != *haystack.get_unchecked(self.position + i) } - { + if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } { self.position += i - self.crit_pos + 1; if !long_period { self.memory = 0; @@ -1514,15 +1512,12 @@ impl TwoWaySearcher { // See if the left part of the needle matches let start = if long_period { 0 } else { self.memory }; for i in (start..self.crit_pos).rev() { - // SAFETY: `crit_pos < needle.len()` by construction, so `i < needle.len()` and - // `needle.get_unchecked(i)` is safe. 
- // The same `self.position + i < haystack.len()` argument as the right-part + // SAFETY: The same `self.position + i < haystack.len()` argument as the right-part // loop applies: `haystack.get(self.position + needle_last)` at the // top of `'search` established the bound for this iteration, and every mutation // of `self.position` is followed by `continue 'search` (which re-runs the check) // or a `return` on match. - if unsafe { *needle.get_unchecked(i) != *haystack.get_unchecked(self.position + i) } - { + if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } { self.position += self.period; if !long_period { self.memory = needle.len() - self.period; @@ -1597,18 +1592,14 @@ impl TwoWaySearcher { cmp::min(self.crit_pos_back, self.memory_back) }; for i in (0..crit).rev() { - // SAFETY: - // - `i < crit <= crit_pos_back <= needle.len()`, so `needle.get_unchecked(i)` is safe. - // - On every iteration of `'search`, `haystack.get(self.end.wrapping_sub(needle.len()))` + // SAFETY: On every iteration of `'search`, `haystack.get(self.end.wrapping_sub(needle.len()))` // returned `Some`, so `self.end >= needle.len()` and `self.end - needle.len() < haystack.len()`. // Since `self.end <= haystack.len()` and `i < needle.len()`, we have // `self.end - needle.len() + i < self.end <= haystack.len()`, so // `haystack.get_unchecked(self.end - needle.len() + i)` is safe. // - The path that mutates `self.end` either re-enters `'search`, which re-runs the checks - // before reaching this loop again, or returns on match,so the invariant holds. - if unsafe { - *needle.get_unchecked(i) != *haystack.get_unchecked(self.end - needle.len() + i) - } { + // before reaching this loop again, or returns on match, so the invariant holds. 
+ if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } { self.end -= self.crit_pos_back - i; if !long_period { self.memory_back = needle.len(); @@ -1620,16 +1611,12 @@ impl TwoWaySearcher { // See if the right part of the needle matches let needle_end = if long_period { needle.len() } else { self.memory_back }; for i in self.crit_pos_back..needle_end { - // SAFETY: `needle_end <= needle.len()`, so `i < needle.len()` and - // `needle.get_unchecked(i)` is safe. - // The same `self.end - needle.len() + i < haystack.len()` argument as the + // SAFETY: The same `self.end - needle.len() + i < haystack.len()` argument as the // left-part loop applies: the `haystack.get(self.end.wrapping_sub(needle.len()))` // check at the top of `'search` established the bound for this iteration, and // every mutation of `self.end` is followed by `continue 'search` (which re-runs // the check) or a `return` (which exits before any further unsafe access). - if unsafe { - *needle.get_unchecked(i) != *haystack.get_unchecked(self.end - needle.len() + i) - } { + if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } { self.end -= self.period; if !long_period { self.memory_back = self.period; From 4627283038c25b29249571fcb1a022b2494dd980 Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Thu, 28 May 2026 18:12:59 +0200 Subject: [PATCH 3/5] perf: more benchmarks --- library/coretests/benches/pattern.rs | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/library/coretests/benches/pattern.rs b/library/coretests/benches/pattern.rs index b0f8b39c22e16..15dfda9cef1e5 100644 --- a/library/coretests/benches/pattern.rs +++ b/library/coretests/benches/pattern.rs @@ -39,3 +39,51 @@ fn ends_with_str(b: &mut Bencher) { } }) } + +fn make_haystack() -> String { + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem \ + sit amet dolor ultricies condimentum. 
Praesent iaculis purus elit, ac malesuada \ + quam malesuada in. Duis sed orci eros. Suspendisse sit amet magna mollis, mollis \ + nunc luctus, imperdiet mi. Integer fringilla non sem ut lacinia. Fusce varius \ + tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec tempus vel, \ + gravida nec quam. In est dui, tincidunt sed tempus interdum, adipiscing laoreet \ + ante. Etiam tempor, tellus quis sagittis interdum, nulla purus mattis sem, quis \ + auctor erat odio ac tellus. In nec nunc sit amet diam volutpat molestie at sed \ + ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan lorem ac dignissim \ + placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat." + .repeat(50) +} + +#[bench] +fn find_str(b: &mut Bencher) { + let s = make_haystack(); + let haystack = black_box(s.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.find("the english language"))) +} + +#[bench] +fn rfind_str(b: &mut Bencher) { + let s = make_haystack(); + let haystack = black_box(s.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.rfind("the english language"))) +} + +#[bench] +fn find_str_worst_case(b: &mut Bencher) { + let near_miss = "the english languagX"; + let haystack_str = near_miss.repeat(2000); + let haystack = black_box(haystack_str.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.find("the english language"))) +} + +#[bench] +fn rfind_str_worst_case(b: &mut Bencher) { + let near_miss = "the english languagX"; + let haystack_str = near_miss.repeat(2000); + let haystack = black_box(haystack_str.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.rfind("the english language"))) +} From 81fd12d5c73dc0e9a8d5f66ec87dbd0c7c237b5d Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Sat, 6 Jun 2026 19:48:25 +0200 Subject: [PATCH 4/5] review: update safety comment --- library/core/src/str/pattern.rs | 11 ++++++----- 1 file changed, 6 
insertions(+), 5 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 032c0ce0b4e03..2b6a760f6f58d 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1512,11 +1512,12 @@ impl TwoWaySearcher { // See if the left part of the needle matches let start = if long_period { 0 } else { self.memory }; for i in (start..self.crit_pos).rev() { - // SAFETY: The same `self.position + i < haystack.len()` argument as the right-part - // loop applies: `haystack.get(self.position + needle_last)` at the - // top of `'search` established the bound for this iteration, and every mutation - // of `self.position` is followed by `continue 'search` (which re-runs the check) - // or a `return` on match. + // SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)` + // check returned `Some`, so `self.position + needle_last < haystack.len()`. + // Since `i < self.crit_pos <= needle.len()`, we have `i <= needle_last`, and thus + // `self.position + i <= self.position + needle_last < haystack.len()`. + // Every path that mutates `self.position` below either returns or re-enters `'search`, + // which re-runs the check before reaching the loop again. if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } { self.position += self.period; if !long_period { From 3a293414548234bd309a34866bb241f833875967 Mon Sep 17 00:00:00 2001 From: Thomas Kowalski Date: Sat, 6 Jun 2026 20:42:47 +0200 Subject: [PATCH 5/5] misc: exclude file from typos check --- typos.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/typos.toml b/typos.toml index 8e14ce58dcb4f..f680f5b0e8abf 100644 --- a/typos.toml +++ b/typos.toml @@ -13,6 +13,7 @@ extend-exclude = [ # generated lorem ipsum texts "library/alloctests/benches/str.rs", "library/alloctests/tests/str.rs", + "library/coretests/benches/pattern.rs", ] [default.extend-words]