From c705f5f1899bf522b2f5bfd2339c1ffc948b974c Mon Sep 17 00:00:00 2001 From: weili <541602953@qq.com> Date: Tue, 9 Jun 2026 08:26:47 +0000 Subject: [PATCH] find: -printf: don't panic on a multibyte char after an octal escape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `\NNN` octal escapes were parsed by slicing a fixed 3 bytes off the format string, which panics when fewer than 3 octal digits are followed by a multibyte character (e.g. `-printf '\0€'`): the 3-byte slice lands inside the multibyte char and trips a char-boundary assertion. Parse the octal escape from the leading octal digits only (1 to 3, all ASCII) and advance by their byte length. This also fixes `\1`..`\7`, which previously fell through to the single-character escape table and errored instead of being treated as octal, matching GNU find. --- src/find/matchers/printf.rs | 50 +++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/src/find/matchers/printf.rs b/src/find/matchers/printf.rs index ea3a5fe4..3d0b8313 100644 --- a/src/find/matchers/printf.rs +++ b/src/find/matchers/printf.rs @@ -167,16 +167,21 @@ impl FormatStringParser<'_> { // Try parsing an octal sequence first. let first = self.front()?; if first.is_digit(OCTAL_RADIX) { - if let Ok(code) = self.peek(OCTAL_LEN).and_then(|octal| { - u32::from_str_radix(octal, OCTAL_RADIX).map_err(std::convert::Into::into) - }) { - // safe to unwrap: .peek() already succeeded above. - let octal = self.advance_by(OCTAL_LEN).unwrap(); - return match char::from_u32(code) { - Some(c) => Ok(FormatComponent::Literal(c.to_string())), - None => Err(format!("Invalid character value: \\{octal}").into()), - }; - } + // A GNU octal escape is 1 to 3 octal digits. Consume only the leading + // octal digits (which are ASCII), rather than slicing a fixed 3 bytes + // that can land inside a following multibyte char. + let octal: String = self + .string + .chars() + .take(OCTAL_LEN) + .take_while(|c| c.is_digit(OCTAL_RADIX)) + .collect(); + let code = u32::from_str_radix(&octal, OCTAL_RADIX)?; + self.advance_by(octal.len())?; + return match char::from_u32(code) { + Some(c) => Ok(FormatComponent::Literal(c.to_string())), + None => Err(format!("Invalid character value: \\{octal}").into()), + }; } self.advance_one()?; @@ -688,6 +693,31 @@ mod tests { assert!(FormatString::parse("\\").is_err()); } + #[test] + fn test_parse_octal_escape_before_multibyte_char() { + assert_eq!( + FormatString::parse("\\0€").unwrap().components, + vec![ + FormatComponent::Literal("\0".to_owned()), + FormatComponent::Literal("€".to_owned()), + ] + ); + assert_eq!( + FormatString::parse("\\1😀").unwrap().components, + vec![ + FormatComponent::Literal("\u{1}".to_owned()), + FormatComponent::Literal("😀".to_owned()), + ] + ); + assert_eq!( + FormatString::parse("\\00é").unwrap().components, + vec![ + FormatComponent::Literal("\0".to_owned()), + FormatComponent::Literal("é".to_owned()), + ] + ); + } + #[test] fn test_parse_formatting() { fn unaligned_directive(directive: FormatDirective) -> FormatComponent {