Skip to content

Commit fca3971

Browse files
committed
checksum/cksum: fix: filename that include separator should parse + add tests
fixes this non-regex implementation's flaw with file_names containing the separator's pattern: - replaces left-to-right greedy separator match with right-to-left one. - added bugfix tests fixes secondary bug: positive match on hybrid posix-openssl format adds secondary bugfix tests Co-authored-by: Dorian Péron <72708393+RenjiSann@users.noreply.github.com>
1 parent 602fc70 commit fca3971

File tree

1 file changed

+75
-15
lines changed

1 file changed

+75
-15
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 75 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5-
// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
5+
// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit
66

77
use data_encoding::BASE64;
88
use os_display::Quotable;
@@ -484,10 +484,24 @@ impl LineFormat {
484484
let algo_start = if trimmed.starts_with(b"\\") { 1 } else { 0 };
485485
let rest = &trimmed[algo_start..];
486486

487+
enum SubCase {
488+
Posix,
489+
OpenSSL,
490+
}
487491
// find the next parenthesis using byte search (not next whitespace) because openssl's
488492
// tagged format does not put a space before (filename)
493+
489494
let par_idx = rest.iter().position(|&b| b == b'(')?;
490-
let algo_substring = &rest[..par_idx].trim_ascii();
495+
let sub_case = if rest[par_idx - 1] == b' ' {
496+
SubCase::Posix
497+
} else {
498+
SubCase::OpenSSL
499+
};
500+
501+
let algo_substring = match sub_case {
502+
SubCase::Posix => &rest[..par_idx - 1],
503+
SubCase::OpenSSL => &rest[..par_idx],
504+
};
491505
let mut algo_parts = algo_substring.splitn(2, |&b| b == b'-');
492506
let algo = algo_parts.next()?;
493507

@@ -509,8 +523,10 @@ impl LineFormat {
509523
let algo_utf8 = unsafe { String::from_utf8_unchecked(algo.to_vec()) };
510524
// stripping '(' not ' (' since we matched on ( not whitespace because of openssl.
511525
let after_paren = rest.get(par_idx + 1..)?;
512-
let (filename, checksum) = ByteSliceExt::split_once(after_paren, b") = ")
513-
.or_else(|| ByteSliceExt::split_once(after_paren, b")= "))?;
526+
let (filename, checksum) = match sub_case {
527+
SubCase::Posix => ByteSliceExt::rsplit_once(after_paren, b") = ")?,
528+
SubCase::OpenSSL => ByteSliceExt::rsplit_once(after_paren, b")= ")?,
529+
};
514530

515531
fn is_valid_checksum(checksum: &[u8]) -> bool {
516532
if checksum.is_empty() {
@@ -608,13 +624,20 @@ impl LineFormat {
608624

609625
// Helper trait for byte slice operations
610626
trait ByteSliceExt {
611-
fn split_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)>;
627+
/// Look for a pattern from right to left, return surrounding parts if found.
628+
fn rsplit_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)>;
612629
}
613630

614631
impl ByteSliceExt for [u8] {
615-
fn split_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)> {
616-
let pos = self.windows(pattern.len()).position(|w| w == pattern)?;
617-
Some((&self[..pos], &self[pos + pattern.len()..]))
632+
fn rsplit_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)> {
633+
let pos = self
634+
.windows(pattern.len())
635+
.rev()
636+
.position(|w| w == pattern)?;
637+
Some((
638+
&self[..self.len() - pattern.len() - pos],
639+
&self[self.len() - pos..],
640+
))
618641
}
619642
}
620643

@@ -1345,30 +1368,67 @@ mod tests {
13451368
#[allow(clippy::type_complexity)]
13461369
let test_cases: &[(&[u8], Option<(&[u8], Option<&[u8]>, &[u8], &[u8])>)] = &[
13471370
(b"SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some((b"SHA256", None, b"example.txt", b"d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))),
1348-
// cspell:disable-next-line
1371+
// cspell:disable
13491372
(b"BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some((b"BLAKE2b", Some(b"512"), b"file", b"abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))),
13501373
(b" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some((b"MD5", None, b"test", b"9e107d9d372bb6826bd81d3542a419d6"))),
13511374
(b"SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some((b"SHA", Some(b"1"), b"anotherfile", b"a9993e364706816aba3e25717850c26c9cd0d89d"))),
1375+
(b" MD5 (anothertest) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"anothertest", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1376+
(b" MD5(anothertest2) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
1377+
(b" MD5(weirdfilename0)= stillfilename)= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename0)= stillfilename", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1378+
(b" MD5(weirdfilename1)= )= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename1)= ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1379+
(b" MD5(weirdfilename2) = )= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename2) = ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1380+
(b" MD5 (weirdfilename3)= ) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename3)= ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1381+
(b" MD5 (weirdfilename4) = ) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename4) = ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
1382+
(b" MD5(weirdfilename5)= ) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
1383+
(b" MD5(weirdfilename6) = ) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
1384+
(b" MD5 (weirdfilename7)= )= fds65dsf46as5df4d6f54asds5d7f7g9", None),
1385+
(b" MD5 (weirdfilename8) = )= fds65dsf46as5df4d6f54asds5d7f7g9", None),
13521386
];
13531387

1388+
// cspell:enable
13541389
for (input, expected) in test_cases {
13551390
let line_info = LineFormat::parse_algo_based(input);
13561391
match expected {
13571392
Some((algo, bits, filename, checksum)) => {
1358-
assert!(line_info.is_some());
1393+
assert!(
1394+
line_info.is_some(),
1395+
"expected Some, got None for {}",
1396+
String::from_utf8_lossy(filename)
1397+
);
13591398
let line_info = line_info.unwrap();
1360-
assert_eq!(&line_info.algo_name.unwrap().as_bytes(), algo);
1399+
assert_eq!(
1400+
&line_info.algo_name.unwrap().as_bytes(),
1401+
algo,
1402+
"failed for {}",
1403+
String::from_utf8_lossy(filename)
1404+
);
13611405
assert_eq!(
13621406
line_info
13631407
.algo_bit_len
13641408
.map(|m| m.to_string().as_bytes().to_owned()),
1365-
bits.map(|b| b.to_owned())
1409+
bits.map(|b| b.to_owned()),
1410+
"failed for {}",
1411+
String::from_utf8_lossy(filename)
1412+
);
1413+
assert_eq!(
1414+
&line_info.filename,
1415+
filename,
1416+
"failed for {}",
1417+
String::from_utf8_lossy(filename)
1418+
);
1419+
assert_eq!(
1420+
&line_info.checksum.as_bytes(),
1421+
checksum,
1422+
"failed for {}",
1423+
String::from_utf8_lossy(filename)
13661424
);
1367-
assert_eq!(&line_info.filename, filename);
1368-
assert_eq!(&line_info.checksum.as_bytes(), checksum);
13691425
}
13701426
None => {
1371-
assert!(line_info.is_none());
1427+
assert!(
1428+
line_info.is_none(),
1429+
"failed for {}",
1430+
String::from_utf8_lossy(input)
1431+
);
13721432
}
13731433
}
13741434
}

0 commit comments

Comments
 (0)