Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uu/df/locales/en-US.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ df-after-help = Display values are in units of the first available SIZE from --b
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
of 1000).
of 1000). The integer can be decimal, hexadecimal, octal or binary.
# Help messages
df-help-print-help = Print help information.
Expand Down
2 changes: 1 addition & 1 deletion src/uu/df/locales/fr-FR.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ df-after-help = Les valeurs affichées sont en unités de la première TAILLE di
TAILLE est un entier et une unité optionnelle (exemple : 10M est 10*1024*1024).
Les unités sont K, M, G, T, P, E, Z, Y (puissances de 1024) ou KB, MB,... (puissances
de 1000).
de 1000). L'entier peut être décimal, hexadécimal, octal ou binaire.
# Messages d'aide
df-help-print-help = afficher les informations d'aide.
Expand Down
2 changes: 1 addition & 1 deletion src/uu/du/locales/en-US.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ du-after-help = Display values are in units of the first available SIZE from --b
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
of 1000).
of 1000). The integer can be decimal, hexadecimal, octal or binary.
PATTERN allows some advanced exclusions. For example, the following syntaxes
are supported:
Expand Down
2 changes: 1 addition & 1 deletion src/uu/du/locales/fr-FR.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ du-after-help = Les valeurs affichées sont en unités de la première TAILLE di
TAILLE est un entier et une unité optionnelle (exemple : 10M est 10*1024*1024).
Les unités sont K, M, G, T, P, E, Z, Y (puissances de 1024) ou KB, MB,... (puissances
de 1000).
de 1000). L'entier peut être décimal, hexadécimal, octal ou binaire.
MOTIF permet des exclusions avancées. Par exemple, les syntaxes suivantes
sont supportées :
Expand Down
40 changes: 35 additions & 5 deletions src/uucore/src/lib/features/parser/parse_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ enum NumberSystem {
Decimal,
Octal,
Hexadecimal,
Binary,
}

impl<'parser> Parser<'parser> {
Expand Down Expand Up @@ -134,10 +135,11 @@ impl<'parser> Parser<'parser> {
}
/// Parse a size string into a number of bytes.
///
/// A size string comprises an integer and an optional unit. The unit
/// may be K, M, G, T, P, E, Z, Y, R or Q (powers of 1024), or KB, MB,
/// etc. (powers of 1000), or b which is 512.
/// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
/// A size string comprises an integer and an optional unit. The integer
/// may be in decimal, octal (0 prefix), hexadecimal (0x prefix), or
/// binary (0b prefix) notation. The unit may be K, M, G, T, P, E, Z, Y,
/// R or Q (powers of 1024), or KB, MB, etc. (powers of 1000), or b which
/// is 512. Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
///
/// # Errors
///
Expand All @@ -159,6 +161,7 @@ impl<'parser> Parser<'parser> {
/// assert_eq!(Ok(9 * 1000), parser.parse("9kB")); // kB is 1000
/// assert_eq!(Ok(2 * 1024), parser.parse("2K")); // K is 1024
/// assert_eq!(Ok(44251 * 1024), parser.parse("0xACDBK")); // 0xACDB is 44251 in decimal
/// assert_eq!(Ok(44251 * 1024 * 1024), parser.parse("0b1010110011011011")); // 0b1010110011011011 is 44251 in decimal, default M
/// ```
pub fn parse(&self, size: &str) -> Result<u128, ParseSizeError> {
if size.is_empty() {
Expand All @@ -176,6 +179,11 @@ impl<'parser> Parser<'parser> {
.take(2)
.chain(size.chars().skip(2).take_while(char::is_ascii_hexdigit))
.collect(),
NumberSystem::Binary => size
.chars()
.take(2)
.chain(size.chars().skip(2).take_while(|c| c.is_digit(2)))
.collect(),
_ => size.chars().take_while(char::is_ascii_digit).collect(),
};
let mut unit: &str = &size[numeric_string.len()..];
Expand Down Expand Up @@ -268,6 +276,10 @@ impl<'parser> Parser<'parser> {
let trimmed_string = numeric_string.trim_start_matches("0x");
Self::parse_number(trimmed_string, 16, size)?
}
NumberSystem::Binary => {
let trimmed_string = numeric_string.trim_start_matches("0b");
Self::parse_number(trimmed_string, 2, size)?
}
};

number
Expand Down Expand Up @@ -328,6 +340,14 @@ impl<'parser> Parser<'parser> {
return NumberSystem::Hexadecimal;
}

// Binary prefix: "0b" followed by at least one more character. Whether the
// characters after the prefix are binary digits (0 or 1) is not checked here.
// Note: "0b" alone is treated as decimal 0 with suffix "b"
if let Some(prefix) = size.strip_prefix("0b") {
if !prefix.is_empty() {
return NumberSystem::Binary;
}
}

let num_digits: usize = size
.chars()
.take_while(char::is_ascii_digit)
Expand Down Expand Up @@ -363,7 +383,9 @@ impl<'parser> Parser<'parser> {
/// assert_eq!(Ok(123), parse_size_u128("123"));
/// assert_eq!(Ok(9 * 1000), parse_size_u128("9kB")); // kB is 1000
/// assert_eq!(Ok(2 * 1024), parse_size_u128("2K")); // K is 1024
/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK")); // hexadecimal
/// assert_eq!(Ok(10), parse_size_u128("0b1010")); // binary
/// assert_eq!(Ok(10 * 1024), parse_size_u128("0b1010K")); // binary with suffix
/// ```
pub fn parse_size_u128(size: &str) -> Result<u128, ParseSizeError> {
Parser::default().parse(size)
Expand Down Expand Up @@ -564,6 +586,7 @@ mod tests {
assert!(parse_size_u64("1Y").is_err());
assert!(parse_size_u64("1R").is_err());
assert!(parse_size_u64("1Q").is_err());
assert!(parse_size_u64("0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111").is_err());

assert!(variant_eq(
&parse_size_u64("1Z").unwrap_err(),
Expand Down Expand Up @@ -634,6 +657,7 @@ mod tests {
#[test]
fn b_suffix() {
    // (expected byte count, input)
    let cases = [
        // b is 512
        (3 * 512, "3b"),
        // b should be used as a suffix in this case instead of signifying binary
        (0, "0b"),
    ];

    for (expected, input) in cases {
        assert_eq!(Ok(expected), parse_size_u64(input));
    }
}

#[test]
Expand Down Expand Up @@ -774,6 +798,12 @@ mod tests {
assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
}

#[test]
fn parse_binary_size() {
    // (expected byte count, `0b`-prefixed input); the second case adds a K suffix.
    let cases = [
        (44251, "0b1010110011011011"),
        (44251 * 1024, "0b1010110011011011K"),
    ];

    for (expected, input) in cases {
        assert_eq!(Ok(expected), parse_size_u64(input));
    }
}

#[test]
#[cfg(target_os = "linux")]
fn parse_percent() {
Expand Down
73 changes: 73 additions & 0 deletions tests/by-util/test_df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,53 @@ fn test_block_size_with_suffix() {
assert_eq!(get_header("1GB"), "1GB-blocks");
}

// A `0b`-prefixed `-B` value must produce the same `size` column header as
// the equivalent decimal spelling.
#[test]
fn test_df_binary_block_size() {
    // Runs `df -B <block_size> --output=size` and returns the trimmed first line.
    fn get_header(block_size: &str) -> String {
        let stdout = new_ucmd!()
            .args(&["-B", block_size, "--output=size"])
            .succeeds()
            .stdout_str_lossy();
        let first_line = stdout.lines().next().unwrap();
        first_line.trim().to_string()
    }

    // (binary spelling, decimal spelling of the same block size)
    let equivalent_sizes = [
        ("0b1", "1"),
        ("0b10100", "20"),
        ("0b1000000000", "512"),
        ("0b10K", "2K"),
    ];

    for (binary, decimal) in equivalent_sizes {
        let binary_result = get_header(binary);
        let decimal_result = get_header(decimal);
        assert_eq!(
            binary_result, decimal_result,
            "Binary {binary} should equal decimal {decimal}"
        );
    }
}

// A `0b`-prefixed block size supplied through the environment must yield the
// same header as the decimal value 1024.
#[test]
fn test_df_binary_env_block_size() {
    // Runs `df --output=size` with `env_var=env_value` set and returns the
    // trimmed first line of stdout.
    fn get_header(env_var: &str, env_value: &str) -> String {
        let stdout = new_ucmd!()
            .env(env_var, env_value)
            .args(&["--output=size"])
            .succeeds()
            .stdout_str_lossy();
        let first_line = stdout.lines().next().unwrap();
        first_line.trim().to_string()
    }

    // Same check for each environment variable, in the original order.
    for env_var in ["DF_BLOCK_SIZE", "BLOCK_SIZE"] {
        let binary_header = get_header(env_var, "0b10000000000");
        let decimal_header = get_header(env_var, "1024");
        assert_eq!(binary_header, decimal_header);
    }
}

#[test]
fn test_block_size_in_posix_portability_mode() {
fn get_header(block_size: &str) -> String {
Expand Down Expand Up @@ -849,6 +896,32 @@ fn test_invalid_block_size_suffix() {
.stderr_contains("invalid suffix in --block-size argument '1.2'");
}

// `0b123` contains digits that are not binary; df must fail and report an
// invalid suffix for the whole argument.
#[test]
fn test_df_invalid_binary_size() {
    let block_size_arg = "--block-size=0b123";
    let expected_error = "invalid suffix in --block-size argument '0b123'";

    new_ucmd!()
        .arg(block_size_arg)
        .fails()
        .stderr_contains(expected_error);
}

// Edge cases around the `0b` prefix: a bare prefix, an upper-case prefix and
// a value with far too many digits. Each invocation must fail with the
// listed text on stderr.
#[test]
fn test_df_binary_edge_cases() {
    // (command-line argument, text expected on stderr)
    let failing_cases = [
        ("-B0b", "invalid --block-size argument '0b'"),
        ("-B0B", "invalid suffix in --block-size argument '0B'"),
        (
            "--block-size=0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
            "too large",
        ),
    ];

    for (argument, expected_stderr) in failing_cases {
        new_ucmd!()
            .arg(argument)
            .fails()
            .stderr_contains(expected_stderr);
    }
}

#[test]
fn test_output_selects_columns() {
let output = new_ucmd!()
Expand Down
141 changes: 140 additions & 1 deletion tests/by-util/test_du.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,120 @@ fn test_du_env_block_size_hierarchy() {
assert_eq!(expected, result2);
}

// `du --block-size` must treat a `0b`-prefixed value exactly like its decimal
// spelling: the output for the same directory has to be identical.
#[test]
fn test_du_binary_block_size() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;
    let dir = "a";

    // Fixture: one directory holding a single file extended to 100_000 bytes.
    at.mkdir(dir);
    let fpath = at.plus(format!("{dir}/file"));
    std::fs::File::create(&fpath)
        .expect("cannot create test file")
        .set_len(100_000)
        .expect("cannot set file size");

    // (binary spelling, decimal spelling of the same block size)
    let test_cases = [
        ("0b1", "1"),
        ("0b10100", "20"),
        ("0b1000000000", "512"),
        ("0b10K", "2K"),
    ];

    for (binary, decimal) in test_cases {
        // The outputs get their own names so that `binary` and `decimal` keep
        // referring to the block-size spellings; otherwise the failure message
        // below would print the du output instead of the inputs under test.
        let decimal_output = ts
            .ucmd()
            .arg(dir)
            .arg(format!("--block-size={decimal}"))
            .succeeds()
            .stdout_move_str();

        let binary_output = ts
            .ucmd()
            .arg(dir)
            .arg(format!("--block-size={binary}"))
            .succeeds()
            .stdout_move_str();

        assert_eq!(
            decimal_output, binary_output,
            "Binary {binary} should equal decimal {decimal}"
        );
    }
}

// A `0b`-prefixed block size taken from `DU_BLOCK_SIZE` must give the same
// output as `--block-size=1024` on the command line.
#[test]
fn test_du_binary_env_block_size() {
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;
    let dir = "a";

    // Fixture: one directory holding a single file extended to 100_000 bytes.
    fixtures.mkdir(dir);
    let file_path = fixtures.plus(format!("{dir}/file"));
    {
        // Scoped so the handle is closed before du runs.
        let file = std::fs::File::create(&file_path).expect("cannot create test file");
        file.set_len(100_000).expect("cannot set file size");
    }

    // Reference run: decimal block size on the command line.
    let expected = scenario
        .ucmd()
        .arg(dir)
        .arg("--block-size=1024")
        .succeeds()
        .stdout_move_str();

    // Run under test: binary block size from the environment.
    let result = scenario
        .ucmd()
        .arg(dir)
        .env("DU_BLOCK_SIZE", "0b10000000000")
        .succeeds()
        .stdout_move_str();

    assert_eq!(expected, result);
}

// `0b123` contains digits that are not binary; both size-taking options must
// exit with code 1 and print exactly the listed error on stderr.
#[test]
fn test_du_invalid_binary_size() {
    let scenario = TestScenario::new(util_name!());

    // (option under test, exact stderr expected)
    let failing_cases = [
        (
            "--block-size=0b123",
            "du: invalid suffix in --block-size argument '0b123'\n",
        ),
        (
            "--threshold=0b123",
            "du: invalid suffix in --threshold argument '0b123'\n",
        ),
    ];

    for (option, expected_stderr) in failing_cases {
        scenario
            .ucmd()
            .arg(option)
            .arg("/tmp")
            .fails_with_code(1)
            .stderr_only(expected_stderr);
    }
}

// Edge cases around the `0b` prefix: a bare prefix, an upper-case prefix and
// a value with far too many digits.
#[test]
fn test_du_binary_edge_cases() {
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;
    fixtures.write("foo", "test");

    // (argument, exact stderr expected)
    let exact_message_cases = [
        ("-B0b", "du: invalid --block-size argument '0b'\n"),
        (
            "-B0B",
            "du: invalid suffix in --block-size argument '0B'\n",
        ),
    ];

    for (argument, expected_stderr) in exact_message_cases {
        scenario
            .ucmd()
            .arg(argument)
            .arg("foo")
            .fails()
            .stderr_only(expected_stderr);
    }

    // The oversized value is checked separately: exit code 1 and only a
    // substring of the message.
    let oversized = "--block-size=0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111";
    scenario
        .ucmd()
        .arg(oversized)
        .arg("foo")
        .fails_with_code(1)
        .stderr_contains("too large");
}

#[test]
fn test_du_non_existing_files() {
new_ucmd!()
Expand Down Expand Up @@ -995,6 +1109,31 @@ fn test_du_threshold() {
.stdout_contains("deeper_dir");
}

#[test]
#[cfg(not(target_os = "openbsd"))]
fn test_du_binary_threshold() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;

at.mkdir_all("subdir/links");
at.mkdir_all("subdir/deeper/deeper_dir");
at.write("subdir/links/bigfile.txt", &"x".repeat(10000));
at.write("subdir/deeper/deeper_dir/smallfile.txt", "small");

let threshold_bin = if cfg!(windows) {
"0b1101110000000"
} else {
"0b10011100010000"
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason the threshold is lower on Windows? Please add a comment (or, if possible, use a threshold that works on any system).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, but it's there to match the logic on line 1095:
let threshold = if cfg!(windows) { "7K" } else { "10K" };

Let me remove the custom threshold.


ts.ucmd()
.arg("--apparent-size")
.arg(format!("--threshold={threshold_bin}"))
.succeeds()
.stdout_contains("links")
.stdout_does_not_contain("deeper_dir");
}

#[test]
fn test_du_invalid_threshold() {
let ts = TestScenario::new(util_name!());
Expand Down Expand Up @@ -1528,7 +1667,7 @@ fn test_du_blocksize_zero_do_not_panic() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
at.write("foo", "some content");
for block_size in ["0", "00", "000", "0x0"] {
for block_size in ["0", "00", "000", "0x0", "0b0"] {
ts.ucmd()
.arg(format!("-B{block_size}"))
.arg("foo")
Expand Down
Loading