Skip to content

Commit 90729db

Browse files
Kaua-Klassmannmattsu2020
authored andcommitted
fix: numeric sort (-n) does not recognize thousand separators (uutils#10339)
1 parent 29627d4 commit 90729db

16 files changed

+167
-35
lines changed

src/uu/sort/src/sort.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ use uucore::display::Quotable;
4747
use uucore::error::{FromIo, strip_errno};
4848
use uucore::error::{UError, UResult, USimpleError, UUsageError};
4949
use uucore::extendedbigdecimal::ExtendedBigDecimal;
50-
use uucore::format_usage;
5150
#[cfg(feature = "i18n-collator")]
5251
use uucore::i18n::collator::locale_cmp;
5352
use uucore::i18n::decimal::locale_decimal_separator;
@@ -59,6 +58,7 @@ use uucore::posix::{MODERN, TRADITIONAL};
5958
use uucore::show_error;
6059
use uucore::translate;
6160
use uucore::version_cmp::version_cmp;
61+
use uucore::{format_usage, i18n};
6262

6363
use crate::buffer_hint::automatic_buffer_size;
6464
use crate::tmp_dir::TmpDirWrapper;
@@ -1086,11 +1086,22 @@ impl FieldSelector {
10861086
};
10871087
let mut range_str = &line[self.get_range(line, tokens)];
10881088
if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric {
1089+
// Get the thousands separator from the locale, handling cases where the separator is empty or multi-character
1090+
let locale_thousands_separator = i18n::decimal::locale_grouping_separator().as_bytes();
1091+
1092+
// Upstream GNU coreutils ignore multibyte thousands separators
1093+
// (FIXME in C source). We keep the same single-byte behavior.
1094+
let thousands_separator = match locale_thousands_separator {
1095+
[b] => Some(*b),
1096+
_ => None,
1097+
};
1098+
10891099
// Parse NumInfo for this number.
10901100
let (info, num_range) = NumInfo::parse(
10911101
range_str,
10921102
&NumInfoParseSettings {
10931103
accept_si_units: self.settings.mode == SortMode::HumanNumeric,
1104+
thousands_separator,
10941105
..Default::default()
10951106
},
10961107
);

src/uucore/src/lib/features/i18n/decimal.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,47 @@ pub fn locale_decimal_separator() -> &'static str {
3737
DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone()))
3838
}
3939

40+
/// Return the grouping separator for the given locale
41+
fn get_grouping_separator(loc: Locale) -> String {
42+
let data_locale = DataLocale::from(loc);
43+
44+
let request = DataRequest {
45+
id: DataIdentifierBorrowed::for_locale(&data_locale),
46+
metadata: DataRequestMetadata::default(),
47+
};
48+
49+
let response: DataResponse<DecimalSymbolsV1> =
50+
icu_decimal::provider::Baked.load(request).unwrap();
51+
52+
response.payload.get().grouping_separator().to_string()
53+
}
54+
55+
/// Return the grouping separator from the language we're working with.
56+
/// Example:
57+
/// Say we need to format 1,000
58+
/// en_US: 1,000 -> grouping separator is ','
59+
/// fr_FR: 1 000 -> grouping separator is '\u{202f}'
60+
pub fn locale_grouping_separator() -> &'static str {
61+
static GROUPING_SEP: OnceLock<String> = OnceLock::new();
62+
63+
GROUPING_SEP.get_or_init(|| get_grouping_separator(get_numeric_locale().0.clone()))
64+
}
65+
4066
#[cfg(test)]
4167
mod tests {
4268
use icu_locale::locale;
4369

44-
use super::get_decimal_separator;
70+
use super::{get_decimal_separator, get_grouping_separator};
4571

4672
#[test]
47-
fn test_simple_separator() {
73+
fn test_simple_decimal_separator() {
4874
assert_eq!(get_decimal_separator(locale!("en")), ".");
4975
assert_eq!(get_decimal_separator(locale!("fr")), ",");
5076
}
77+
78+
#[test]
79+
fn test_simple_grouping_separator() {
80+
assert_eq!(get_grouping_separator(locale!("en")), ",");
81+
assert_eq!(get_grouping_separator(locale!("fr")), "\u{202f}");
82+
}
5183
}

tests/by-util/test_sort.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,14 @@ fn test_multiple_decimals_numeric() {
258258
);
259259
}
260260

261+
#[test]
262+
fn test_multiple_groupings_numeric() {
263+
test_helper(
264+
"multiple_groupings_numeric",
265+
&["-n", "--numeric-sort", "--sort=numeric", "--sort=n"],
266+
);
267+
}
268+
261269
#[test]
262270
fn test_numeric_with_trailing_invalid_chars() {
263271
test_helper(
@@ -2359,18 +2367,18 @@ _
23592367
__
23602368
1
23612369
_
2362-
2,5
2363-
_
23642370
2.4
23652371
___
2372+
2,5
2373+
_
23662374
2.,,3
23672375
__
23682376
2.4
23692377
___
2370-
2,,3
2371-
_
23722378
2.4
23732379
___
2380+
2,,3
2381+
_
23742382
1a
23752383
_
23762384
2b

tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ CARAvan
2121
8.013
2222
45
2323
46.89
24-
576,446.88800000
25-
576,446.890
2624
4567.
2725
37800
26+
576,446.88800000
27+
576,446.890
2828
4798908.340000000000
2929
4798908.45
3030
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,18 @@ __
6767
46.89
6868
_____
6969
_____
70-
576,446.88800000
71-
___
72-
________________
73-
576,446.890
74-
___
75-
___________
7670
4567.
7771
_____
7872
____________________
7973
>>>>37800
8074
_____
8175
_________
76+
576,446.88800000
77+
___
78+
________________
79+
576,446.890
80+
___
81+
___________
8282
4798908.340000000000
8383
____________________
8484
____________________

tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ CARAvan
2424
8.013
2525
45
2626
46.89
27-
576,446.890
28-
576,446.88800000
2927
4567.
3028
37800
29+
576,446.88800000
30+
576,446.890
3131
4798908.340000000000
3232
4798908.45
3333
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ _____
5050
__
5151
46.89
5252
_____
53-
576,446.890
54-
___
55-
576,446.88800000
56-
___
5753
4567.
5854
_____
5955
>>>>37800
6056
_____
57+
576,446.88800000
58+
___
59+
576,446.890
60+
___
6161
4798908.340000000000
6262
____________________
6363
4798908.45

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
8.013
1212
45
1313
46.89
14-
576,446.890
1514
4567.
1615
37800
16+
576,446.88800000
17+
576,446.890
1718
4798908.340000000000
1819
4798908.45
1920
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ _____
2424
__
2525
46.89
2626
_____
27-
576,446.890
28-
___
2927
4567.
3028
_____
3129
>>>>37800
3230
_____
31+
576,446.88800000
32+
___
33+
576,446.890
34+
___
3335
4798908.340000000000
3436
____________________
3537
4798908.45

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
4798908.8909800
22
4798908.45
33
4798908.340000000000
4+
576,446.890
5+
576,446.88800000
46
37800
57
4567.
6-
576,446.890
78
46.89
89
45
910
8.013

0 commit comments

Comments
 (0)