Skip to content

Commit b5e0e23

Browse files
authored
Merge pull request #7801 from drinkcat/ls-opt-1
ls: Print optimizations
2 parents 993644c + 3de90b9 commit b5e0e23

File tree

2 files changed

+119
-109
lines changed

2 files changed

+119
-109
lines changed

src/uu/ls/BENCHMARKING.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
# Benchmarking ls
22

33
ls majorly involves fetching a lot of details (depending upon what details are requested, eg. time/date, inode details, etc) for each path using system calls. Ideally, any system call should be done only once for each of the paths - not adhering to this principle leads to a lot of system call overhead multiplying and bubbling up, especially for recursive ls, therefore it is important to always benchmark multiple scenarios.
4+
5+
ls _also_ prints a lot of information, so optimizing formatting operations is also critical:
6+
- Try to avoid using `format` unless required.
7+
- Try to avoid repeated string copies unless necessary.
8+
- If a temporary buffer is required, try to allocate a reasonable capacity to start with to avoid repeated reallocations.
9+
- Some values might be expensive to compute (e.g. current line width), but are only required in some cases. Consider delaying computations, e.g. by wrapping the evaluation in a LazyCell.
10+
411
This is an overview over what was benchmarked, and if you make changes to `ls`, you are encouraged to check
512
how performance was affected for the workloads listed below. Feel free to add other workloads to the
613
list that we should improve / make sure not to regress.

src/uu/ls/src/ls.rs

Lines changed: 112 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55

66
// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly
77

8+
use std::iter;
89
#[cfg(windows)]
910
use std::os::windows::fs::MetadataExt;
10-
use std::{cell::OnceCell, num::IntErrorKind};
11+
use std::{cell::LazyCell, cell::OnceCell, num::IntErrorKind};
1112
use std::{
1213
cmp::Reverse,
1314
ffi::{OsStr, OsString},
@@ -2420,12 +2421,33 @@ fn display_dir_entry_size(
24202421
}
24212422
}
24222423

2423-
fn pad_left(string: &str, count: usize) -> String {
2424-
format!("{string:>count$}")
2424+
// A simple, performant, ExtendPad trait to add a string to a Vec<u8>, padding with spaces
2425+
// on the left or right, without making additional copies, or using formatting functions.
2426+
trait ExtendPad {
2427+
fn extend_pad_left(&mut self, string: &str, count: usize);
2428+
fn extend_pad_right(&mut self, string: &str, count: usize);
24252429
}
24262430

2427-
fn pad_right(string: &str, count: usize) -> String {
2428-
format!("{string:<count$}")
2431+
impl ExtendPad for Vec<u8> {
2432+
fn extend_pad_left(&mut self, string: &str, count: usize) {
2433+
if string.len() < count {
2434+
self.extend(iter::repeat_n(b' ', count - string.len()));
2435+
}
2436+
self.extend(string.as_bytes());
2437+
}
2438+
2439+
fn extend_pad_right(&mut self, string: &str, count: usize) {
2440+
self.extend(string.as_bytes());
2441+
if string.len() < count {
2442+
self.extend(iter::repeat_n(b' ', count - string.len()));
2443+
}
2444+
}
2445+
}
2446+
2447+
// TODO: Consider converting callers to use ExtendPad instead, as it avoids
2448+
// additional copies.
2449+
fn pad_left(string: &str, count: usize) -> String {
2450+
format!("{string:>count$}")
24292451
}
24302452

24312453
fn return_total(
@@ -2555,8 +2577,15 @@ fn display_items(
25552577
// whether text will wrap or not, because when format is grid or
25562578
// column ls will try to place the item name in a new line if it
25572579
// wraps.
2558-
let cell =
2559-
display_item_name(i, config, prefix_context, more_info, out, style_manager, 0);
2580+
let cell = display_item_name(
2581+
i,
2582+
config,
2583+
prefix_context,
2584+
more_info,
2585+
out,
2586+
style_manager,
2587+
LazyCell::new(Box::new(|| 0)),
2588+
);
25602589

25612590
names_vec.push(cell);
25622591
}
@@ -2760,11 +2789,11 @@ fn display_item_long(
27602789
style_manager: &mut Option<StyleManager>,
27612790
quoted: bool,
27622791
) -> UResult<()> {
2763-
let mut output_display: Vec<u8> = vec![];
2792+
let mut output_display: Vec<u8> = Vec::with_capacity(128);
27642793

27652794
// apply normal color to non filename outputs
27662795
if let Some(style_manager) = style_manager {
2767-
write!(output_display, "{}", style_manager.apply_normal()).unwrap();
2796+
output_display.extend(style_manager.apply_normal().as_bytes());
27682797
}
27692798
if config.dired {
27702799
output_display.extend(b" ");
@@ -2775,93 +2804,71 @@ fn display_item_long(
27752804
let is_acl_set = false;
27762805
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
27772806
let is_acl_set = has_acl(item.display_name.as_os_str());
2778-
write!(
2779-
output_display,
2780-
"{}{} {}",
2781-
display_permissions(md, true),
2782-
if item.security_context.len() > 1 {
2783-
// GNU `ls` uses a "." character to indicate a file with a security context,
2784-
// but not other alternate access method.
2785-
"."
2786-
} else if is_acl_set {
2787-
"+"
2788-
} else {
2789-
""
2790-
},
2791-
pad_left(&display_symlink_count(md), padding.link_count)
2792-
)
2793-
.unwrap();
2807+
output_display.extend(display_permissions(md, true).as_bytes());
2808+
if item.security_context.len() > 1 {
2809+
// GNU `ls` uses a "." character to indicate a file with a security context,
2810+
// but not other alternate access method.
2811+
output_display.extend(b".");
2812+
} else if is_acl_set {
2813+
output_display.extend(b"+");
2814+
}
2815+
output_display.extend(b" ");
2816+
output_display.extend_pad_left(&display_symlink_count(md), padding.link_count);
27942817

27952818
if config.long.owner {
2796-
write!(
2797-
output_display,
2798-
" {}",
2799-
pad_right(&display_uname(md, config), padding.uname)
2800-
)
2801-
.unwrap();
2819+
output_display.extend(b" ");
2820+
output_display.extend_pad_right(&display_uname(md, config), padding.uname);
28022821
}
28032822

28042823
if config.long.group {
2805-
write!(
2806-
output_display,
2807-
" {}",
2808-
pad_right(&display_group(md, config), padding.group)
2809-
)
2810-
.unwrap();
2824+
output_display.extend(b" ");
2825+
output_display.extend_pad_right(&display_group(md, config), padding.group);
28112826
}
28122827

28132828
if config.context {
2814-
write!(
2815-
output_display,
2816-
" {}",
2817-
pad_right(&item.security_context, padding.context)
2818-
)
2819-
.unwrap();
2829+
output_display.extend(b" ");
2830+
output_display.extend_pad_right(&item.security_context, padding.context);
28202831
}
28212832

28222833
// Author is only different from owner on GNU/Hurd, so we reuse
28232834
// the owner, since GNU/Hurd is not currently supported by Rust.
28242835
if config.long.author {
2825-
write!(
2826-
output_display,
2827-
" {}",
2828-
pad_right(&display_uname(md, config), padding.uname)
2829-
)
2830-
.unwrap();
2836+
output_display.extend(b" ");
2837+
output_display.extend_pad_right(&display_uname(md, config), padding.uname);
28312838
}
28322839

28332840
match display_len_or_rdev(md, config) {
28342841
SizeOrDeviceId::Size(size) => {
2835-
write!(output_display, " {}", pad_left(&size, padding.size)).unwrap();
2842+
output_display.extend(b" ");
2843+
output_display.extend_pad_left(&size, padding.size);
28362844
}
28372845
SizeOrDeviceId::Device(major, minor) => {
2838-
write!(
2839-
output_display,
2840-
" {}, {}",
2841-
pad_left(
2842-
&major,
2843-
#[cfg(not(unix))]
2844-
0usize,
2845-
#[cfg(unix)]
2846-
padding.major.max(
2847-
padding
2848-
.size
2849-
.saturating_sub(padding.minor.saturating_add(2usize))
2850-
),
2846+
output_display.extend(b" ");
2847+
output_display.extend_pad_left(
2848+
&major,
2849+
#[cfg(not(unix))]
2850+
0usize,
2851+
#[cfg(unix)]
2852+
padding.major.max(
2853+
padding
2854+
.size
2855+
.saturating_sub(padding.minor.saturating_add(2usize)),
28512856
),
2852-
pad_left(
2853-
&minor,
2854-
#[cfg(not(unix))]
2855-
0usize,
2856-
#[cfg(unix)]
2857-
padding.minor,
2858-
),
2859-
)
2860-
.unwrap();
2857+
);
2858+
output_display.extend(b", ");
2859+
output_display.extend_pad_left(
2860+
&minor,
2861+
#[cfg(not(unix))]
2862+
0usize,
2863+
#[cfg(unix)]
2864+
padding.minor,
2865+
);
28612866
}
28622867
};
28632868

2864-
write!(output_display, " {} ", display_date(md, config)).unwrap();
2869+
output_display.extend(b" ");
2870+
output_display.extend(display_date(md, config).as_bytes());
2871+
output_display.extend(b" ");
28652872

28662873
let item_name = display_item_name(
28672874
item,
@@ -2870,7 +2877,9 @@ fn display_item_long(
28702877
String::new(),
28712878
out,
28722879
style_manager,
2873-
ansi_width(&String::from_utf8_lossy(&output_display)),
2880+
LazyCell::new(Box::new(|| {
2881+
ansi_width(&String::from_utf8_lossy(&output_display))
2882+
})),
28742883
);
28752884

28762885
let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") {
@@ -2890,7 +2899,7 @@ fn display_item_long(
28902899
dired::update_positions(dired, start, end);
28912900
}
28922901
write_os_str(&mut output_display, &displayed_item)?;
2893-
write!(output_display, "{}", config.line_ending)?;
2902+
output_display.extend(config.line_ending.to_string().as_bytes());
28942903
} else {
28952904
#[cfg(unix)]
28962905
let leading_char = {
@@ -2925,42 +2934,36 @@ fn display_item_long(
29252934
}
29262935
};
29272936

2928-
write!(
2929-
output_display,
2930-
"{}{} {}",
2931-
format_args!("{leading_char}?????????"),
2932-
if item.security_context.len() > 1 {
2933-
// GNU `ls` uses a "." character to indicate a file with a security context,
2934-
// but not other alternate access method.
2935-
"."
2936-
} else {
2937-
""
2938-
},
2939-
pad_left("?", padding.link_count)
2940-
)
2941-
.unwrap();
2937+
output_display.extend(leading_char.as_bytes());
2938+
output_display.extend(b"?????????");
2939+
if item.security_context.len() > 1 {
2940+
// GNU `ls` uses a "." character to indicate a file with a security context,
2941+
// but not other alternate access method.
2942+
output_display.extend(b".");
2943+
}
2944+
output_display.extend(b" ");
2945+
output_display.extend_pad_left("?", padding.link_count);
29422946

29432947
if config.long.owner {
2944-
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap();
2948+
output_display.extend(b" ");
2949+
output_display.extend_pad_right("?", padding.uname);
29452950
}
29462951

29472952
if config.long.group {
2948-
write!(output_display, " {}", pad_right("?", padding.group)).unwrap();
2953+
output_display.extend(b" ");
2954+
output_display.extend_pad_right("?", padding.group);
29492955
}
29502956

29512957
if config.context {
2952-
write!(
2953-
output_display,
2954-
" {}",
2955-
pad_right(&item.security_context, padding.context)
2956-
)
2957-
.unwrap();
2958+
output_display.extend(b" ");
2959+
output_display.extend_pad_right(&item.security_context, padding.context);
29582960
}
29592961

29602962
// Author is only different from owner on GNU/Hurd, so we reuse
29612963
// the owner, since GNU/Hurd is not currently supported by Rust.
29622964
if config.long.author {
2963-
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap();
2965+
output_display.extend(b" ");
2966+
output_display.extend_pad_right("?", padding.uname);
29642967
}
29652968

29662969
let displayed_item = display_item_name(
@@ -2970,17 +2973,17 @@ fn display_item_long(
29702973
String::new(),
29712974
out,
29722975
style_manager,
2973-
ansi_width(&String::from_utf8_lossy(&output_display)),
2976+
LazyCell::new(Box::new(|| {
2977+
ansi_width(&String::from_utf8_lossy(&output_display))
2978+
})),
29742979
);
29752980
let date_len = 12;
29762981

2977-
write!(
2978-
output_display,
2979-
" {} {} ",
2980-
pad_left("?", padding.size),
2981-
pad_left("?", date_len),
2982-
)
2983-
.unwrap();
2982+
output_display.extend(b" ");
2983+
output_display.extend_pad_left("?", padding.size);
2984+
output_display.extend(b" ");
2985+
output_display.extend_pad_left("?", date_len);
2986+
output_display.extend(b" ");
29842987

29852988
if config.dired {
29862989
dired::calculate_and_update_positions(
@@ -2990,7 +2993,7 @@ fn display_item_long(
29902993
);
29912994
}
29922995
write_os_str(&mut output_display, &displayed_item)?;
2993-
write!(output_display, "{}", config.line_ending)?;
2996+
output_display.extend(config.line_ending.to_string().as_bytes());
29942997
}
29952998
out.write_all(&output_display)?;
29962999

@@ -3206,13 +3209,13 @@ fn display_item_name(
32063209
more_info: String,
32073210
out: &mut BufWriter<Stdout>,
32083211
style_manager: &mut Option<StyleManager>,
3209-
current_column: usize,
3212+
current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>,
32103213
) -> OsString {
32113214
// This is our return value. We start by `&path.display_name` and modify it along the way.
32123215
let mut name = escape_name(&path.display_name, &config.quoting_style);
32133216

32143217
let is_wrap =
3215-
|namelen: usize| config.width != 0 && current_column + namelen > config.width.into();
3218+
|namelen: usize| config.width != 0 && *current_column + namelen > config.width.into();
32163219

32173220
if config.hyperlink {
32183221
name = create_hyperlink(&name, path);

0 commit comments

Comments
 (0)