diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 50e615c988b..144e0e251e8 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -352,25 +352,22 @@ fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &OsStr) -> set_exit_code(1); continue; } - if let Some(mmap1) = try_mmap_stdin() { - mmap = mmap1; - &mmap - } else { - // Copy stdin to a temp file (respects TMPDIR), then mmap it. - // Falls back to Vec buffer if temp file creation fails (e.g., bad TMPDIR). - match buffer_stdin() { - Ok(StdinData::Mmap(mmap1)) => { - mmap = mmap1; - &mmap - } - Ok(StdinData::Vec(buf1)) => { - buf = buf1; - &buf - } - Err(e) => { - show!(TacError::ReadError(OsString::from("stdin"), e)); - continue; - } + // Spool stdin to a temp file and mmap that (buffer_stdin explains + // why mapping the temp file is sound). Mapping the raw stdin fd + // would expose `tac < file` to the same truncation SIGBUS as #9748, + // and the temp file also bounds memory for huge stdin (#10094). + match buffer_stdin() { + Ok(StdinData::Mmap(mmap1)) => { + mmap = mmap1; + &mmap + } + Ok(StdinData::Vec(buf1)) => { + buf = buf1; + &buf + } + Err(e) => { + show!(TacError::ReadError(OsString::from("stdin"), e)); + continue; } } } else { @@ -383,20 +380,20 @@ fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &OsStr) -> } }; - if let Some(mmap1) = try_mmap_file(&file) { - mmap = mmap1; - &mmap - } else { - let mut contents = Vec::new(); - match file.read_to_end(&mut contents) { - Ok(_) => { - buf = contents; - &buf - } - Err(e) => { - show!(TacError::ReadError(filename.clone(), e)); - continue; - } + // Read the file into memory rather than memory-mapping it: a + // concurrent truncation of a mapped file raises SIGBUS and kills the + // process (e.g. during log rotation; #9748). This holds the whole + // file in memory, unlike the stdin path; reading seekable files + // backwards in blocks could bound memory without copying (future work). + let mut contents = Vec::new(); + match file.read_to_end(&mut contents) { + Ok(_) => { + buf = contents; + &buf + } + Err(e) => { + show!(TacError::ReadError(filename.clone(), e)); + continue; } } }; @@ -416,16 +413,6 @@ fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &OsStr) -> Ok(()) } -fn try_mmap_stdin() -> Option { - // SAFETY: If the file is truncated while we map it, SIGBUS will be raised - // and our process will be terminated, thus preventing access of invalid memory. - let mmap = unsafe { Mmap::map(&stdin()).ok()? }; - // On Windows, mmap on a pipe handle can "succeed" but return 0 bytes - // (the file size of a pipe is reported as 0). When that happens, return - // None so we fall through to buffer_stdin() which reads the pipe properly. - if mmap.is_empty() { None } else { Some(mmap) } -} - enum StdinData { Mmap(Mmap), Vec(Vec), @@ -438,8 +425,10 @@ fn buffer_stdin() -> std::io::Result { if let Ok(mut tmp) = tempfile::tempfile() { // Temp file created - copy stdin to it, then read back copy(&mut stdin(), &mut tmp)?; - // SAFETY: If the file is truncated while we map it, SIGBUS will be raised - // and our process will be terminated, thus preventing access of invalid memory. + // SAFETY: `tmp` is an unlinked file owned by this process, so no other + // process can open and truncate it. The mapping therefore stays valid + // for its whole lifetime and cannot trigger SIGBUS (unlike mapping a + // caller-provided file; see #9748). let mmap = unsafe { Mmap::map(&tmp)? }; Ok(StdinData::Mmap(mmap)) } else { @@ -450,12 +439,6 @@ fn buffer_stdin() -> std::io::Result { } } -fn try_mmap_file(file: &File) -> Option { - // SAFETY: If the file is truncated while we map it, SIGBUS will be raised - // and our process will be terminated, thus preventing access of invalid memory. - unsafe { Mmap::map(file).ok() } -} - #[cfg(test)] mod tests_hybrid_flavor { use super::translate_regex_flavor; diff --git a/tests/by-util/test_tac.rs b/tests/by-util/test_tac.rs index e839311ac24..4230a4a8971 100644 --- a/tests/by-util/test_tac.rs +++ b/tests/by-util/test_tac.rs @@ -2,8 +2,8 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa bbaaa aaabc bcdddd cddddaaabc xyzabc abcxyzabc nbbaaa EISDIR -#[cfg(target_os = "linux")] +// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa bbaaa aaabc bcdddd cddddaaabc xyzabc abcxyzabc nbbaaa EISDIR SIGBUS mmap +#[cfg(unix)] use uutests::at_and_ucmd; use uutests::new_ucmd; use uutests::util::TestScenario; @@ -475,3 +475,80 @@ fn test_regular_end_anchor() { .succeeds() .stdout_is("\nccc\nbbaaa\nb"); } + +/// Regression test for . +/// +/// `tac` used to mmap regular files, so truncating a file mid-read raised +/// SIGBUS and killed the process. It now reads files into memory up front, so a +/// concurrent truncation can no longer crash it. The assertion only checks that +/// no signal killed `tac`, so it is stable regardless of how the race lands. +#[test] +#[cfg(unix)] +fn test_tac_file_truncated_during_read_does_not_crash() { + use std::fs::OpenOptions; + use std::time::Duration; + + let (at, mut ucmd) = at_and_ucmd!(); + + // A large sparse file, so the read overlaps with the truncation below. + // `set_len` keeps it sparse, so creation stays cheap. + let name = "input"; + at.make_file(name).set_len(64 * 1024 * 1024).unwrap(); + + let child = ucmd.arg(name).run_no_wait(); + + // Give tac a moment to start reading, then truncate the file out from + // under it. + std::thread::sleep(Duration::from_millis(2)); + OpenOptions::new() + .write(true) + .open(at.plus(name)) + .unwrap() + .set_len(0) + .unwrap(); + + let result = child.wait().unwrap(); + assert!( + result.signal().is_none(), + "tac was killed by signal {:?} (SIGBUS regression, see #9748)", + result.signal() + ); +} + +/// Companion to the test above for the `tac < file` path. `tac` used to mmap the +/// raw stdin fd, exposing a redirected file to the same SIGBUS-on-truncation +/// race; it now copies stdin to an unlinked temp file before mapping. +#[test] +#[cfg(unix)] +fn test_tac_stdin_redirected_file_truncated_during_read_does_not_crash() { + use std::fs::{File, OpenOptions}; + use std::time::Duration; + + let (at, mut ucmd) = at_and_ucmd!(); + + // A large sparse file, so the read overlaps with the truncation below. + let name = "input"; + at.make_file(name).set_len(64 * 1024 * 1024).unwrap(); + + // Redirect the regular file in as stdin (`tac < input`). + let child = ucmd + .set_stdin(File::open(at.plus(name)).unwrap()) + .run_no_wait(); + + // Give tac a moment to start reading, then truncate the file out from + // under it. + std::thread::sleep(Duration::from_millis(2)); + OpenOptions::new() + .write(true) + .open(at.plus(name)) + .unwrap() + .set_len(0) + .unwrap(); + + let result = child.wait().unwrap(); + assert!( + result.signal().is_none(), + "tac was killed by signal {:?} (SIGBUS regression, see #9748)", + result.signal() + ); +}