From a8e339a7b088e7cc4db30f560616af5533dd3f30 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sun, 29 Mar 2026 21:31:50 +0200 Subject: [PATCH 1/4] feat: search LLVM ORC JIT jitdump paths The jitdump search was hardcoded to /tmp/jit-{pid}.dump, which misses jitdumps from LLVM ORC JIT (e.g. revmc, Julia) that write to {JITDUMPDIR|HOME}/.debug/jit/*/jit-{pid}.dump per JITLoaderPerf.cpp. --- src/executor/wall_time/perf/jit_dump.rs | 84 ++++++++++++++++++------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/src/executor/wall_time/perf/jit_dump.rs b/src/executor/wall_time/perf/jit_dump.rs index f306de08..c8502d3a 100644 --- a/src/executor/wall_time/perf/jit_dump.rs +++ b/src/executor/wall_time/perf/jit_dump.rs @@ -108,6 +108,43 @@ impl JitDump { } } +/// Finds all jitdump file paths for a given PID. +/// +/// Searches in order: +/// 1. `/tmp/jit-{pid}.dump` (standard perf jitdump location) +/// 2. `{jitdumpdir}/.debug/jit/*/jit-{pid}.dump` where `jitdumpdir` is `$JITDUMPDIR` or `$HOME` +/// (LLVM ORC JIT / PerfSupportPlugin location) +fn find_jit_dumps_for_pid(pid: libc::pid_t) -> Vec { + let name = format!("jit-{pid}.dump"); + let mut paths = Vec::new(); + + // Standard perf location. + let tmp_path = PathBuf::from("/tmp").join(&name); + if tmp_path.exists() { + paths.push(tmp_path); + } + + // LLVM ORC JIT location: {base}/.debug/jit/*/jit-{pid}.dump + // See LLVM's JITLoaderPerf.cpp for the path construction logic. 
+ let base_dir = std::env::var("JITDUMPDIR") + .or_else(|_| std::env::var("HOME")) + .ok() + .map(PathBuf::from); + if let Some(base) = base_dir { + let jit_dir = base.join(".debug/jit"); + if let Ok(entries) = std::fs::read_dir(&jit_dir) { + for entry in entries.filter_map(|e| e.ok()) { + let candidate = entry.path().join(&name); + if candidate.exists() { + paths.push(candidate); + } + } + } + } + + paths +} + /// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data /// /// # Symbols @@ -123,34 +160,37 @@ pub async fn save_symbols_and_harvest_unwind_data_for_pids( let mut jit_unwind_data_by_path = HashMap::new(); for pid in pids { - let name = format!("jit-{pid}.dump"); - let path = PathBuf::from("/tmp").join(&name); - - if !path.exists() { + let paths = find_jit_dumps_for_pid(*pid); + if paths.is_empty() { continue; } - debug!("Found JIT dump file: {path:?}"); - let symbols = match JitDump::new(path.clone()).into_perf_map() { - Ok(symbols) => symbols, - Err(error) => { - warn!("Failed to convert jit dump into perf map: {error:?}"); - continue; - } - }; + for path in paths { + debug!("Found JIT dump file: {path:?}"); - // Also write to perf-.map for harvested Python perf maps compatibility - symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; + let symbols = match JitDump::new(path.clone()).into_perf_map() { + Ok(symbols) => symbols, + Err(error) => { + warn!("Failed to convert jit dump into perf map: {error:?}"); + continue; + } + }; - let jit_unwind_data = match JitDump::new(path).into_unwind_data() { - Ok(data) => data, - Err(error) => { - warn!("Failed to convert jit dump into unwind data: {error:?}"); - continue; - } - }; + symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; - jit_unwind_data_by_path.insert(*pid, jit_unwind_data); + let jit_unwind_data = match JitDump::new(path).into_unwind_data() { + Ok(data) => data, + Err(error) => { + warn!("Failed to convert jit dump into 
unwind data: {error:?}"); + continue; + } + }; + + jit_unwind_data_by_path + .entry(*pid) + .or_insert_with(Vec::new) + .extend(jit_unwind_data); + } } Ok(jit_unwind_data_by_path) From eaf949b810a7b1c95589ed334025e601d53e9486 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sun, 29 Mar 2026 22:12:30 +0200 Subject: [PATCH 2/4] refactor: discover jitdump paths from MMAP2 records Instead of searching hardcoded filesystem paths, extract jitdump file paths from MMAP2 records in the perf data. This is how perf inject finds them and works for any jitdump location. --- src/executor/wall_time/perf/jit_dump.rs | 59 ++++--------------- src/executor/wall_time/perf/mod.rs | 17 +++--- .../wall_time/perf/parse_perf_file.rs | 19 ++++++ 3 files changed, 39 insertions(+), 56 deletions(-) diff --git a/src/executor/wall_time/perf/jit_dump.rs b/src/executor/wall_time/perf/jit_dump.rs index c8502d3a..27385006 100644 --- a/src/executor/wall_time/perf/jit_dump.rs +++ b/src/executor/wall_time/perf/jit_dump.rs @@ -5,7 +5,7 @@ use crate::{ use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord}; use runner_shared::unwind_data::{ProcessUnwindData, UnwindData}; use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, path::{Path, PathBuf}, }; @@ -108,44 +108,10 @@ impl JitDump { } } -/// Finds all jitdump file paths for a given PID. +/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data. /// -/// Searches in order: -/// 1. `/tmp/jit-{pid}.dump` (standard perf jitdump location) -/// 2. `{jitdumpdir}/.debug/jit/*/jit-{pid}.dump` where `jitdumpdir` is `$JITDUMPDIR` or `$HOME` -/// (LLVM ORC JIT / PerfSupportPlugin location) -fn find_jit_dumps_for_pid(pid: libc::pid_t) -> Vec { - let name = format!("jit-{pid}.dump"); - let mut paths = Vec::new(); - - // Standard perf location. 
- let tmp_path = PathBuf::from("/tmp").join(&name); - if tmp_path.exists() { - paths.push(tmp_path); - } - - // LLVM ORC JIT location: {base}/.debug/jit/*/jit-{pid}.dump - // See LLVM's JITLoaderPerf.cpp for the path construction logic. - let base_dir = std::env::var("JITDUMPDIR") - .or_else(|_| std::env::var("HOME")) - .ok() - .map(PathBuf::from); - if let Some(base) = base_dir { - let jit_dir = base.join(".debug/jit"); - if let Ok(entries) = std::fs::read_dir(&jit_dir) { - for entry in entries.filter_map(|e| e.ok()) { - let candidate = entry.path().join(&name); - if candidate.exists() { - paths.push(candidate); - } - } - } - } - - paths -} - -/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data +/// Jitdump file paths are discovered from MMAP2 records in the perf data, since JIT runtimes +/// mmap the jitdump file and perf records the mapping with the actual path on disk. /// /// # Symbols /// Since a jit dump is by definition specific to a single pid, we append the harvested symbols @@ -155,16 +121,11 @@ fn find_jit_dumps_for_pid(pid: libc::pid_t) -> Vec { /// Unwind data is generated as a list pub async fn save_symbols_and_harvest_unwind_data_for_pids( profile_folder: &Path, - pids: &HashSet, + jit_dump_paths_by_pid: &HashMap>, ) -> Result>> { - let mut jit_unwind_data_by_path = HashMap::new(); - - for pid in pids { - let paths = find_jit_dumps_for_pid(*pid); - if paths.is_empty() { - continue; - } + let mut jit_unwind_data_by_pid = HashMap::new(); + for (pid, paths) in jit_dump_paths_by_pid { for path in paths { debug!("Found JIT dump file: {path:?}"); @@ -178,7 +139,7 @@ pub async fn save_symbols_and_harvest_unwind_data_for_pids( symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; - let jit_unwind_data = match JitDump::new(path).into_unwind_data() { + let jit_unwind_data = match JitDump::new(path.clone()).into_unwind_data() { Ok(data) => data, Err(error) => { warn!("Failed to convert jit dump into 
unwind data: {error:?}"); @@ -186,12 +147,12 @@ pub async fn save_symbols_and_harvest_unwind_data_for_pids( } }; - jit_unwind_data_by_path + jit_unwind_data_by_pid .entry(*pid) .or_insert_with(Vec::new) .extend(jit_unwind_data); } } - Ok(jit_unwind_data_by_path) + Ok(jit_unwind_data_by_pid) } diff --git a/src/executor/wall_time/perf/mod.rs b/src/executor/wall_time/perf/mod.rs index c854d49a..ab8674ae 100644 --- a/src/executor/wall_time/perf/mod.rs +++ b/src/executor/wall_time/perf/mod.rs @@ -300,6 +300,7 @@ impl BenchmarkData { let MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + jit_dump_paths_by_pid, } = { parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| { error!("Failed to parse perf file: {e}"); @@ -317,13 +318,15 @@ impl BenchmarkData { error!("Failed to harvest perf maps: {e}"); BenchmarkDataSaveError::FailedToHarvestPerfMaps })?; - let jit_unwind_data_by_pid = - jit_dump::save_symbols_and_harvest_unwind_data_for_pids(path_ref, &tracked_pids) - .await - .map_err(|e| { - error!("Failed to harvest jit dumps: {e}"); - BenchmarkDataSaveError::FailedToHarvestJitDumps - })?; + let jit_unwind_data_by_pid = jit_dump::save_symbols_and_harvest_unwind_data_for_pids( + path_ref, + &jit_dump_paths_by_pid, + ) + .await + .map_err(|e| { + error!("Failed to harvest jit dumps: {e}"); + BenchmarkDataSaveError::FailedToHarvestJitDumps + })?; let artifacts = save_artifacts::save_artifacts( path_ref, diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs index 9f5b0fd8..2378167d 100644 --- a/src/executor/wall_time/perf/parse_perf_file.rs +++ b/src/executor/wall_time/perf/parse_perf_file.rs @@ -42,6 +42,8 @@ pub struct MemmapRecordsOutput { /// Module symbols and the computed load bias for each pid that maps the ELF path. pub loaded_modules_by_path: HashMap, pub tracked_pids: HashSet, + /// Jitdump file paths discovered from MMAP2 records, keyed by PID. 
+ pub jit_dump_paths_by_pid: HashMap>, } /// Parse the perf file at `perf_file_path` and look for MMAP2 records for the given `pids`. @@ -53,6 +55,7 @@ pub fn parse_for_memmap2>( mut pid_filter: PidFilter, ) -> Result { let mut loaded_modules_by_path = HashMap::::new(); + let mut jit_dump_paths_by_pid = HashMap::>::new(); // 1MiB buffer let reader = std::io::BufReader::with_capacity( @@ -105,6 +108,21 @@ pub fn parse_for_memmap2>( continue; } + // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record + // skips them. JIT runtimes mmap the jitdump file so perf records it. + if mmap2_record.path.as_slice().ends_with(b".dump") { + let path = PathBuf::from( + String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(), + ); + if path.exists() { + debug!("Found jitdump path from MMAP2 record: {path:?}"); + jit_dump_paths_by_pid + .entry(mmap2_record.pid) + .or_default() + .push(path); + } + } + process_mmap2_record(mmap2_record, &mut loaded_modules_by_path); } _ => continue, @@ -123,6 +141,7 @@ pub fn parse_for_memmap2>( Ok(MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + jit_dump_paths_by_pid, }) } From fc2b18e91c3059253eac70fcf9ffda40fc058197 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sun, 29 Mar 2026 22:14:27 +0200 Subject: [PATCH 3/4] fix: match perf's jit_detect() pattern for jitdump paths Only match basenames of the form jit-.dump instead of any .dump suffix. This matches the validation in perf's jit_detect(). 
--- .../wall_time/perf/parse_perf_file.rs | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs index 2378167d..94615f22 100644 --- a/src/executor/wall_time/perf/parse_perf_file.rs +++ b/src/executor/wall_time/perf/parse_perf_file.rs @@ -110,7 +110,8 @@ pub fn parse_for_memmap2>( // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record // skips them. JIT runtimes mmap the jitdump file so perf records it. - if mmap2_record.path.as_slice().ends_with(b".dump") { + // Match perf's jit_detect(): basename must be `jit-<pid>.dump`. + if is_jit_dump_path(&mmap2_record.path.as_slice()) { let path = PathBuf::from( String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(), ); @@ -179,6 +180,23 @@ impl PidFilter { } } +/// Returns true if the path basename matches perf's jitdump pattern: `jit-<pid>.dump`. +fn is_jit_dump_path(path: &[u8]) -> bool { + let basename = match path.iter().rposition(|&b| b == b'/') { + Some(pos) => &path[pos + 1..], + None => return false, + }; + let rest = match basename.strip_prefix(b"jit-") { + Some(rest) => rest, + None => return false, + }; + let rest = match rest.strip_suffix(b".dump") { + Some(rest) => rest, + None => return false, + }; + !rest.is_empty() && rest.iter().all(|b| b.is_ascii_digit()) +} + /// Process a single MMAP2 record and add it to the symbols and unwind data maps fn process_mmap2_record( record: linux_perf_data::linux_perf_event_reader::Mmap2Record, From 92ff4b6100899b8ef4507e04428c051a1743f7a2 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sun, 29 Mar 2026 22:29:49 +0200 Subject: [PATCH 4/4] fix: validate jitdump filename PID matches MMAP2 record PID Match perf's jit_detect() behavior: the PID embedded in the jitdump filename must match the MMAP2 record's PID.
Also fixes a bug where rposition returned a usize that was incorrectly used as a byte slice. --- .../wall_time/perf/parse_perf_file.rs | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs index 94615f22..2e0195a1 100644 --- a/src/executor/wall_time/perf/parse_perf_file.rs +++ b/src/executor/wall_time/perf/parse_perf_file.rs @@ -111,7 +111,7 @@ pub fn parse_for_memmap2>( // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record // skips them. JIT runtimes mmap the jitdump file so perf records it. // Match perf's jit_detect(): basename must be `jit-<pid>.dump`. - if is_jit_dump_path(&mmap2_record.path.as_slice()) { + if is_jit_dump_path(&mmap2_record.path.as_slice(), mmap2_record.pid) { let path = PathBuf::from( String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(), ); @@ -180,21 +180,15 @@ impl PidFilter { } } -/// Returns true if the path basename matches perf's jitdump pattern: `jit-<pid>.dump`. -fn is_jit_dump_path(path: &[u8]) -> bool { - let basename = match path.iter().rposition(|&b| b == b'/') { - Some(pos) => &path[pos + 1..], - None => return false, +/// Returns true if the path basename matches perf's `jit_detect()` pattern: `jit-<pid>.dump`, +/// where `<pid>` must match the MMAP2 record's PID. +fn is_jit_dump_path(path: &[u8], pid: pid_t) -> bool { + let Some(pos) = path.iter().rposition(|&b| b == b'/') else { + return false; }; - let rest = match basename.strip_prefix(b"jit-") { - Some(rest) => rest, - None => return false, - }; - let rest = match rest.strip_suffix(b".dump") { - Some(rest) => rest, - None => return false, - }; - !rest.is_empty() && rest.iter().all(|b| b.is_ascii_digit()) + let basename = &path[pos + 1..]; + let expected = format!("jit-{pid}.dump"); + basename == expected.as_bytes() } /// Process a single MMAP2 record and add it to the symbols and unwind data maps