diff --git a/Cargo.lock b/Cargo.lock index daf2000e..0fca0173 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -712,6 +712,7 @@ dependencies = [ "gimli", "git2", "gql_client", + "indexmap", "indicatif", "insta", "instrument-hooks-bindings", diff --git a/Cargo.toml b/Cargo.toml index a2703b51..c10c4049 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ gql_client = { git = "https://github.com/CodSpeedHQ/gql-client-rs", rev = "c2691 serde_yaml = "0.9.34" sysinfo = { version = "0.37", features = ["serde"] } indicatif = "0.18" +indexmap = "2.14" console = "0.16" async-trait = "0.1.89" libc = { workspace = true } diff --git a/src/executor/wall_time/profiler/perf/jit_dump.rs b/src/executor/wall_time/profiler/perf/jit_dump.rs index fd5fad05..686c061e 100644 --- a/src/executor/wall_time/profiler/perf/jit_dump.rs +++ b/src/executor/wall_time/profiler/perf/jit_dump.rs @@ -1,5 +1,6 @@ use super::module_symbols::{ModuleSymbols, Symbol}; use crate::prelude::*; +use indexmap::IndexSet; use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord}; use runner_shared::unwind_data::{ProcessUnwindData, UnwindData}; use std::{ @@ -7,6 +8,8 @@ use std::{ path::{Path, PathBuf}, }; +pub(super) type JitDumpPathsByPid = HashMap>; + struct JitDump { path: PathBuf, } @@ -106,7 +109,30 @@ impl JitDump { } } -/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data +pub(super) fn insert_jit_dump_path( + jit_dump_paths_by_pid: &mut JitDumpPathsByPid, + pid: libc::pid_t, + path: PathBuf, +) -> bool { + jit_dump_paths_by_pid.entry(pid).or_default().insert(path) +} + +pub(super) fn add_legacy_tmp_jit_dump_paths_for_pids( + jit_dump_paths_by_pid: &mut JitDumpPathsByPid, + pids: &HashSet, +) { + for pid in pids { + let path = PathBuf::from("/tmp").join(format!("jit-{pid}.dump")); + if path.exists() && insert_jit_dump_path(jit_dump_paths_by_pid, *pid, path.clone()) { + debug!("Found legacy JIT dump file: {path:?}"); + } + } +} + +/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data. +/// +/// Jitdump file paths are discovered from MMAP2 records in the perf data, since JIT runtimes +/// mmap the jitdump file and perf records the mapping with the actual path on disk. /// /// # Symbols /// Since a jit dump is by definition specific to a single pid, we append the harvested symbols @@ -116,40 +142,66 @@ impl JitDump { /// Unwind data is generated as a list pub async fn save_symbols_and_harvest_unwind_data_for_pids( profile_folder: &Path, - pids: &HashSet, + jit_dump_paths_by_pid: &JitDumpPathsByPid, ) -> Result>> { - let mut jit_unwind_data_by_path = HashMap::new(); + let mut jit_unwind_data_by_pid = HashMap::new(); - for pid in pids { - let name = format!("jit-{pid}.dump"); - let path = PathBuf::from("/tmp").join(&name); + for (pid, paths) in jit_dump_paths_by_pid { + for path in paths { + debug!("Found JIT dump file: {path:?}"); + + let symbols = match JitDump::new(path.clone()).into_perf_map() { + Ok(symbols) => symbols, + Err(error) => { + warn!("Failed to convert jit dump into perf map: {error:?}"); + continue; + } + }; + + symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; + + let jit_unwind_data = match JitDump::new(path.clone()).into_unwind_data() { + Ok(data) => data, + Err(error) => { + warn!("Failed to convert jit dump into unwind data: {error:?}"); + continue; + } + }; - if !path.exists() { - continue; + jit_unwind_data_by_pid + .entry(*pid) + .or_insert_with(Vec::new) + .extend(jit_unwind_data); } - debug!("Found JIT dump file: {path:?}"); + } - let symbols = match JitDump::new(path.clone()).into_perf_map() { - Ok(symbols) => symbols, - Err(error) => { - warn!("Failed to convert jit dump into perf map: {error:?}"); - continue; - } - }; + Ok(jit_unwind_data_by_pid) +} - // Also write to perf-.map for harvested Python perf maps compatibility - symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; +#[cfg(test)] +mod tests { + use super::*; - let jit_unwind_data = match JitDump::new(path).into_unwind_data() { - Ok(data) => data, - Err(error) => { - warn!("Failed to convert jit dump into unwind data: {error:?}"); - continue; - } - }; + #[test] + fn insert_jit_dump_path_dedupes_per_pid() { + let mut paths = HashMap::new(); + let path = PathBuf::from("/tmp/jit-123.dump"); + + assert!(insert_jit_dump_path(&mut paths, 123, path.clone())); + assert!(!insert_jit_dump_path(&mut paths, 123, path.clone())); - jit_unwind_data_by_path.insert(*pid, jit_unwind_data); + assert_eq!(paths[&123].iter().collect::>(), vec![&path]); } - Ok(jit_unwind_data_by_path) + #[test] + fn insert_jit_dump_path_keeps_same_path_for_different_pids() { + let mut paths = HashMap::new(); + let path = PathBuf::from("/tmp/jit-123.dump"); + + assert!(insert_jit_dump_path(&mut paths, 123, path.clone())); + assert!(insert_jit_dump_path(&mut paths, 456, path.clone())); + + assert_eq!(paths[&123].iter().collect::>(), vec![&path]); + assert_eq!(paths[&456].iter().collect::>(), vec![&path]); + } } diff --git a/src/executor/wall_time/profiler/perf/mod.rs b/src/executor/wall_time/profiler/perf/mod.rs index 5536af12..ce753119 100644 --- a/src/executor/wall_time/profiler/perf/mod.rs +++ b/src/executor/wall_time/profiler/perf/mod.rs @@ -270,6 +270,7 @@ impl BenchmarkData<'_> { let MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + mut jit_dump_paths_by_pid, } = parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| { error!("Failed to parse perf file: {e}"); BenchmarkDataSaveError::FailedToParsePerfFile @@ -285,8 +286,9 @@ impl BenchmarkData<'_> { error!("Failed to harvest perf maps: {e}"); BenchmarkDataSaveError::FailedToHarvestPerfMaps })?; + jit_dump::add_legacy_tmp_jit_dump_paths_for_pids(&mut jit_dump_paths_by_pid, &tracked_pids); let jit_unwind_data_by_pid = - jit_dump::save_symbols_and_harvest_unwind_data_for_pids(path, &tracked_pids) + jit_dump::save_symbols_and_harvest_unwind_data_for_pids(path, &jit_dump_paths_by_pid) .await .map_err(|e| { error!("Failed to harvest jit dumps: {e}"); diff --git a/src/executor/wall_time/profiler/perf/parse_perf_file.rs b/src/executor/wall_time/profiler/perf/parse_perf_file.rs index 151b5494..3af6beba 100644 --- a/src/executor/wall_time/profiler/perf/parse_perf_file.rs +++ b/src/executor/wall_time/profiler/perf/parse_perf_file.rs @@ -1,3 +1,4 @@ +use super::jit_dump::{JitDumpPathsByPid, insert_jit_dump_path}; use super::loaded_module::{LoadedModule, ProcessLoadedModule}; use super::module_symbols::ModuleSymbols; use super::unwind_data::unwind_data_from_elf; @@ -16,6 +17,8 @@ pub struct MemmapRecordsOutput { /// Module symbols and the computed load bias for each pid that maps the ELF path. pub loaded_modules_by_path: HashMap, pub tracked_pids: HashSet, + /// Jitdump file paths discovered from MMAP2 records, keyed by PID. + pub jit_dump_paths_by_pid: JitDumpPathsByPid, } /// Parse the perf file at `perf_file_path` and look for MMAP2 records for the given `pids`. @@ -27,6 +30,7 @@ pub fn parse_for_memmap2>( mut pid_filter: PidFilter, ) -> Result { let mut loaded_modules_by_path = HashMap::::new(); + let mut jit_dump_paths_by_pid = JitDumpPathsByPid::new(); // 1MiB buffer let reader = std::io::BufReader::with_capacity( @@ -115,6 +119,24 @@ pub fn parse_for_memmap2>( continue; } + // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record + // skips them. JIT runtimes mmap the jitdump file so perf records it. + // Match perf's jit_detect(): basename must be `jit-.dump`. + if is_jit_dump_path(&mmap2_record.path.as_slice()) { + let path = PathBuf::from( + String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(), + ); + if path.exists() + && insert_jit_dump_path( + &mut jit_dump_paths_by_pid, + mmap2_record.pid, + path.clone(), + ) + { + debug!("Found jitdump path from MMAP2 record: {path:?}"); + } + } + process_mmap2_record(mmap2_record, &mut loaded_modules_by_path); } _ => continue, @@ -133,6 +155,7 @@ pub fn parse_for_memmap2>( Ok(MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + jit_dump_paths_by_pid, }) } @@ -208,6 +231,21 @@ fn purge_process_mappings(loaded_modules_by_path: &mut HashMap.dump`. +fn is_jit_dump_path(path: &[u8]) -> bool { + let Some(pos) = path.iter().rposition(|&b| b == b'/') else { + return false; + }; + let basename = &path[pos + 1..]; + let Some(pid) = basename + .strip_prefix(b"jit-") + .and_then(|path| path.strip_suffix(b".dump")) + else { + return false; + }; + !pid.is_empty() && pid.iter().all(|b| b.is_ascii_digit()) +} + /// Process a single MMAP2 record and add it to the symbols and unwind data maps fn process_mmap2_record( record: linux_perf_data::linux_perf_event_reader::Mmap2Record, @@ -398,4 +436,19 @@ mod tests { Some(0xaaaaaaaa0000) ); } + + #[test] + fn is_jit_dump_path_accepts_numeric_pid() { + assert!(is_jit_dump_path(b"/tmp/jit-123.dump")); + assert!(is_jit_dump_path(b"/home/user/.debug/jit/abc/jit-123.dump")); + } + + #[test] + fn is_jit_dump_path_rejects_non_perf_jitdump_names() { + assert!(!is_jit_dump_path(b"jit-123.dump")); + assert!(!is_jit_dump_path(b"/tmp/jit-.dump")); + assert!(!is_jit_dump_path(b"/tmp/jit-abc.dump")); + assert!(!is_jit_dump_path(b"/tmp/not-jit-123.dump")); + assert!(!is_jit_dump_path(b"/tmp/jit-123.dump.old")); + } }