Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 9 additions & 15 deletions src/executor/wall_time/executor.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::helpers::validate_walltime_results;
use super::isolation::{requires_isolation, wrap_isolation_scope};
use super::isolation::resolve_isolation_mode;
use super::profiler::Profiler;
use super::profiler::perf::PerfProfiler;
use super::profiler::samply::SamplyProfiler;
Expand Down Expand Up @@ -181,14 +181,11 @@ impl Executor for WallTimeExecutor {
let (_env_file, _script_file, cmd_builder) =
WallTimeExecutor::walltime_bench_cmd(&execution_context.config, execution_context)?;

// Resolve the isolation decision once and reuse it for both the scope
// Resolve the isolation decision once and reuse it for both the leaf
// wrapping here and the privilege wrapping below (or in the profiler).
let isolate = requires_isolation();
let cmd_builder = if isolate {
wrap_isolation_scope(cmd_builder)?
} else {
cmd_builder
};
let isolation_mode = resolve_isolation_mode();
let cmd_builder = isolation_mode.wrap_bench(cmd_builder)?;
let requires_sudo = isolation_mode.requires_sudo();

// Split-borrow `self` so the closure inside `run_with_profiler` can
// capture `benchmark_state` while we hold `&mut profiler`.
Expand All @@ -204,16 +201,13 @@ impl Executor for WallTimeExecutor {
cmd_builder,
&execution_context.config,
&execution_context.profile_folder,
isolate,
requires_sudo,
benchmark_state,
)
.await
}
_ => {
// No profiler: still honor the isolation decision, since the
// scope (and the sudo it requires) is a property of the run, not
// of the profiler.
let cmd_builder = if isolate {
let cmd_builder = if requires_sudo {
wrap_with_sudo(cmd_builder)?
} else {
cmd_builder
Expand Down Expand Up @@ -262,11 +256,11 @@ async fn run_with_profiler(
cmd_builder: CommandBuilder,
config: &ExecutorConfig,
profile_folder: &Path,
isolate: bool,
requires_sudo: bool,
benchmark_state: &OnceCell<(FifoBenchmarkData, ExecutionTimestamps)>,
) -> Result<std::process::ExitStatus> {
let wrapped = profiler
.wrap_command(cmd_builder, config, profile_folder, isolate)
.wrap_command(cmd_builder, config, profile_folder, requires_sudo)
.await?;
let cmd = wrapped.build();
debug!("cmd: {cmd:?}");
Expand Down
238 changes: 192 additions & 46 deletions src/executor/wall_time/isolation.rs
Original file line number Diff line number Diff line change
@@ -1,60 +1,107 @@
use std::path::Path;

use crate::executor::helpers::command::CommandBuilder;
use crate::prelude::*;

/// Whether the benchmark must run inside the privileged systemd scope, which in
/// turn requires the profiler itself to record under elevated privileges.
///
/// When isolated, the scope reparents the benchmark out of the profiler's
/// process subtree, so the profiler can only observe it by recording
/// system-wide — which needs `sudo`. When not isolated, the profiler records its
/// own descendant tree and runs unprivileged (relying on a permissive
/// `perf_event_paranoid`).
///
/// `CODSPEED_ISOLATION` overrides the decision; otherwise we isolate only when we
/// can elevate without prompting (root or passwordless sudo, as on CI), so a
/// local run never blocks on a password.
///
/// Isolation relies on `systemd-run`, so it is Linux-only; other platforms always
/// record their own descendant tree unprivileged.
pub fn requires_isolation() -> bool {
if !cfg!(target_os = "linux") {
return false;
/// How the benchmark is pinned to dedicated cores, away from the rest of the
/// system. Whether the profiler must run under sudo depends on the mode; see
/// [`requires_sudo`](Self::requires_sudo).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IsolationMode {
/// The benchmark runs in the runner's own process subtree, unpinned.
None,
/// `systemd-run --scope --slice=codspeed.slice`. The scope reparents the
/// benchmark out of the profiler's process subtree, so the profiler needs
/// elevated privileges (sudo) to observe it.
Systemd,
/// The benchmark runs in a delegated cgroup scope.
/// The scope is expected to be created with the expected leaves before CLI invocation.
Cgroup {
/// The delegated scope cgroup directory, as seen from the runner.
scope_dir: String,
},
}

impl IsolationMode {
/// Whether the profiler must run under sudo. True only when the benchmark is
/// reparented out of the profiler's subtree (i.e. for [`Systemd`](Self::Systemd)),
/// where the profiler needs elevated privileges to observe it.
pub fn requires_sudo(&self) -> bool {
matches!(self, IsolationMode::Systemd)
}
match std::env::var("CODSPEED_ISOLATION").as_deref() {
Ok("true") => true,
Ok("false") => false,
_ => {
let can_isolate = crate::executor::helpers::run_with_sudo::can_elevate_without_prompt();
if !can_isolate {
info!(
"Running without process isolation: elevating privileges would require a \
password prompt. Set CODSPEED_ISOLATION=true to force it."
);

/// Wrap the benchmark leaf so it runs pinned according to this mode. The
/// profiler wraps its recorder around the result, so this must only touch the
/// leaf and never move the profiler onto the bench cores.
pub fn wrap_bench(&self, bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
match self {
IsolationMode::None => Ok(bench_cmd),
IsolationMode::Systemd => wrap_isolation_scope(bench_cmd),
IsolationMode::Cgroup { scope_dir } => {
// Move the runner (and the profiler it later spawns) onto the
// system cores before wrapping the benchmark for the bench cores.
place_runner_in_support(scope_dir)?;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Support leaf race In cgroup mode, this moves the runner into <scope>/support/cgroup.procs before the code waits for any leaf to exist. If the privileged setup creates support and bench asynchronously, this write can fail with a missing support/cgroup.procs path even though the cgroup layout would be ready shortly after. The run then exits before the benchmark starts, so the support leaf needs the same readiness handling as the bench leaf.

wrap_cgroup_isolation(bench_cmd, scope_dir)
}
can_isolate
}
}
}

/// Run the benchmark command in an isolated process scope.
/// Environment variable selecting how walltime runs isolate the benchmark.
const ISOLATION_ENV: &str = "CODSPEED_ISOLATION";

/// Resolve how this run should isolate the benchmark from [`ISOLATION_ENV`]:
///
/// On Linux, the command is wrapped with `systemd-run --scope` so it runs inside the
/// `codspeed.slice` cgroup (predefined on CodSpeed CI runners to pin and isolate the
/// benchmark). Only applied when [`requires_isolation`] is true.
/// - non-Linux: [`None`](IsolationMode::None), no mechanism is available;
/// - unset: [`Systemd`](IsolationMode::Systemd) if we can elevate without a prompt
/// (root or passwordless sudo, as on CI), else [`None`](IsolationMode::None) so a
/// local run never blocks on a password prompt;
/// - `false`: [`None`](IsolationMode::None);
/// - `CGROUP:<dir>`: [`Cgroup`](IsolationMode::Cgroup) under `<dir>`, the only mode
/// that works without host systemd (used inside the sandbox);
/// - anything else: [`Systemd`](IsolationMode::Systemd).
pub fn resolve_isolation_mode() -> IsolationMode {
if !cfg!(target_os = "linux") {
return IsolationMode::None;
}

let Some(value) = std::env::var(ISOLATION_ENV).ok().filter(|v| !v.is_empty()) else {
// Unset: isolate only if we can elevate silently, otherwise stay unpinned
// so a local run never blocks on a password prompt.
if crate::executor::helpers::run_with_sudo::can_elevate_without_prompt() {
return IsolationMode::Systemd;
}
info!(
"Running without process isolation: elevating privileges would require a \
password prompt. Set {ISOLATION_ENV}=true to force it."
);
return IsolationMode::None;
};

match value.as_str() {
"false" => IsolationMode::None,
_ => match value.strip_prefix("CGROUP:") {
Some(scope_dir) => IsolationMode::Cgroup {
scope_dir: scope_dir.to_string(),
},
None => IsolationMode::Systemd,
},
}
}

/// Wrap the benchmark leaf so it runs inside the systemd `codspeed.slice` scope.
///
/// Remarks:
/// - We're using `--scope` so that the profiler is able to capture the events of the benchmark
/// process.
/// - We can't use `--user` here because we need to run in `codspeed.slice`, otherwise we'd run in
/// `user.slice` (which is isolated). We use `--uid` and `--gid` to keep running as the current
/// user.
/// - `--scope` only inherits the system environment, so the caller is expected to have already
/// forwarded the relevant variables (via `wrap_with_env`).
/// - The caller is expected to have already set the working directory on `bench_cmd`; it will be
/// propagated to `systemd-run` via [`CommandBuilder::wrap_with`], and `--same-dir` makes the
/// spawned scope inherit it.
/// Notes on the `systemd-run` flags:
/// - `--scope` (rather than a transient service) keeps the benchmark a child of
/// the runner, so the profiler can capture its events; see
/// [`IsolationMode::Systemd`].
/// - `--user` would land us in `user.slice`, not `codspeed.slice`; `--uid`/`--gid`
/// instead keep the scope running as the current user.
/// - `--scope` only inherits the system environment, so the caller must have
/// already forwarded the benchmark's variables (via `wrap_with_env`) and set
/// its working directory, which `--same-dir` propagates into the scope.
#[cfg(target_os = "linux")]
pub fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
use crate::executor::helpers::env::is_codspeed_debug_enabled;

let mut cmd_builder = CommandBuilder::new("systemd-run");
Expand All @@ -75,6 +122,105 @@ pub fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuil
/// Dummy implementation on non-Linux platforms: the benchmark command is returned as-is.
// TODO(COD-2513): implement an equivalent process-isolation mechanism on macOS
#[cfg(not(target_os = "linux"))]
pub fn wrap_isolation_scope(bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
fn wrap_isolation_scope(bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
Ok(bench_cmd)
}

/// Names of the leaves that are expected to be created prior to the CLI invocation.
const BENCH_LEAF: &str = "bench";
const SUPPORT_LEAF: &str = "support";

/// Move the runner itself into the scope's `support` leaf, so it and every child
/// it later spawns (notably the profiler) run on the system cores rather than the
/// bench cores. Call before spawning the profiler.
///
/// The workload starts in the scope's `all` leaf (every core); this opts the
/// runner down onto the system cores, leaving the bench cores idle for the
/// benchmark the shim later places into `bench`.
fn place_runner_in_support(scope_dir: &str) -> Result<()> {
let procs = Path::new(scope_dir).join(SUPPORT_LEAF).join("cgroup.procs");
let pid = std::process::id().to_string();
std::fs::write(&procs, &pid).with_context(|| format!("move runner into {}", procs.display()))
}

/// Wrap the benchmark so it self-places into the scope's `bench` leaf.
///
/// The privileged side splits the delegated scope and pins the leaves; here a
/// tiny `bash` shim writes its own PID into `<scope>/bench/cgroup.procs` (moving
/// itself, and the benchmark it `exec`s, into the leaf) and then `exec`s the
/// original command, so the benchmark inherits the leaf's `cpuset.cpus` pinning
/// while staying the same PID and the same descendant of the profiler.
///
/// Must wrap only the leaf, before the profiler wraps the recorder around it, so
/// the profiler itself is never moved onto the bench cores.
fn wrap_cgroup_isolation(mut bench_cmd: CommandBuilder, scope_dir: &str) -> Result<CommandBuilder> {
let procs_path = Path::new(scope_dir).join(BENCH_LEAF).join("cgroup.procs");
let quoted = shell_words::quote(&procs_path.to_string_lossy()).into_owned();
// `$0`/`$@`: the shim's own argv[0] is a throwaway "bash"; the real program
// and its arguments follow and are re-exec'd verbatim via `exec "$@"`.
let script = format!("echo $$ > {quoted} && exec \"$@\"");

bench_cmd.wrap("bash", ["-c".to_string(), script, "bash".to_string()]);
Ok(bench_cmd)
}

#[cfg(test)]
mod tests {
use super::*;
use temp_env::with_vars;

#[test]
fn cgroup_prefix_selects_cgroup_mode() {
if !cfg!(target_os = "linux") {
return;
}
with_vars(
[(ISOLATION_ENV, Some("CGROUP:/sys/fs/cgroup/scope"))],
|| {
assert_eq!(
resolve_isolation_mode(),
IsolationMode::Cgroup {
scope_dir: "/sys/fs/cgroup/scope".to_string(),
}
);
},
);
}

#[test]
fn isolation_false_disables() {
with_vars([(ISOLATION_ENV, Some("false"))], || {
assert_eq!(resolve_isolation_mode(), IsolationMode::None);
});
}

#[test]
fn other_value_selects_systemd() {
if !cfg!(target_os = "linux") {
return;
}
with_vars([(ISOLATION_ENV, Some("true"))], || {
assert_eq!(resolve_isolation_mode(), IsolationMode::Systemd);
});
}

#[test]
fn cgroup_mode_keeps_profiler_unprivileged() {
let cgroup = IsolationMode::Cgroup {
scope_dir: "/x".to_string(),
};
assert!(!cgroup.requires_sudo());
assert!(IsolationMode::Systemd.requires_sudo());
}

#[test]
fn cgroup_wrap_places_then_execs() {
let mut cmd = CommandBuilder::new("bash");
cmd.arg("/tmp/bench.sh");
let wrapped = wrap_cgroup_isolation(cmd, "/sys/fs/cgroup").unwrap();
assert_eq!(
wrapped.as_command_line(),
"bash -c 'echo $$ > /sys/fs/cgroup/bench/cgroup.procs && exec \"$@\"' bash bash /tmp/bench.sh"
);
}
}
9 changes: 5 additions & 4 deletions src/executor/wall_time/profiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,16 @@ pub trait Profiler {
/// Profilers stash any live state they need for the duration of the run
/// (control fifos, output paths) on `self`.
///
/// `isolate` mirrors the decision the executor used to wrap the benchmark in
/// the systemd scope: when set, the profiler records system-wide under sudo;
/// otherwise it records its own descendant tree unprivileged.
/// When `requires_sudo` is set, the profiler must run elevated: the
/// isolation mechanism reparented the benchmark out of its subtree, so it can
/// only observe it with elevated privileges. Otherwise it records its own
/// descendant tree unprivileged.
async fn wrap_command(
&mut self,
cmd: CommandBuilder,
config: &ExecutorConfig,
profile_folder: &Path,
isolate: bool,
requires_sudo: bool,
) -> anyhow::Result<CommandBuilder>;

/// The benchmarked process signaled the start of a measured region.
Expand Down
9 changes: 4 additions & 5 deletions src/executor/wall_time/profiler/perf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ impl Profiler for PerfProfiler {
mut cmd_builder: CommandBuilder,
config: &ExecutorConfig,
profile_folder: &Path,
isolate: bool,
requires_sudo: bool,
) -> anyhow::Result<CommandBuilder> {
let perf_fifo = PerfFifo::new()?;
let perf_file_path = profile_folder.join(PERF_PIPEDATA_FILE_NAME);
Expand Down Expand Up @@ -183,10 +183,9 @@ impl Profiler for PerfProfiler {
self.perf_fifo = Some(perf_fifo);
self.perf_file_path = Some(perf_file_path);

// Isolated runs reparent the benchmark out of perf's subtree, so perf
// must record system-wide under sudo. Unisolated runs record perf's own
// descendant tree unprivileged.
let wrapped_builder = if isolate {
// When the benchmark was reparented out of perf's subtree, perf needs
// sudo to observe it; otherwise it records its own descendants unprivileged.
let wrapped_builder = if requires_sudo {
wrap_with_sudo(wrapped_builder)?
} else {
wrapped_builder
Expand Down
9 changes: 4 additions & 5 deletions src/executor/wall_time/profiler/samply/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl Profiler for SamplyProfiler {
mut cmd_builder: CommandBuilder,
_config: &ExecutorConfig,
profile_folder: &Path,
isolate: bool,
requires_sudo: bool,
) -> anyhow::Result<CommandBuilder> {
let output_path = profile_folder.join(SAMPLY_OUTPUT_FILE_NAME);

Expand Down Expand Up @@ -157,10 +157,9 @@ impl Profiler for SamplyProfiler {

self.output_path = Some(output_path);
self.v8_log_dir = Some(v8_log_dir);
// Isolated runs reparent the benchmark out of samply's subtree, so samply
// must record system-wide under sudo. Unisolated runs record samply's own
// descendant tree unprivileged.
let cmd_builder = if isolate {
// When the benchmark was reparented out of samply's subtree, samply needs
// sudo to observe it; otherwise it records its own descendants unprivileged.
let cmd_builder = if requires_sudo {
wrap_with_sudo(cmd_builder)?
} else {
cmd_builder
Expand Down
Loading