CodSpeedHQ · GuillaumeLagrange · Jun 29, 2026 · greptile-apps · Jun 29, 2026
diff --git a/src/executor/wall_time/executor.rs b/src/executor/wall_time/executor.rs
@@ -1,5 +1,5 @@
 use super::helpers::validate_walltime_results;
-use super::isolation::{requires_isolation, wrap_isolation_scope};
+use super::isolation::resolve_isolation_mode;
 use super::profiler::Profiler;
 use super::profiler::perf::PerfProfiler;
 use super::profiler::samply::SamplyProfiler;
@@ -181,14 +181,11 @@ impl Executor for WallTimeExecutor {
         let (_env_file, _script_file, cmd_builder) =
             WallTimeExecutor::walltime_bench_cmd(&execution_context.config, execution_context)?;
 
-        // Resolve the isolation decision once and reuse it for both the scope
+        // Resolve the isolation decision once and reuse it for both the leaf
         // wrapping here and the privilege wrapping below (or in the profiler).
-        let isolate = requires_isolation();
-        let cmd_builder = if isolate {
-            wrap_isolation_scope(cmd_builder)?
-        } else {
-            cmd_builder
-        };
+        let isolation_mode = resolve_isolation_mode();
+        let cmd_builder = isolation_mode.wrap_bench(cmd_builder)?;
+        let requires_sudo = isolation_mode.requires_sudo();
 
         // Split-borrow `self` so the closure inside `run_with_profiler` can
         // capture `benchmark_state` while we hold `&mut profiler`.
@@ -204,16 +201,13 @@ impl Executor for WallTimeExecutor {
                     cmd_builder,
                     &execution_context.config,
                     &execution_context.profile_folder,
-                    isolate,
+                    requires_sudo,
                     benchmark_state,
                 )
                 .await
             }
             _ => {
-                // No profiler: still honor the isolation decision, since the
-                // scope (and the sudo it requires) is a property of the run, not
-                // of the profiler.
-                let cmd_builder = if isolate {
+                let cmd_builder = if requires_sudo {
                     wrap_with_sudo(cmd_builder)?
                 } else {
                     cmd_builder
@@ -262,11 +256,11 @@ async fn run_with_profiler(
     cmd_builder: CommandBuilder,
     config: &ExecutorConfig,
     profile_folder: &Path,
-    isolate: bool,
+    requires_sudo: bool,
     benchmark_state: &OnceCell<(FifoBenchmarkData, ExecutionTimestamps)>,
 ) -> Result<std::process::ExitStatus> {
     let wrapped = profiler
-        .wrap_command(cmd_builder, config, profile_folder, isolate)
+        .wrap_command(cmd_builder, config, profile_folder, requires_sudo)
         .await?;
     let cmd = wrapped.build();
     debug!("cmd: {cmd:?}");

diff --git a/src/executor/wall_time/isolation.rs b/src/executor/wall_time/isolation.rs
@@ -1,60 +1,107 @@
+use std::path::Path;
+
 use crate::executor::helpers::command::CommandBuilder;
 use crate::prelude::*;
 
-/// Whether the benchmark must run inside the privileged systemd scope, which in
-/// turn requires the profiler itself to record under elevated privileges.
-///
-/// When isolated, the scope reparents the benchmark out of the profiler's
-/// process subtree, so the profiler can only observe it by recording
-/// system-wide — which needs `sudo`. When not isolated, the profiler records its
-/// own descendant tree and runs unprivileged (relying on a permissive
-/// `perf_event_paranoid`).
-///
-/// `CODSPEED_ISOLATION` overrides the decision; otherwise we isolate only when we
-/// can elevate without prompting (root or passwordless sudo, as on CI), so a
-/// local run never blocks on a password.
-///
-/// Isolation relies on `systemd-run`, so it is Linux-only; other platforms always
-/// record their own descendant tree unprivileged.
-pub fn requires_isolation() -> bool {
-    if !cfg!(target_os = "linux") {
-        return false;
+/// How the benchmark is pinned to dedicated cores, away from the rest of the
+/// system. Whether the profiler must run under sudo depends on the mode; see
+/// [`requires_sudo`](Self::requires_sudo).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum IsolationMode {
+    /// The benchmark runs in the runner's own process subtree, unpinned.
+    None,
+    /// `systemd-run --scope --slice=codspeed.slice`. The scope reparents the
+    /// benchmark out of the profiler's process subtree, so the profiler needs
+    /// elevated privileges (sudo) to observe it.
+    Systemd,
+    /// The benchmark runs in a delegated cgroup scope.
+    /// The scope is expected to be created with the expected leaves before CLI invocation.
+    Cgroup {
+        /// The delegated scope cgroup directory, as seen from the runner.
+        scope_dir: String,
+    },
+}
+
+impl IsolationMode {
+    /// Whether the profiler must run under sudo. True only when the benchmark is
+    /// reparented out of the profiler's subtree (i.e. for [`Systemd`](Self::Systemd)),
+    /// where the profiler needs elevated privileges to observe it.
+    pub fn requires_sudo(&self) -> bool {
+        matches!(self, IsolationMode::Systemd)
     }
-    match std::env::var("CODSPEED_ISOLATION").as_deref() {
-        Ok("true") => true,
-        Ok("false") => false,
-        _ => {
-            let can_isolate = crate::executor::helpers::run_with_sudo::can_elevate_without_prompt();
-            if !can_isolate {
-                info!(
-                    "Running without process isolation: elevating privileges would require a \
-                     password prompt. Set CODSPEED_ISOLATION=true to force it."
-                );
+
+    /// Wrap the benchmark leaf so it runs pinned according to this mode. The
+    /// profiler wraps its recorder around the result, so this must only touch the
+    /// leaf and never move the profiler onto the bench cores.
+    pub fn wrap_bench(&self, bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
+        match self {
+            IsolationMode::None => Ok(bench_cmd),
+            IsolationMode::Systemd => wrap_isolation_scope(bench_cmd),
+            IsolationMode::Cgroup { scope_dir } => {
+                // Move the runner (and the profiler it later spawns) onto the
+                // system cores before wrapping the benchmark for the bench cores.
+                place_runner_in_support(scope_dir)?;
+                wrap_cgroup_isolation(bench_cmd, scope_dir)
             }
-            can_isolate
         }
     }
 }
 
-/// Run the benchmark command in an isolated process scope.
+/// Environment variable selecting how walltime runs isolate the benchmark.
+const ISOLATION_ENV: &str = "CODSPEED_ISOLATION";
+
+/// Resolve how this run should isolate the benchmark from [`ISOLATION_ENV`]:
 ///
-/// On Linux, the command is wrapped with `systemd-run --scope` so it runs inside the
-/// `codspeed.slice` cgroup (predefined on CodSpeed CI runners to pin and isolate the
-/// benchmark). Only applied when [`requires_isolation`] is true.
+/// - non-Linux: [`None`](IsolationMode::None), no mechanism is available;
+/// - unset: [`Systemd`](IsolationMode::Systemd) if we can elevate without a prompt
+///   (root or passwordless sudo, as on CI), else [`None`](IsolationMode::None) so a
+///   local run never blocks on a password prompt;
+/// - `false`: [`None`](IsolationMode::None);
+/// - `CGROUP:<dir>`: [`Cgroup`](IsolationMode::Cgroup) under `<dir>`, the only mode
+///   that works without host systemd (used inside the sandbox);
+/// - anything else: [`Systemd`](IsolationMode::Systemd).
+pub fn resolve_isolation_mode() -> IsolationMode {
+    if !cfg!(target_os = "linux") {
+        return IsolationMode::None;
+    }
+
+    let Some(value) = std::env::var(ISOLATION_ENV).ok().filter(|v| !v.is_empty()) else {
+        // Unset: isolate only if we can elevate silently, otherwise stay unpinned
+        // so a local run never blocks on a password prompt.
+        if crate::executor::helpers::run_with_sudo::can_elevate_without_prompt() {
+            return IsolationMode::Systemd;
+        }
+        info!(
+            "Running without process isolation: elevating privileges would require a \
+             password prompt. Set {ISOLATION_ENV}=true to force it."
+        );
+        return IsolationMode::None;
+    };
+
+    match value.as_str() {
+        "false" => IsolationMode::None,
+        _ => match value.strip_prefix("CGROUP:") {
+            Some(scope_dir) => IsolationMode::Cgroup {
+                scope_dir: scope_dir.to_string(),
+            },
+            None => IsolationMode::Systemd,
+        },
+    }
+}
+
+/// Wrap the benchmark leaf so it runs inside the systemd `codspeed.slice` scope.
 ///
-/// Remarks:
-/// - We're using `--scope` so that the profiler is able to capture the events of the benchmark
-///   process.
-/// - We can't use `--user` here because we need to run in `codspeed.slice`, otherwise we'd run in
-///   `user.slice` (which is isolated). We use `--uid` and `--gid` to keep running as the current
-///   user.
-/// - `--scope` only inherits the system environment, so the caller is expected to have already
-///   forwarded the relevant variables (via `wrap_with_env`).
-/// - The caller is expected to have already set the working directory on `bench_cmd`; it will be
-///   propagated to `systemd-run` via [`CommandBuilder::wrap_with`], and `--same-dir` makes the
-///   spawned scope inherit it.
+/// Notes on the `systemd-run` flags:
+/// - `--scope` (rather than a transient service) keeps the benchmark a child of
+///   the runner, so the profiler can capture its events; see
+///   [`IsolationMode::Systemd`].
+/// - `--user` would land us in `user.slice`, not `codspeed.slice`; `--uid`/`--gid`
+///   instead keep the scope running as the current user.
+/// - `--scope` only inherits the system environment, so the caller must have
+///   already forwarded the benchmark's variables (via `wrap_with_env`) and set
+///   its working directory, which `--same-dir` propagates into the scope.
 #[cfg(target_os = "linux")]
-pub fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
+fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
     use crate::executor::helpers::env::is_codspeed_debug_enabled;
 
     let mut cmd_builder = CommandBuilder::new("systemd-run");
@@ -75,6 +122,105 @@ pub fn wrap_isolation_scope(mut bench_cmd: CommandBuilder) -> Result<CommandBuil
 /// Dummy implementation on non-Linux platforms: the benchmark command is returned as-is.
 // TODO(COD-2513): implement an equivalent process-isolation mechanism on macOS
 #[cfg(not(target_os = "linux"))]
-pub fn wrap_isolation_scope(bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
+fn wrap_isolation_scope(bench_cmd: CommandBuilder) -> Result<CommandBuilder> {
     Ok(bench_cmd)
 }
+
+/// Names of the leaves that are expected to be created prior to the CLI invocation.
+const BENCH_LEAF: &str = "bench";
+const SUPPORT_LEAF: &str = "support";
+
+/// Move the runner itself into the scope's `support` leaf, so it and every child
+/// it later spawns (notably the profiler) run on the system cores rather than the
+/// bench cores. Call before spawning the profiler.
+///
+/// The workload starts in the scope's `all` leaf (every core); this opts the
+/// runner down onto the system cores, leaving the bench cores idle for the
+/// benchmark the shim later places into `bench`.
+fn place_runner_in_support(scope_dir: &str) -> Result<()> {
+    let procs = Path::new(scope_dir).join(SUPPORT_LEAF).join("cgroup.procs");
+    let pid = std::process::id().to_string();
+    std::fs::write(&procs, &pid).with_context(|| format!("move runner into {}", procs.display()))
+}
+
+/// Wrap the benchmark so it self-places into the scope's `bench` leaf.
+///
+/// The privileged side splits the delegated scope and pins the leaves; here a
+/// tiny `bash` shim writes its own PID into `<scope>/bench/cgroup.procs` (moving
+/// itself, and the benchmark it `exec`s, into the leaf) and then `exec`s the
+/// original command, so the benchmark inherits the leaf's `cpuset.cpus` pinning
+/// while staying the same PID and the same descendant of the profiler.
+///
+/// Must wrap only the leaf, before the profiler wraps the recorder around it, so
+/// the profiler itself is never moved onto the bench cores.
+fn wrap_cgroup_isolation(mut bench_cmd: CommandBuilder, scope_dir: &str) -> Result<CommandBuilder> {
+    let procs_path = Path::new(scope_dir).join(BENCH_LEAF).join("cgroup.procs");
+    let quoted = shell_words::quote(&procs_path.to_string_lossy()).into_owned();
+    // `$0`/`$@`: the shim's own argv[0] is a throwaway "bash"; the real program
+    // and its arguments follow and are re-exec'd verbatim via `exec "$@"`.
+    let script = format!("echo $$ > {quoted} && exec \"$@\"");
+
+    bench_cmd.wrap("bash", ["-c".to_string(), script, "bash".to_string()]);
+    Ok(bench_cmd)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use temp_env::with_vars;
+
+    #[test]
+    fn cgroup_prefix_selects_cgroup_mode() {
+        if !cfg!(target_os = "linux") {
+            return;
+        }
+        with_vars(
+            [(ISOLATION_ENV, Some("CGROUP:/sys/fs/cgroup/scope"))],
+            || {
+                assert_eq!(
+                    resolve_isolation_mode(),
+                    IsolationMode::Cgroup {
+                        scope_dir: "/sys/fs/cgroup/scope".to_string(),
+                    }
+                );
+            },
+        );
+    }
+
+    #[test]
+    fn isolation_false_disables() {
+        with_vars([(ISOLATION_ENV, Some("false"))], || {
+            assert_eq!(resolve_isolation_mode(), IsolationMode::None);
+        });
+    }
+
+    #[test]
+    fn other_value_selects_systemd() {
+        if !cfg!(target_os = "linux") {
+            return;
+        }
+        with_vars([(ISOLATION_ENV, Some("true"))], || {
+            assert_eq!(resolve_isolation_mode(), IsolationMode::Systemd);
+        });
+    }
+
+    #[test]
+    fn cgroup_mode_keeps_profiler_unprivileged() {
+        let cgroup = IsolationMode::Cgroup {
+            scope_dir: "/x".to_string(),
+        };
+        assert!(!cgroup.requires_sudo());
+        assert!(IsolationMode::Systemd.requires_sudo());
+    }
+
+    #[test]
+    fn cgroup_wrap_places_then_execs() {
+        let mut cmd = CommandBuilder::new("bash");
+        cmd.arg("/tmp/bench.sh");
+        let wrapped = wrap_cgroup_isolation(cmd, "/sys/fs/cgroup").unwrap();
+        assert_eq!(
+            wrapped.as_command_line(),
+            "bash -c 'echo $$ > /sys/fs/cgroup/bench/cgroup.procs && exec \"$@\"' bash bash /tmp/bench.sh"
+        );
+    }
+}
diff --git a/src/executor/wall_time/profiler/mod.rs b/src/executor/wall_time/profiler/mod.rs
@@ -48,15 +48,16 @@ pub trait Profiler {
     /// Profilers stash any live state they need for the duration of the run
     /// (control fifos, output paths) on `self`.
     ///
-    /// `isolate` mirrors the decision the executor used to wrap the benchmark in
-    /// the systemd scope: when set, the profiler records system-wide under sudo;
-    /// otherwise it records its own descendant tree unprivileged.
+    /// When `requires_sudo` is set, the profiler must run elevated: the
+    /// isolation mechanism reparented the benchmark out of its subtree, so it can
+    /// only observe it with elevated privileges. Otherwise it records its own
+    /// descendant tree unprivileged.
     async fn wrap_command(
         &mut self,
         cmd: CommandBuilder,
         config: &ExecutorConfig,
         profile_folder: &Path,
-        isolate: bool,
+        requires_sudo: bool,
     ) -> anyhow::Result<CommandBuilder>;
 
     /// The benchmarked process signaled the start of a measured region.

diff --git a/src/executor/wall_time/profiler/perf/mod.rs b/src/executor/wall_time/profiler/perf/mod.rs
@@ -91,7 +91,7 @@ impl Profiler for PerfProfiler {
         mut cmd_builder: CommandBuilder,
         config: &ExecutorConfig,
         profile_folder: &Path,
-        isolate: bool,
+        requires_sudo: bool,
     ) -> anyhow::Result<CommandBuilder> {
         let perf_fifo = PerfFifo::new()?;
         let perf_file_path = profile_folder.join(PERF_PIPEDATA_FILE_NAME);
@@ -183,10 +183,9 @@ impl Profiler for PerfProfiler {
         self.perf_fifo = Some(perf_fifo);
         self.perf_file_path = Some(perf_file_path);
 
-        // Isolated runs reparent the benchmark out of perf's subtree, so perf
-        // must record system-wide under sudo. Unisolated runs record perf's own
-        // descendant tree unprivileged.
-        let wrapped_builder = if isolate {
+        // When the benchmark was reparented out of perf's subtree, perf needs
+        // sudo to observe it; otherwise it records its own descendants unprivileged.
+        let wrapped_builder = if requires_sudo {
             wrap_with_sudo(wrapped_builder)?
         } else {
             wrapped_builder

diff --git a/src/executor/wall_time/profiler/samply/mod.rs b/src/executor/wall_time/profiler/samply/mod.rs
@@ -86,7 +86,7 @@ impl Profiler for SamplyProfiler {
         mut cmd_builder: CommandBuilder,
         _config: &ExecutorConfig,
         profile_folder: &Path,
-        isolate: bool,
+        requires_sudo: bool,
     ) -> anyhow::Result<CommandBuilder> {
         let output_path = profile_folder.join(SAMPLY_OUTPUT_FILE_NAME);
 
@@ -157,10 +157,9 @@ impl Profiler for SamplyProfiler {
 
         self.output_path = Some(output_path);
         self.v8_log_dir = Some(v8_log_dir);
-        // Isolated runs reparent the benchmark out of samply's subtree, so samply
-        // must record system-wide under sudo. Unisolated runs record samply's own
-        // descendant tree unprivileged.
-        let cmd_builder = if isolate {
+        // When the benchmark was reparented out of samply's subtree, samply needs
+        // sudo to observe it; otherwise it records its own descendants unprivileged.
+        let cmd_builder = if requires_sudo {
             wrap_with_sudo(cmd_builder)?
         } else {
             cmd_builder