diff --git a/CHANGELOG.md b/CHANGELOG.md index f64dc94..6ba2af5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,92 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +* Sandbox containers are now reused across commands within a single build, + avoiding per-command `docker create`/`docker rm` overhead. Every `Command` + spawned inside a `BuildBuilder::run` closure runs in the same container, + and the container is recreated transparently if a previous command's OOM + kill brought it down. + +* Added the public `Sandbox`, `SandboxStatistics`, and `BuildResult` types, + plus `SandboxBuilder::start` for direct sandbox construction. The + underlying container is created and started before `start` returns, so + docker errors surface there rather than on the first command. + +* **BREAKING**: `BuildBuilder::run` now returns `BuildResult` instead of + `R`. The result wraps the closure's return value together with + `SandboxStatistics` gathered over the whole build: + + ```rust + let result = build_dir.build(&toolchain, &krate, sandbox).run(|build| { + build.cargo().args(&["test"]).run()?; + Ok(()) + })?; + let peak = result.statistics().memory_peak_bytes(); + let value = result.into_inner(); + ``` + + `BuildResult` also derefs to `&T` for ergonomics. + +* **BREAKING**: `Command::run` returns `()` instead of `ProcessStatistics`. + Peak memory is no longer tracked per command; the cumulative maximum + across all commands in the build is exposed via the `BuildResult` + returned from `BuildBuilder::run`: + + ```rust + let result = build_dir.build(&toolchain, &krate, sandbox).run(|build| { + build.cargo().args(&["test"]).run()?; + build.cargo().args(&["doc"]).run()?; + Ok(()) + })?; + let peak = result.statistics().memory_peak_bytes(); + ``` + +* **BREAKING**: `ProcessOutput::memory_peak_bytes` and the + `ProcessStatistics` type were removed. 
Use + `BuildResult::statistics().memory_peak_bytes()` (or + `Sandbox::statistics()` when using the sandbox API directly). + +* **BREAKING**: `Command::source_dir_mount_kind` moved to + `SandboxBuilder::source_dir_mount_kind`. The mount kind now applies to + every command spawned in the build, since they share one container: + + ```rust + let sandbox = SandboxBuilder::new() + .source_dir_mount_kind(MountKind::ReadWrite); + build_dir.build(&toolchain, &krate, sandbox).run(|build| { + build.cargo().args(&["test"]).run()?; + Ok(()) + })?; + ``` + + With a writable source mount, mutations from an earlier command + persist into all later commands in the same build (and across reuse + of the source directory by later builds) — only opt in if you trust + every step to leave the source in a sensible state. + +* **BREAKING**: `Command::new_sandboxed` was renamed to + `Command::new_in_sandbox` and now takes an `Rc<RefCell<Sandbox>>` + produced by `SandboxBuilder::start`, instead of a `SandboxBuilder`. + Most callers should use `BuildBuilder::run` instead; the lower-level + form is: + + ```rust + use std::{cell::RefCell, rc::Rc}; + + let sandbox = Rc::new(RefCell::new( + SandboxBuilder::new().start(&workspace, source_dir, target_dir)?, + )); + Command::new_in_sandbox(&workspace, sandbox, "cargo") + .args(&["test"]) + .run()?; + ``` + + By default the command's working directory is the sandbox's source + directory; an explicit `cd(...)` path must point inside the source + directory or it will panic at runtime. 
+ +See https://github.com/rust-lang/rustwide/pull/127 + ## [0.23.1] - 2026-04-19 * extend `ProcessStatistics` struct with some traits & helper functions diff --git a/Cargo.toml b/Cargo.toml index 6be11dd..b0ddd6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,6 @@ tokio = { version = "1.0", features = ["process", "time", "io-util", "rt", "rt-m tokio-stream = { version = "0.1", features = ["io-util"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -scopeguard = "1.0.0" tempfile = "3.0.0" attohttpc = "0.30.1" flate2 = "1" diff --git a/src/build.rs b/src/build.rs index 4d8c95d..4a78294 100644 --- a/src/build.rs +++ b/src/build.rs @@ -1,8 +1,12 @@ -use crate::cmd::{Command, MountKind, Runnable, SandboxBuilder}; -use crate::prepare::Prepare; -use crate::{Crate, PrepareError, Toolchain, Workspace}; +use crate::{ + Crate, PrepareError, Toolchain, Workspace, + cmd::{Command, Runnable, Sandbox, SandboxBuilder, SandboxStatistics, container_dirs}, + prepare::Prepare, +}; +use std::ops::{Deref, DerefMut}; use std::path::PathBuf; use std::vec::Vec; +use std::{cell::RefCell, rc::Rc}; #[derive(Clone)] pub(crate) enum CratePatch { @@ -42,6 +46,38 @@ pub struct BuildBuilder<'a> { patches: Vec, } +/// Output of a completed build together with build-level statistics. +pub struct BuildResult { + output: T, + statistics: SandboxStatistics, +} + +impl BuildResult { + /// Return the wrapped build output. + pub fn into_inner(self) -> T { + self.output + } + + /// Borrow the build-level statistics. + pub fn statistics(&self) -> &SandboxStatistics { + &self.statistics + } +} + +impl Deref for BuildResult { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.output + } +} + +impl DerefMut for BuildResult { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.output + } +} + impl BuildBuilder<'_> { /// Add a git-based patch to this build. /// Patches get added to the crate's Cargo.toml in the `patch.crates-io` table. 
@@ -111,6 +147,9 @@ impl BuildBuilder<'_> { /// be provided an instance of [`Build`](struct.Build.html) that allows spawning new processes /// inside the sandbox. /// + /// Returns a [`BuildResult`] containing both the closure's return value and build-level + /// statistics gathered across the sandbox lifetime. + /// /// All the state will be kept on disk as long as the closure doesn't exit: after that things /// might be removed. /// # Example @@ -124,13 +163,17 @@ impl BuildBuilder<'_> { /// # let krate = Crate::local("".as_ref()); /// # let sandbox = SandboxBuilder::new(); /// let mut build_dir = workspace.build_dir("foo"); - /// build_dir.build(&toolchain, &krate, sandbox).run(|build| { + /// let result = build_dir.build(&toolchain, &krate, sandbox).run(|build| { /// build.cargo().args(&["test", "--all"]).run()?; /// Ok(()) /// })?; + /// let _peak = result.statistics().memory_peak_bytes(); /// # Ok(()) /// # } - pub fn run anyhow::Result>(self, f: F) -> anyhow::Result { + pub fn run anyhow::Result>( + self, + f: F, + ) -> anyhow::Result> { self.build_dir .run(self.toolchain, self.krate, self.sandbox, self.patches, f) } @@ -187,7 +230,7 @@ impl BuildDirectory { sandbox: SandboxBuilder, patches: Vec, f: F, - ) -> anyhow::Result { + ) -> anyhow::Result> { let source_dir = self.source_dir(); if source_dir.exists() { crate::utils::remove_dir_all(&source_dir)?; @@ -203,14 +246,23 @@ impl BuildDirectory { })?; std::fs::create_dir_all(self.target_dir())?; + let sandbox = Rc::new(RefCell::new(sandbox.start( + &self.workspace, + source_dir.clone(), + self.target_dir(), + )?)); let res = f(&Build { dir: self, toolchain, - sandbox, + sandbox: sandbox.clone(), })?; + let statistics = sandbox.borrow_mut().cleanup()?; crate::utils::remove_dir_all(&source_dir)?; - Ok(res) + Ok(BuildResult { + output: res, + statistics, + }) } /// Remove all the contents of the build directory, freeing disk space. 
@@ -241,7 +293,7 @@ impl BuildDirectory { pub struct Build<'ws> { dir: &'ws BuildDirectory, toolchain: &'ws Toolchain, - sandbox: SandboxBuilder, + sandbox: Rc>>, } impl<'ws> Build<'ws> { @@ -251,6 +303,11 @@ impl<'ws> Build<'ws> { /// outside the sandbox. The crate's source directory will be the working directory for the /// command. /// + /// All commands spawned through the same [`Build`] share a single underlying container, so + /// running a sandboxed command from inside another sandboxed command's + /// [`process_lines`](struct.Command.html#method.process_lines) callback is not supported and + /// will panic at runtime. + /// /// # Example /// /// ```no_run @@ -270,17 +327,10 @@ impl<'ws> Build<'ws> { /// # } /// ``` pub fn cmd<'pl, R: Runnable>(&self, bin: R) -> Command<'ws, 'pl> { - let container_dir = &*crate::cmd::container_dirs::TARGET_DIR; + let container_dir = &*container_dirs::TARGET_DIR; - Command::new_sandboxed( - &self.dir.workspace, - self.sandbox - .clone() - .mount(&self.dir.target_dir(), container_dir, MountKind::ReadWrite), - bin, - ) - .cd(self.dir.source_dir()) - .env("CARGO_TARGET_DIR", container_dir) + Command::new_in_sandbox(&self.dir.workspace, self.sandbox.clone(), bin) + .env("CARGO_TARGET_DIR", container_dir) } /// Run `cargo` inside the sandbox, using the toolchain chosen for the build. @@ -310,6 +360,14 @@ impl<'ws> Build<'ws> { self.cmd(self.toolchain.cargo()) } + /// Snapshot the sandbox statistics (e.g. peak memory) gathered so far in + /// this build. The same data is available on the [`BuildResult`] returned + /// from [`BuildBuilder::run`]; this method exposes it mid-build, e.g. for + /// per-step reporting from inside the closure. + pub fn statistics(&self) -> SandboxStatistics { + self.sandbox.borrow().statistics() + } + /// Get the path to the source code on the host machine (outside the sandbox). 
pub fn host_source_dir(&self) -> PathBuf { self.dir.source_dir() diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index f55e99c..c0f8456 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -14,12 +14,11 @@ use futures_util::{ }; use log::{error, info}; use process_lines_actions::InnerState; -use std::env::consts::EXE_SUFFIX; use std::ffi::{OsStr, OsString}; -use std::mem; use std::path::{Path, PathBuf}; use std::process::{ExitStatus, Stdio}; use std::time::{Duration, Instant}; +use std::{cell::RefCell, env::consts::EXE_SUFFIX, rc::Rc}; use std::{convert::AsRef, sync::LazyLock}; use tokio::{ io::{AsyncBufReadExt, BufReader}, @@ -200,7 +199,7 @@ impl Runnable for &B { #[allow(clippy::type_complexity)] pub struct Command<'w, 'pl> { workspace: Option<&'w Workspace>, - sandbox: Option, + sandbox: Option>>>, binary: Binary, args: Vec, env: Vec<(OsString, OsString)>, @@ -210,7 +209,6 @@ pub struct Command<'w, 'pl> { no_output_timeout: Option, log_command: bool, log_output: bool, - source_dir_mount_kind: MountKind, } impl<'w> Command<'w, '_> { @@ -219,10 +217,14 @@ impl<'w> Command<'w, '_> { binary.prepare_command(Self::new_inner(binary.name(), Some(workspace), None)) } - /// Create a new, sandboxed command. - pub fn new_sandboxed( + /// Create a new command that runs inside an existing sandbox. + /// + /// By default the command's working directory is the sandbox's source directory; call + /// [`cd`](#method.cd) to override it. Any explicit `cd` path must point inside the + /// sandbox source directory — paths outside it will panic at runtime. 
+ pub fn new_in_sandbox( workspace: &'w Workspace, - sandbox: SandboxBuilder, + sandbox: Rc>>, binary: R, ) -> Self { binary.prepare_command(Self::new_inner( @@ -239,7 +241,7 @@ impl<'w> Command<'w, '_> { fn new_inner( binary: Binary, workspace: Option<&'w Workspace>, - sandbox: Option, + sandbox: Option>>>, ) -> Self { let (timeout, no_output_timeout) = if let Some(workspace) = workspace { ( @@ -261,7 +263,6 @@ impl<'w> Command<'w, '_> { no_output_timeout, log_output: true, log_command: true, - source_dir_mount_kind: MountKind::ReadOnly, } } @@ -310,6 +311,11 @@ impl<'w> Command<'w, '_> { /// Set the function that will be called each time a line is outputted to either the standard /// output or the standard error. Only one function can be set at any time for a command. /// + /// For sandboxed commands, the callback runs while the underlying [`Sandbox`] is mutably + /// borrowed. Spawning another sandboxed command (e.g. via [`Build::cmd`](../build/struct.Build.html#method.cmd)) + /// from inside the callback will panic at runtime — that flow is not supported with the + /// reused-container model. + /// /// The method is useful to analyze the command's output without storing all of it in memory. /// This example builds a crate and detects compiler errors (ICEs): /// @@ -358,27 +364,11 @@ impl<'w> Command<'w, '_> { self } - /// Sets how the source directory is mounted. - /// - /// The default mount kind is read-only. - /// - /// ## Security - /// - /// Be sure you understand the implications of setting this. If you set - /// this to read-write, and the source directory may potentially be - /// reused, then subsequent invocations may see those changes. Beware of - /// trusting those previous invocations or the contents of the source - /// directory. 
- pub fn source_dir_mount_kind(mut self, mount_kind: MountKind) -> Self { - self.source_dir_mount_kind = mount_kind; - self - } - /// Run the prepared command and return an error if it fails (for example with a non-zero exit /// code or a timeout). - pub fn run(self) -> Result { - let output = self.run_inner(false)?; - Ok(output.statistics) + pub fn run(self) -> Result<(), CommandError> { + self.run_inner(false)?; + Ok(()) } /// Run the prepared command and return its output if it succeedes. If it fails (for example @@ -392,10 +382,7 @@ impl<'w> Command<'w, '_> { } fn run_inner(self, capture: bool) -> Result { - if let Some(mut builder) = self.sandbox { - let workspace = self - .workspace - .expect("sandboxed builds without a workspace are not supported"); + if let Some(sandbox) = self.sandbox { let binary = match self.binary { Binary::Global(path) => path, Binary::ManagedByRustwide(path) => { @@ -404,60 +391,54 @@ impl<'w> Command<'w, '_> { }; let mut cmd = vec![binary.to_string_lossy().as_ref().to_string()]; - for arg in self.args { cmd.push(arg.to_string_lossy().to_string()); } - let source_dir = match self.cd { - Some(path) => path, - None => PathBuf::from("."), - }; - - builder = builder - .mount( - &source_dir, - &container_dirs::WORK_DIR, - self.source_dir_mount_kind, + let workdir = self + .cd + .as_ref() + .map(|path| crate::utils::normalize_path(path)); + let mut env = vec![ + ( + "SOURCE_DIR".to_string(), + container_dirs::WORK_DIR.to_string_lossy().to_string(), + ), + ( + "CARGO_HOME".to_string(), + container_dirs::CARGO_HOME.to_string_lossy().to_string(), + ), + ( + "RUSTUP_HOME".to_string(), + container_dirs::RUSTUP_HOME.to_string_lossy().to_string(), + ), + ]; + env.extend(self.env.iter().map(|(key, value)| { + ( + key.to_string_lossy().to_string(), + value.to_string_lossy().to_string(), ) - .env("SOURCE_DIR", container_dirs::WORK_DIR.to_str().unwrap()) - .workdir(container_dirs::WORK_DIR.to_str().unwrap()) - .cmd(cmd); - - if let Some(user) = 
native::current_user() { - builder = builder.user(user.user_id, user.group_id); - } - - for (key, value) in self.env { - builder = builder.env( - key.to_string_lossy().as_ref(), - value.to_string_lossy().as_ref(), - ); - } - - builder = builder - .mount( - &workspace.cargo_home(), - &container_dirs::CARGO_HOME, - MountKind::ReadOnly, - ) - .mount( - &workspace.rustup_home(), - &container_dirs::RUSTUP_HOME, - MountKind::ReadOnly, + })); + let user = + native::current_user().map(|user| format!("{}:{}", user.user_id, user.group_id)); + let command = SandboxCommand { + cmd, + env: &env, + workdir: workdir.as_deref().map(|p| p.to_str().unwrap()), + user: user.as_deref(), + }; + sandbox + .try_borrow_mut() + .expect("re-entrant sandboxed commands are not supported") + .run( + command, + self.timeout, + self.no_output_timeout, + self.process_lines, + self.log_output, + self.log_command, + capture, ) - .env("CARGO_HOME", container_dirs::CARGO_HOME.to_str().unwrap()) - .env("RUSTUP_HOME", container_dirs::RUSTUP_HOME.to_str().unwrap()); - - builder.run( - workspace, - self.timeout, - self.no_output_timeout, - self.process_lines, - self.log_output, - self.log_command, - capture, - ) } else { let (binary, managed_by_rustwide) = match self.binary { // global paths should never be normalized @@ -552,50 +533,15 @@ impl From for ProcessOutput { ProcessOutput { stdout: orig.stdout, stderr: orig.stderr, - statistics: ProcessStatistics::default(), } } } -/// collected statistics about the process execution. -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq, Eq))] -pub struct ProcessStatistics { - /// peak memory usage in bytes. - /// This is populated for sandboxed commands on systems - /// with cgroups v1/v2. 
- pub memory_peak: Option, -} - -impl ProcessStatistics { - /// Merge two `ProcessStatistics` into one, following a fixed set of aggregation rules: - /// - /// - `memory_peak`: the maximum of the two values is kept, since a merged peak - /// should reflect the highest peak observed across all runs. If only one side - /// has a value and the other is `None`, that value is used as-is. - pub fn merge(self, other: Self) -> Self { - Self { - memory_peak: match (self.memory_peak, other.memory_peak) { - (Some(a), Some(b)) => Some(a.max(b)), - (a, b) => a.or(b), - }, - } - } - - /// Merge another `ProcessStatistics` into `self` in place. - /// - /// See [`merge`](Self::merge) for the aggregation rules. - pub fn merge_mut(&mut self, other: Self) { - *self = mem::take(self).merge(other); - } -} - /// Output of a [`Command`](struct.Command.html) when it was executed with the /// [`run_capture`](struct.Command.html#method.run_capture) method. pub struct ProcessOutput { stdout: Vec, stderr: Vec, - statistics: ProcessStatistics, } impl ProcessOutput { @@ -608,14 +554,6 @@ impl ProcessOutput { pub fn stderr_lines(&self) -> &[String] { &self.stderr } - - /// Return the peak memory usage in bytes of the sandbox container, if available. - /// - /// This is populated for sandboxed commands on systems with cgroups v2. Returns `None` for - /// non-sandboxed commands or when the metric could not be read. 
- pub fn memory_peak_bytes(&self) -> Option { - self.statistics.memory_peak - } } enum OutputKind { @@ -757,43 +695,3 @@ fn exe_suffix(file: &OsStr) -> OsString { path.push(EXE_SUFFIX); path } - -#[cfg(test)] -mod tests { - use super::ProcessStatistics; - use test_case::test_case; - - const fn stats(peak: Option) -> ProcessStatistics { - ProcessStatistics { memory_peak: peak } - } - - #[test_case(stats(None), stats(None), stats(None))] - #[test_case(stats(Some(100)), stats(None), stats(Some(100)))] - #[test_case(stats(None), stats(Some(100)), stats(Some(100)))] - #[test_case(stats(Some(300)), stats(Some(100)), stats(Some(300)))] - #[test_case(stats(Some(100)), stats(Some(300)), stats(Some(300)))] - #[test_case(stats(Some(42)), stats(Some(42)), stats(Some(42)))] - fn test_merge(lhs: ProcessStatistics, rhs: ProcessStatistics, expected: ProcessStatistics) { - { - let lhs = lhs.clone(); - let rhs = rhs.clone(); - assert_eq!(lhs.merge(rhs), expected); - } - - { - let mut lhs = lhs.clone(); - lhs.merge_mut(rhs); - assert_eq!(lhs, expected); - } - } - - #[test] - fn merge_mut_accumulate_over_multiple() { - let mut s = stats(None); - s.merge_mut(stats(Some(50))); - s.merge_mut(stats(Some(200))); - s.merge_mut(stats(None)); - s.merge_mut(stats(Some(150))); - assert_eq!(s.memory_peak, Some(200)); - } -} diff --git a/src/cmd/sandbox.rs b/src/cmd/sandbox.rs index e0b97d1..29ea4eb 100644 --- a/src/cmd/sandbox.rs +++ b/src/cmd/sandbox.rs @@ -1,10 +1,12 @@ -use crate::Workspace; -use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics}; +use crate::{ + Workspace, + cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, container_dirs}, +}; use log::{error, info}; use serde::Deserialize; use std::{ - error::Error, - fmt, + cell::Cell, + fmt, mem, ops::RangeInclusive, path::{Path, PathBuf}, time::Duration, @@ -76,7 +78,7 @@ impl SandboxImage { } /// Whether to mount a path in the sandbox with write permissions or not. 
-#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] #[non_exhaustive] pub enum MountKind { /// Allow the sandboxed code to change the mounted data. @@ -140,33 +142,79 @@ impl MountConfig { } } -/// The sandbox builder allows to configure a sandbox, used later in a -/// [`Command`](struct.Command.html). +/// The sandbox builder allows configuring a [`Sandbox`]. +/// +/// Call [`SandboxBuilder::start`] to create a live sandbox, then run commands +/// inside it with [`Command::new_in_sandbox`](struct.Command.html#method.new_in_sandbox). #[derive(Clone)] pub struct SandboxBuilder { mounts: Vec, - env: Vec<(String, String)>, + source_dir_mount_kind: MountKind, memory_limit: Option, cpu_limit: Option, cpuset_cpus: Option>, - workdir: Option, - user: Option, - cmd: Vec, enable_networking: bool, } +/// Statistics collected for a sandbox. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct SandboxStatistics { + memory_peak: Option, +} + +impl SandboxStatistics { + /// Return the peak memory usage in bytes observed across the whole sandbox, if available. + pub fn memory_peak_bytes(&self) -> Option { + self.memory_peak + } + + /// Merge two `SandboxStatistics` into one, keeping the highest observed peak memory. + pub fn merge(self, other: Self) -> Self { + Self { + memory_peak: match (self.memory_peak, other.memory_peak) { + (Some(a), Some(b)) => Some(a.max(b)), + (a, b) => a.or(b), + }, + } + } + + /// Merge another `SandboxStatistics` into `self` in place. + pub fn merge_mut(&mut self, other: Self) { + *self = mem::take(self).merge(other); + } +} + +/// A live sandbox that can execute one or more commands. +/// +/// Sandboxes are returned already started by [`SandboxBuilder::start`] and +/// can be reused across multiple commands. If a command exhausts the +/// container's memory limit and kills the container, the next command +/// transparently recreates it. 
+pub struct Sandbox<'w> { + workspace: &'w Workspace, + builder: SandboxBuilder, + source_dir: PathBuf, + target_dir: PathBuf, + container: Option>, + memory_peak: Cell>, +} + +pub(crate) struct SandboxCommand<'a> { + pub(crate) cmd: Vec, + pub(crate) env: &'a [(String, String)], + pub(crate) workdir: Option<&'a str>, + pub(crate) user: Option<&'a str>, +} + impl SandboxBuilder { /// Create a new sandbox builder. pub fn new() -> Self { Self { mounts: Vec::new(), - env: Vec::new(), - workdir: None, + source_dir_mount_kind: MountKind::ReadOnly, memory_limit: None, cpu_limit: None, cpuset_cpus: None, - user: None, - cmd: Vec::new(), enable_networking: true, } } @@ -182,6 +230,24 @@ impl SandboxBuilder { self } + /// Sets how the source directory is mounted for reusable sandbox commands. + /// + /// The default mount kind is read-only. + /// + /// ## Security + /// + /// Be sure you understand the implications of setting this. The same container + /// backs every command spawned inside a single + /// [`BuildBuilder::run`](../build/struct.BuildBuilder.html#method.run) closure, so with + /// `MountKind::ReadWrite` any mutation made by an earlier command persists into all + /// later commands in that build — and across reuse of the same source directory by + /// later builds. Do not trust the source directory's contents to be untouched if you + /// opt in to a writable mount. + pub fn source_dir_mount_kind(mut self, mount_kind: MountKind) -> Self { + self.source_dir_mount_kind = mount_kind; + self + } + /// Enable or disable the sandbox's memory limit. When the processes inside the sandbox use /// more memory than the limit the sandbox will be killed. 
/// @@ -220,24 +286,36 @@ impl SandboxBuilder { self } - pub(super) fn env, S2: Into>(mut self, key: S1, value: S2) -> Self { - self.env.push((key.into(), value.into())); - self - } - - pub(super) fn cmd(mut self, cmd: Vec) -> Self { - self.cmd = cmd; - self - } - - pub(super) fn workdir>(mut self, workdir: S) -> Self { - self.workdir = Some(workdir.into()); - self + /// Start a live sandbox from this configuration. + /// + /// The returned sandbox can be used to run one or more commands against a + /// fixed source directory and target directory. The underlying container is + /// created and started before this returns, so any docker errors surface here + /// rather than on the first command. + pub fn start<'w>( + self, + workspace: &'w Workspace, + source_dir: PathBuf, + target_dir: PathBuf, + ) -> Result, CommandError> { + let source_dir = crate::utils::normalize_path(&source_dir); + let target_dir = crate::utils::normalize_path(&target_dir); + let container = Sandbox::create_container(&self, workspace, &source_dir, &target_dir)?; + Ok(Sandbox { + workspace, + builder: self, + source_dir, + target_dir, + container: Some(container), + memory_peak: Cell::new(None), + }) } - pub(super) fn user(mut self, user: u32, group: u32) -> Self { - self.user = Some(format!("{user}:{group}")); - self + fn create_started(self, workspace: &Workspace) -> Result, CommandError> { + let container = self.create(workspace)?; + container.start()?; + container.record_oom_kill_count(); + Ok(container) } fn create(self, workspace: &Workspace) -> Result, CommandError> { @@ -295,51 +373,13 @@ impl SandboxBuilder { .run_capture() .map_err(|err| CommandError::SandboxContainerCreate(Box::new(err)))?; Ok(Container { - id: out.stdout_lines()[0].clone(), + id: Some(out.stdout_lines()[0].clone()), workspace, - cmd: self.cmd, - env: self.env, - workdir: self.workdir, - user: self.user, + running: Cell::new(true), + oom_killed: Cell::new(false), + oom_kill_count: Cell::new(None), }) } - - 
#[allow(clippy::too_many_arguments)] - #[allow(clippy::type_complexity)] - pub(super) fn run( - self, - workspace: &Workspace, - timeout: Option, - no_output_timeout: Option, - process_lines: Option<&mut dyn FnMut(&str, &mut ProcessLinesActions)>, - log_output: bool, - log_command: bool, - capture: bool, - ) -> Result { - let container = self.create(workspace)?; - - // Ensure the container is properly deleted even if something panics - scopeguard::defer! {{ - if let Err(err) = container.delete() { - error!("failed to delete container {}", container.id); - error!("caused by: {err}"); - let mut err: &dyn Error = &err; - while let Some(cause) = err.source() { - error!("caused by: {cause}"); - err = cause; - } - } - }} - - container.run( - timeout, - no_output_timeout, - process_lines, - log_output, - log_command, - capture, - ) - } } #[derive(Deserialize)] @@ -352,30 +392,39 @@ struct InspectContainer { struct InspectState { #[serde(rename = "OOMKilled")] oom_killed: bool, + #[serde(rename = "Running")] + running: bool, } -#[derive(Clone)] struct Container<'w> { - // Docker container ID - id: String, + /// Docker container ID. `Some` while the container is live; `take`n by a + /// successful [`Container::delete`] so that [`Drop`] knows there's + /// nothing left to remove. 
+ id: Option, workspace: &'w Workspace, - // Command-level config for `docker exec` (not baked into `docker create`) - cmd: Vec, - env: Vec<(String, String)>, - workdir: Option, - user: Option, + running: Cell, + oom_killed: Cell, + oom_kill_count: Cell>, +} + +impl Container<'_> { + fn id(&self) -> &str { + self.id + .as_deref() + .expect("container has already been deleted") + } } impl fmt::Display for Container<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.id.fmt(f) + self.id().fmt(f) } } impl Container<'_> { fn inspect(&self) -> Result { let output = Command::new(self.workspace, "docker") - .args(&["inspect", &self.id]) + .args(&["inspect", self.id()]) .log_output(false) .run_capture()?; @@ -389,16 +438,7 @@ impl Container<'_> { /// Start the container in detached mode (without `-a`). fn start(&self) -> Result<(), CommandError> { Command::new(self.workspace, "docker") - .args(&["start", &self.id]) - .log_output(false) - .run() - .map(|_| ()) - } - - /// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period. - fn stop(&self) -> Result<(), CommandError> { - Command::new(self.workspace, "docker") - .args(&["stop", "-t", "1", &self.id]) + .args(&["start", self.id()]) .log_output(false) .run() .map(|_| ()) @@ -407,7 +447,7 @@ impl Container<'_> { /// Helper to `docker exec cat ` and return stdout lines on success. fn exec_cat_file(&self, path: &str) -> Option> { Command::new(self.workspace, "docker") - .args(&["exec", &self.id, "cat", path]) + .args(&["exec", self.id(), "cat", path]) .log_output(false) .log_command(false) .run_capture() @@ -415,6 +455,10 @@ impl Container<'_> { .map(|o| o.stdout_lines().to_vec()) } + fn record_oom_kill_count(&self) { + self.oom_kill_count.set(self.read_oom_kill_count()); + } + /// Best-effort read of peak memory usage from the still-running container. /// Tries cgroups v2 first, then falls back to cgroups v1. 
fn read_memory_peak(&self) -> Option { @@ -439,7 +483,7 @@ impl Container<'_> { /// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't /// report `OOMKilled`, so we check the cgroup events directly. /// Tries cgroups v2 first, then falls back to cgroups v1. - fn check_cgroup_oom(&self) -> bool { + fn read_oom_kill_count(&self) -> Option { // Both v1 and v2 expose `oom_kill ` — just in different files. let paths = [ "/sys/fs/cgroup/memory.events", // v2 @@ -447,24 +491,45 @@ impl Container<'_> { ]; for path in paths { if let Some(lines) = self.exec_cat_file(path) { - let found = lines.iter().any(|line| { - line.strip_prefix("oom_kill ") + for line in &lines { + if let Some(count) = line + .strip_prefix("oom_kill ") .and_then(|rest| rest.trim().parse::().ok()) - .is_some_and(|count| count > 0) - }); - if found { - return true; + { + return Some(count); + } } - // File existed but no OOM — don't try the other version - return false; + return Some(0); } } - false + None + } + + fn check_cgroup_oom(&self) -> bool { + let current = self.read_oom_kill_count(); + let previous = self.oom_kill_count.replace(current); + + current.unwrap_or_default() > previous.unwrap_or_default() + } + + fn check_container_oom(&self, details: &InspectContainer) -> bool { + self.running.set(details.state.running); + // `OOMKilled` can stay true after the first failure. Treat it as an + // edge-triggered signal so later commands in the same container don't + // keep being reported as fresh OOMs. 
+ let previous = self.oom_killed.replace(details.state.oom_killed); + details.state.oom_killed && !previous + } + + fn is_running(&self) -> bool { + self.running.get() } - #[allow(clippy::type_complexity)] - fn run( + #[allow(clippy::too_many_arguments, clippy::type_complexity)] + fn run_command( &self, + command: SandboxCommand<'_>, + record_memory_peak: impl FnOnce(Option), timeout: Option, no_output_timeout: Option, process_lines: Option<&mut dyn FnMut(&str, &mut ProcessLinesActions)>, @@ -472,29 +537,26 @@ impl Container<'_> { log_command: bool, capture: bool, ) -> Result { - // Start the container in detached mode (runs `sleep infinity`) - self.start()?; - // Build the `docker exec` command with env/workdir/user from the sandbox config let mut args: Vec = vec!["exec".into()]; - for (var, value) in &self.env { + for (var, value) in command.env { args.push("-e".into()); args.push(format!("{var}={value}")); } - if let Some(ref workdir) = self.workdir { + if let Some(workdir) = command.workdir { args.push("-w".into()); - args.push(workdir.clone()); + args.push(workdir.to_string()); } - if let Some(ref user) = self.user { + if let Some(user) = command.user { args.push("--user".into()); - args.push(user.clone()); + args.push(user.to_string()); } - args.push(self.id.clone()); - args.extend(self.cmd.iter().cloned()); + args.push(self.id().to_string()); + args.extend(command.cmd.iter().cloned()); let mut cmd = Command::new(self.workspace, "docker") .args(&args) @@ -511,36 +573,177 @@ impl Container<'_> { // Read peak memory usage while the container is still running (best-effort) let memory_peak = self.read_memory_peak(); + record_memory_peak(memory_peak); // Check OOM via cgroup events (catches cases where only the exec'd process // was killed, leaving the container's init process alive) let cgroup_oom = self.check_cgroup_oom(); - // Explicitly stop the container now that we're done reading metrics. - // The scopeguard will still call `docker rm -f` for final cleanup. 
- let _ = self.stop(); - let details = self.inspect()?; + let container_oom = self.check_container_oom(&details); // Return a different error if the container was killed due to an OOM - if details.state.oom_killed || cgroup_oom { + if container_oom || cgroup_oom { Err(match res { Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM, Err(err) => err, }) } else { - res.map(|mut output| { - output.statistics = ProcessStatistics { memory_peak }; - output - }) + res } } - fn delete(&self) -> Result<(), CommandError> { - Command::new(self.workspace, "docker") - .args(&["rm", "-f", &self.id]) + /// Run `docker rm -f` for this container, idempotently. On success the + /// stored id is taken (so subsequent calls — including the one in + /// [`Drop`] — are no-ops). On failure the id is restored so [`Drop`] + /// (or a later call) can retry. + fn delete(&mut self) -> Result<(), CommandError> { + let Some(id) = self.id.take() else { + return Ok(()); + }; + if let Err(err) = Command::new(self.workspace, "docker") + .args(&["rm", "-f", &id]) .run() - .map(|_| ()) + { + self.id = Some(id); + return Err(err); + } + Ok(()) + } +} + +impl Drop for Container<'_> { + fn drop(&mut self) { + if let Err(err) = self.delete() { + error!( + "docker rm failed, leaked sandbox container {}:\n{:?}", + self.id.as_deref().unwrap_or_default(), + err + ); + } + } +} + +impl<'w> Sandbox<'w> { + fn update_memory_peak(peak: &Cell>, memory_peak: Option) { + let updated = match (peak.get(), memory_peak) { + (Some(lhs), Some(rhs)) => Some(lhs.max(rhs)), + (lhs, rhs) => lhs.or(rhs), + }; + peak.set(updated); + } + + /// Return the statistics gathered across the sandbox lifetime so far. 
+ pub fn statistics(&self) -> SandboxStatistics { + SandboxStatistics { + memory_peak: self.memory_peak.get(), + } + } + + pub(crate) fn container_workdir(&self, path: &Path) -> Option { + let relative = path.strip_prefix(&self.source_dir).ok()?; + Some(container_dirs::WORK_DIR.join(relative)) + } + + fn create_container( + builder: &SandboxBuilder, + workspace: &'w Workspace, + source_dir: &Path, + target_dir: &Path, + ) -> Result, CommandError> { + builder + .clone() + .mount( + source_dir, + &container_dirs::WORK_DIR, + builder.source_dir_mount_kind, + ) + .mount( + target_dir, + &container_dirs::TARGET_DIR, + MountKind::ReadWrite, + ) + .mount( + &workspace.cargo_home(), + &container_dirs::CARGO_HOME, + MountKind::ReadOnly, + ) + .mount( + &workspace.rustup_home(), + &container_dirs::RUSTUP_HOME, + MountKind::ReadOnly, + ) + .create_started(workspace) + } + + fn ensure_reusable_container(&mut self) -> Result<(), CommandError> { + // The container can be stopped if a previous command got OOM-killed + // at the container level, or missing after an explicit `cleanup()`. + // Either way, recreate before attempting another `docker exec`. + // Assigning the new value drops the old `Container`, whose `Drop` + // impl runs `docker rm`. 
+ let needs_recreate = self + .container + .as_ref() + .is_none_or(|container| !container.is_running()); + if needs_recreate { + self.container = Some(Self::create_container( + &self.builder, + self.workspace, + &self.source_dir, + &self.target_dir, + )?); + } + + Ok(()) + } + + #[allow(clippy::too_many_arguments, clippy::type_complexity)] + pub(crate) fn run( + &mut self, + command: SandboxCommand<'_>, + timeout: Option, + no_output_timeout: Option, + process_lines: Option<&mut dyn FnMut(&str, &mut ProcessLinesActions)>, + log_output: bool, + log_command: bool, + capture: bool, + ) -> Result { + let container_workdir = match command.workdir { + Some(workdir) => self + .container_workdir(Path::new(workdir)) + .expect("explicit workdir must be inside the sandbox source directory"), + None => container_dirs::WORK_DIR.clone(), + }; + let command = SandboxCommand { + workdir: Some(container_workdir.to_str().unwrap()), + ..command + }; + self.ensure_reusable_container()?; + let container = self.container.as_ref().unwrap(); + let peak = &self.memory_peak; + container.run_command( + command, + |memory_peak| Self::update_memory_peak(peak, memory_peak), + timeout, + no_output_timeout, + process_lines, + log_output, + log_command, + capture, + ) + } + + /// Remove the live container owned by this sandbox and return the final + /// statistics. Returns an error if `docker rm` fails; the container is + /// also torn down by [`Drop`] as a fallback if this method is not called + /// (or if its `docker rm` failed and a retry on drop succeeds). 
+ pub fn cleanup(&mut self) -> Result { + if let Some(container) = self.container.as_mut() { + container.delete()?; + } + self.container = None; + Ok(self.statistics()) } } @@ -565,9 +768,44 @@ fn format_cpuset_cpus(cpus: &RangeInclusive) -> String { #[cfg(test)] mod tests { use super::*; + use test_case::test_case; #[test] fn formats_cpuset_cpus() { assert_eq!(format_cpuset_cpus(&(2..=4)), "2-4"); } + + const fn stats(peak: Option) -> SandboxStatistics { + SandboxStatistics { memory_peak: peak } + } + + #[test_case(stats(None), stats(None), stats(None))] + #[test_case(stats(Some(100)), stats(None), stats(Some(100)))] + #[test_case(stats(None), stats(Some(100)), stats(Some(100)))] + #[test_case(stats(Some(300)), stats(Some(100)), stats(Some(300)))] + #[test_case(stats(Some(100)), stats(Some(300)), stats(Some(300)))] + #[test_case(stats(Some(42)), stats(Some(42)), stats(Some(42)))] + fn test_merge(lhs: SandboxStatistics, rhs: SandboxStatistics, expected: SandboxStatistics) { + { + let lhs = lhs.clone(); + let rhs = rhs.clone(); + assert_eq!(lhs.merge(rhs), expected); + } + + { + let mut lhs = lhs.clone(); + lhs.merge_mut(rhs); + assert_eq!(lhs, expected); + } + } + + #[test] + fn merge_mut_accumulate_over_multiple() { + let mut s = stats(None); + s.merge_mut(stats(Some(50))); + s.merge_mut(stats(Some(200))); + s.merge_mut(stats(None)); + s.merge_mut(stats(Some(150))); + assert_eq!(s.memory_peak, Some(200)); + } } diff --git a/src/lib.rs b/src/lib.rs index 9d7a653..3f6b2c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,8 @@ mod tools; mod utils; mod workspace; -pub use crate::build::{Build, BuildBuilder, BuildDirectory}; +pub use crate::build::{Build, BuildBuilder, BuildDirectory, BuildResult}; +pub use crate::cmd::SandboxStatistics; #[cfg(feature = "alternate-registries")] pub use crate::crates::AlternativeRegistry; pub use crate::crates::Crate; diff --git a/tests/buildtest/container_cleanup.rs b/tests/buildtest/container_cleanup.rs index 416902c..60c44a9 
100644 --- a/tests/buildtest/container_cleanup.rs +++ b/tests/buildtest/container_cleanup.rs @@ -24,6 +24,99 @@ fn test_container_cleanup_on_success() { }); } +#[test] +fn test_container_reused_across_commands() { + super::runner::run("hello-world", |run| { + let container_ids = run.run(SandboxBuilder::new().enable_networking(false), |build| { + let first = build.cmd("cat").args(&["/etc/hostname"]).run_capture()?; + let second = build.cmd("cat").args(&["/etc/hostname"]).run_capture()?; + + Ok(vec![ + first.stdout_lines()[0].trim().to_string(), + second.stdout_lines()[0].trim().to_string(), + ]) + })?; + + assert_eq!(container_ids.len(), 2); + assert_eq!(container_ids[0], container_ids[1]); + assert!( + !container_ids[0].is_empty(), + "should capture a container ID" + ); + assert_container_stopped_and_removed(&container_ids[0]); + Ok(()) + }); +} + +#[test] +#[cfg(not(windows))] +fn test_container_recreated_when_previous_dies() { + super::runner::run("hello-world", |run| { + let container_ids = run.run(SandboxBuilder::new().enable_networking(false), |build| { + // Capture the original container's short ID via /etc/hostname. + let first = build.cmd("cat").args(&["/etc/hostname"]).run_capture()?; + let first_id = first.stdout_lines()[0].trim().to_string(); + assert!(!first_id.is_empty(), "should capture a container ID"); + + // Simulate a whole-container OOM (PID 1 killed) by stopping the + // container externally. The next `docker exec` will fail, and + // the inspect that follows refreshes the sandbox's cached + // running flag so the *next* command recreates the container. + let killed = std::process::Command::new("docker") + .args(["kill", &first_id]) + .output() + .expect("failed to spawn docker kill"); + assert!(killed.status.success(), "docker kill failed: {killed:?}"); + + // First command after the kill detects the dead state; its + // result is not load-bearing here. 
+ let _ = build.cmd("cat").args(&["/etc/hostname"]).run_capture(); + + // The next command should transparently run in a fresh container. + let second = build.cmd("cat").args(&["/etc/hostname"]).run_capture()?; + let second_id = second.stdout_lines()[0].trim().to_string(); + assert_ne!( + first_id, second_id, + "expected a new container after the previous one died" + ); + + Ok(vec![first_id, second_id]) + })?; + + // Both the killed-and-replaced container and the fresh one should be + // gone after the build finishes. + for id in container_ids.iter() { + assert_container_stopped_and_removed(id); + } + Ok(()) + }); +} + +#[test] +#[cfg(not(windows))] +fn test_reused_container_oom_does_not_poison_later_commands() { + use rustwide::cmd::CommandError; + + super::runner::run("allocate", |run| { + run.run( + SandboxBuilder::new() + .enable_networking(false) + .memory_limit(Some(512 * 1024 * 1024)), + |build| { + let first = build.cargo().args(&["run", "--", "1024"]).run(); + assert!( + matches!(first, Err(CommandError::SandboxOOM)), + "expected first command to OOM, got {first:?}" + ); + + build.cmd("true").run()?; + Ok(()) + }, + )?; + Ok(()) + }); +} + #[test] fn test_container_cleanup_on_command_failure() { super::runner::run("hello-world", |run| { diff --git a/tests/buildtest/mod.rs b/tests/buildtest/mod.rs index b2b61b9..7687446 100644 --- a/tests/buildtest/mod.rs +++ b/tests/buildtest/mod.rs @@ -173,18 +173,20 @@ fn test_memory_peak() { .enable_networking(false) .memory_limit(Some(512 * 1024 * 1024)), |build| { - let output = build.cargo().args(&["run", "--", "400"]).run_capture()?; - let peak = output.memory_peak_bytes().expect("missing memory peak"); - - assert!( - peak > 400_000_000 && peak < 450_000_000, - "memory peak roughly be 400 MiB, but it is {}", - peak - ); - + build.cargo().args(&["run", "--", "400"]).run_capture()?; Ok(()) }, - )?; + )? 
+ .statistics() + .memory_peak_bytes() + .inspect(|peak| { + assert!( + *peak > 400_000_000 && *peak < 450_000_000, + "memory peak roughly be 400 MiB, but it is {}", + peak + ); + }) + .expect("missing memory peak"); Ok(()) }); } diff --git a/tests/buildtest/runner.rs b/tests/buildtest/runner.rs index 074f699..79702f1 100644 --- a/tests/buildtest/runner.rs +++ b/tests/buildtest/runner.rs @@ -1,5 +1,7 @@ use rand::{RngExt as _, distr::Alphanumeric}; -use rustwide::{Build, BuildBuilder, Crate, Toolchain, Workspace, cmd::SandboxBuilder}; +use rustwide::{ + Build, BuildBuilder, BuildResult, Crate, Toolchain, Workspace, cmd::SandboxBuilder, +}; use std::path::Path; pub(crate) fn run(crate_name: &str, f: impl FnOnce(&mut Runner) -> anyhow::Result<()>) { @@ -58,7 +60,7 @@ impl Runner { &self, sandbox: SandboxBuilder, f: impl FnOnce(&Build) -> anyhow::Result, - ) -> anyhow::Result { + ) -> anyhow::Result> { self.build(sandbox, |builder| builder.run(f)) } } diff --git a/tests/integration/crates_git.rs b/tests/integration/crates_git.rs index 5b1f91b..0ac6d7d 100644 --- a/tests/integration/crates_git.rs +++ b/tests/integration/crates_git.rs @@ -25,6 +25,7 @@ fn test_fetch() -> anyhow::Result<()> { .stdout_lines()[0] .to_string()) }) + .map(|result| result.into_inner()) }; // Check if the initial commit was fetched