diff options
Diffstat (limited to '')
-rw-r--r-- | crates/cargo-util/src/process_builder.rs | 689 |
1 files changed, 689 insertions, 0 deletions
diff --git a/crates/cargo-util/src/process_builder.rs b/crates/cargo-util/src/process_builder.rs new file mode 100644 index 0000000..76392f2 --- /dev/null +++ b/crates/cargo-util/src/process_builder.rs @@ -0,0 +1,689 @@ +use crate::process_error::ProcessError; +use crate::read2; + +use anyhow::{bail, Context, Result}; +use jobserver::Client; +use shell_escape::escape; +use tempfile::NamedTempFile; + +use std::collections::BTreeMap; +use std::env; +use std::ffi::{OsStr, OsString}; +use std::fmt; +use std::io::{self, Write}; +use std::iter::once; +use std::path::Path; +use std::process::{Command, ExitStatus, Output, Stdio}; + +/// A builder object for an external process, similar to [`std::process::Command`]. +#[derive(Clone, Debug)] +pub struct ProcessBuilder { + /// The program to execute. + program: OsString, + /// A list of arguments to pass to the program. + args: Vec<OsString>, + /// Any environment variables that should be set for the program. + env: BTreeMap<String, Option<OsString>>, + /// The directory to run the program from. + cwd: Option<OsString>, + /// A list of wrappers that wrap the original program when calling + /// [`ProcessBuilder::wrapped`]. The last one is the outermost one. + wrappers: Vec<OsString>, + /// The `make` jobserver. See the [jobserver crate] for + /// more information. + /// + /// [jobserver crate]: https://docs.rs/jobserver/ + jobserver: Option<Client>, + /// `true` to include environment variable in display. + display_env_vars: bool, + /// `true` to retry with an argfile if hitting "command line too big" error. + /// See [`ProcessBuilder::retry_with_argfile`] for more information. + retry_with_argfile: bool, + /// Data to write to stdin. + stdin: Option<Vec<u8>>, +} + +impl fmt::Display for ProcessBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "`")?; + + if self.display_env_vars { + for (key, val) in self.env.iter() { + if let Some(val) = val { + let val = escape(val.to_string_lossy()); + if cfg!(windows) { + write!(f, "set {}={}&& ", key, val)?; + } else { + write!(f, "{}={} ", key, val)?; + } + } + } + } + + write!(f, "{}", self.get_program().to_string_lossy())?; + + for arg in self.get_args() { + write!(f, " {}", escape(arg.to_string_lossy()))?; + } + + write!(f, "`") + } +} + +impl ProcessBuilder { + /// Creates a new [`ProcessBuilder`] with the given executable path. + pub fn new<T: AsRef<OsStr>>(cmd: T) -> ProcessBuilder { + ProcessBuilder { + program: cmd.as_ref().to_os_string(), + args: Vec::new(), + cwd: None, + env: BTreeMap::new(), + wrappers: Vec::new(), + jobserver: None, + display_env_vars: false, + retry_with_argfile: false, + stdin: None, + } + } + + /// (chainable) Sets the executable for the process. + pub fn program<T: AsRef<OsStr>>(&mut self, program: T) -> &mut ProcessBuilder { + self.program = program.as_ref().to_os_string(); + self + } + + /// (chainable) Adds `arg` to the args list. + pub fn arg<T: AsRef<OsStr>>(&mut self, arg: T) -> &mut ProcessBuilder { + self.args.push(arg.as_ref().to_os_string()); + self + } + + /// (chainable) Adds multiple `args` to the args list. + pub fn args<T: AsRef<OsStr>>(&mut self, args: &[T]) -> &mut ProcessBuilder { + self.args + .extend(args.iter().map(|t| t.as_ref().to_os_string())); + self + } + + /// (chainable) Replaces the args list with the given `args`. + pub fn args_replace<T: AsRef<OsStr>>(&mut self, args: &[T]) -> &mut ProcessBuilder { + if let Some(program) = self.wrappers.pop() { + // User intend to replace all args, so we + // - use the outermost wrapper as the main program, and + // - cleanup other inner wrappers. + self.program = program; + self.wrappers = Vec::new(); + } + self.args = args.iter().map(|t| t.as_ref().to_os_string()).collect(); + self + } + + /// (chainable) Sets the current working directory of the process. + pub fn cwd<T: AsRef<OsStr>>(&mut self, path: T) -> &mut ProcessBuilder { + self.cwd = Some(path.as_ref().to_os_string()); + self + } + + /// (chainable) Sets an environment variable for the process. + pub fn env<T: AsRef<OsStr>>(&mut self, key: &str, val: T) -> &mut ProcessBuilder { + self.env + .insert(key.to_string(), Some(val.as_ref().to_os_string())); + self + } + + /// (chainable) Unsets an environment variable for the process. + pub fn env_remove(&mut self, key: &str) -> &mut ProcessBuilder { + self.env.insert(key.to_string(), None); + self + } + + /// Gets the executable name. + pub fn get_program(&self) -> &OsString { + self.wrappers.last().unwrap_or(&self.program) + } + + /// Gets the program arguments. + pub fn get_args(&self) -> impl Iterator<Item = &OsString> { + self.wrappers + .iter() + .rev() + .chain(once(&self.program)) + .chain(self.args.iter()) + .skip(1) // Skip the main `program + } + + /// Gets the current working directory for the process. + pub fn get_cwd(&self) -> Option<&Path> { + self.cwd.as_ref().map(Path::new) + } + + /// Gets an environment variable as the process will see it (will inherit from environment + /// unless explicitally unset). + pub fn get_env(&self, var: &str) -> Option<OsString> { + self.env + .get(var) + .cloned() + .or_else(|| Some(env::var_os(var))) + .and_then(|s| s) + } + + /// Gets all environment variables explicitly set or unset for the process (not inherited + /// vars). + pub fn get_envs(&self) -> &BTreeMap<String, Option<OsString>> { + &self.env + } + + /// Sets the `make` jobserver. See the [jobserver crate][jobserver_docs] for + /// more information. + /// + /// [jobserver_docs]: https://docs.rs/jobserver/0.1.6/jobserver/ + pub fn inherit_jobserver(&mut self, jobserver: &Client) -> &mut Self { + self.jobserver = Some(jobserver.clone()); + self + } + + /// Enables environment variable display. + pub fn display_env_vars(&mut self) -> &mut Self { + self.display_env_vars = true; + self + } + + /// Enables retrying with an argfile if hitting "command line too big" error + /// + /// This is primarily for the `@path` arg of rustc and rustdoc, which treat + /// each line as an command-line argument, so `LF` and `CRLF` bytes are not + /// valid as an argument for argfile at this moment. + /// For example, `RUSTDOCFLAGS="--crate-version foo\nbar" cargo doc` is + /// valid when invoking from command-line but not from argfile. + /// + /// To sum up, the limitations of the argfile are: + /// + /// - Must be valid UTF-8 encoded. + /// - Must not contain any newlines in each argument. + /// + /// Ref: + /// + /// - <https://doc.rust-lang.org/rustdoc/command-line-arguments.html#path-load-command-line-flags-from-a-path> + /// - <https://doc.rust-lang.org/rustc/command-line-arguments.html#path-load-command-line-flags-from-a-path> + pub fn retry_with_argfile(&mut self, enabled: bool) -> &mut Self { + self.retry_with_argfile = enabled; + self + } + + /// Sets a value that will be written to stdin of the process on launch. + pub fn stdin<T: Into<Vec<u8>>>(&mut self, stdin: T) -> &mut Self { + self.stdin = Some(stdin.into()); + self + } + + fn should_retry_with_argfile(&self, err: &io::Error) -> bool { + self.retry_with_argfile && imp::command_line_too_big(err) + } + + /// Like [`Command::status`] but with a better error message. + pub fn status(&self) -> Result<ExitStatus> { + self._status() + .with_context(|| ProcessError::could_not_execute(self)) + } + + fn _status(&self) -> io::Result<ExitStatus> { + if !debug_force_argfile(self.retry_with_argfile) { + let mut cmd = self.build_command(); + match cmd.spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(mut child) => return child.wait(), + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + let status = cmd.spawn()?.wait(); + close_tempfile_and_log_error(argfile); + status + } + + /// Runs the process, waiting for completion, and mapping non-success exit codes to an error. + pub fn exec(&self) -> Result<()> { + let exit = self.status()?; + if exit.success() { + Ok(()) + } else { + Err(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(exit), + None, + ) + .into()) + } + } + + /// Replaces the current process with the target process. + /// + /// On Unix, this executes the process using the Unix syscall `execvp`, which will block + /// this process, and will only return if there is an error. + /// + /// On Windows this isn't technically possible. Instead we emulate it to the best of our + /// ability. One aspect we fix here is that we specify a handler for the Ctrl-C handler. + /// In doing so (and by effectively ignoring it) we should emulate proxying Ctrl-C + /// handling to the application at hand, which will either terminate or handle it itself. + /// According to Microsoft's documentation at + /// <https://docs.microsoft.com/en-us/windows/console/ctrl-c-and-ctrl-break-signals>. + /// the Ctrl-C signal is sent to all processes attached to a terminal, which should + /// include our child process. If the child terminates then we'll reap them in Cargo + /// pretty quickly, and if the child handles the signal then we won't terminate + /// (and we shouldn't!) until the process itself later exits. + pub fn exec_replace(&self) -> Result<()> { + imp::exec_replace(self) + } + + /// Like [`Command::output`] but with a better error message. + pub fn output(&self) -> Result<Output> { + self._output() + .with_context(|| ProcessError::could_not_execute(self)) + } + + fn _output(&self) -> io::Result<Output> { + if !debug_force_argfile(self.retry_with_argfile) { + let mut cmd = self.build_command(); + match piped(&mut cmd, self.stdin.is_some()).spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(mut child) => { + if let Some(stdin) = &self.stdin { + child.stdin.take().unwrap().write_all(stdin)?; + } + return child.wait_with_output(); + } + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + let mut child = piped(&mut cmd, self.stdin.is_some()).spawn()?; + if let Some(stdin) = &self.stdin { + child.stdin.take().unwrap().write_all(stdin)?; + } + let output = child.wait_with_output(); + close_tempfile_and_log_error(argfile); + output + } + + /// Executes the process, returning the stdio output, or an error if non-zero exit status. + pub fn exec_with_output(&self) -> Result<Output> { + let output = self.output()?; + if output.status.success() { + Ok(output) + } else { + Err(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(output.status), + Some(&output), + ) + .into()) + } + } + + /// Executes a command, passing each line of stdout and stderr to the supplied callbacks, which + /// can mutate the string data. + /// + /// If any invocations of these function return an error, it will be propagated. + /// + /// If `capture_output` is true, then all the output will also be buffered + /// and stored in the returned `Output` object. If it is false, no caching + /// is done, and the callbacks are solely responsible for handling the + /// output. + pub fn exec_with_streaming( + &self, + on_stdout_line: &mut dyn FnMut(&str) -> Result<()>, + on_stderr_line: &mut dyn FnMut(&str) -> Result<()>, + capture_output: bool, + ) -> Result<Output> { + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + + let mut callback_error = None; + let mut stdout_pos = 0; + let mut stderr_pos = 0; + + let spawn = |mut cmd| { + if !debug_force_argfile(self.retry_with_argfile) { + match piped(&mut cmd, false).spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(child) => return Ok((child, None)), + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + Ok((piped(&mut cmd, false).spawn()?, Some(argfile))) + }; + + let status = (|| { + let cmd = self.build_command(); + let (mut child, argfile) = spawn(cmd)?; + let out = child.stdout.take().unwrap(); + let err = child.stderr.take().unwrap(); + read2(out, err, &mut |is_out, data, eof| { + let pos = if is_out { + &mut stdout_pos + } else { + &mut stderr_pos + }; + let idx = if eof { + data.len() + } else { + match data[*pos..].iter().rposition(|b| *b == b'\n') { + Some(i) => *pos + i + 1, + None => { + *pos = data.len(); + return; + } + } + }; + + let new_lines = &data[..idx]; + + for line in String::from_utf8_lossy(new_lines).lines() { + if callback_error.is_some() { + break; + } + let callback_result = if is_out { + on_stdout_line(line) + } else { + on_stderr_line(line) + }; + if let Err(e) = callback_result { + callback_error = Some(e); + break; + } + } + + if capture_output { + let dst = if is_out { &mut stdout } else { &mut stderr }; + dst.extend(new_lines); + } + + data.drain(..idx); + *pos = 0; + })?; + let status = child.wait(); + if let Some(argfile) = argfile { + close_tempfile_and_log_error(argfile); + } + status + })() + .with_context(|| ProcessError::could_not_execute(self))?; + let output = Output { + status, + stdout, + stderr, + }; + + { + let to_print = if capture_output { Some(&output) } else { None }; + if let Some(e) = callback_error { + let cx = ProcessError::new( + &format!("failed to parse process output: {}", self), + Some(output.status), + to_print, + ); + bail!(anyhow::Error::new(cx).context(e)); + } else if !output.status.success() { + bail!(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(output.status), + to_print, + )); + } + } + + Ok(output) + } + + /// Builds the command with an `@<path>` argfile that contains all the + /// arguments. This is primarily served for rustc/rustdoc command family. + fn build_command_with_argfile(&self) -> io::Result<(Command, NamedTempFile)> { + use std::io::Write as _; + + let mut tmp = tempfile::Builder::new() + .prefix("cargo-argfile.") + .tempfile()?; + + let mut arg = OsString::from("@"); + arg.push(tmp.path()); + let mut cmd = self.build_command_without_args(); + cmd.arg(arg); + log::debug!("created argfile at {} for {self}", tmp.path().display()); + + let cap = self.get_args().map(|arg| arg.len() + 1).sum::<usize>(); + let mut buf = Vec::with_capacity(cap); + for arg in &self.args { + let arg = arg.to_str().ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + format!( + "argument for argfile contains invalid UTF-8 characters: `{}`", + arg.to_string_lossy() + ), + ) + })?; + if arg.contains('\n') { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("argument for argfile contains newlines: `{arg}`"), + )); + } + writeln!(buf, "{arg}")?; + } + tmp.write_all(&mut buf)?; + Ok((cmd, tmp)) + } + + /// Builds a command from `ProcessBuilder` for everything but not `args`. + fn build_command_without_args(&self) -> Command { + let mut command = { + let mut iter = self.wrappers.iter().rev().chain(once(&self.program)); + let mut cmd = Command::new(iter.next().expect("at least one `program` exists")); + cmd.args(iter); + cmd + }; + if let Some(cwd) = self.get_cwd() { + command.current_dir(cwd); + } + for (k, v) in &self.env { + match *v { + Some(ref v) => { + command.env(k, v); + } + None => { + command.env_remove(k); + } + } + } + if let Some(ref c) = self.jobserver { + c.configure(&mut command); + } + command + } + + /// Converts `ProcessBuilder` into a `std::process::Command`, and handles + /// the jobserver, if present. + /// + /// Note that this method doesn't take argfile fallback into account. The + /// caller should handle it by themselves. + pub fn build_command(&self) -> Command { + let mut command = self.build_command_without_args(); + for arg in &self.args { + command.arg(arg); + } + command + } + + /// Wraps an existing command with the provided wrapper, if it is present and valid. + /// + /// # Examples + /// + /// ```rust + /// use cargo_util::ProcessBuilder; + /// // Running this would execute `rustc` + /// let cmd = ProcessBuilder::new("rustc"); + /// + /// // Running this will execute `sccache rustc` + /// let cmd = cmd.wrapped(Some("sccache")); + /// ``` + pub fn wrapped(mut self, wrapper: Option<impl AsRef<OsStr>>) -> Self { + if let Some(wrapper) = wrapper.as_ref() { + let wrapper = wrapper.as_ref(); + if !wrapper.is_empty() { + self.wrappers.push(wrapper.to_os_string()); + } + } + self + } +} + +/// Forces the command to use `@path` argfile. +/// +/// You should set `__CARGO_TEST_FORCE_ARGFILE` to enable this. +fn debug_force_argfile(retry_enabled: bool) -> bool { + cfg!(debug_assertions) && env::var("__CARGO_TEST_FORCE_ARGFILE").is_ok() && retry_enabled +} + +/// Creates new pipes for stderr, stdout, and optionally stdin. +fn piped(cmd: &mut Command, pipe_stdin: bool) -> &mut Command { + cmd.stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .stdin(if pipe_stdin { + Stdio::piped() + } else { + Stdio::null() + }) +} + +fn close_tempfile_and_log_error(file: NamedTempFile) { + file.close().unwrap_or_else(|e| { + log::warn!("failed to close temporary file: {e}"); + }); +} + +#[cfg(unix)] +mod imp { + use super::{close_tempfile_and_log_error, debug_force_argfile, ProcessBuilder, ProcessError}; + use anyhow::Result; + use std::io; + use std::os::unix::process::CommandExt; + + pub fn exec_replace(process_builder: &ProcessBuilder) -> Result<()> { + let mut error; + let mut file = None; + if debug_force_argfile(process_builder.retry_with_argfile) { + let (mut command, argfile) = process_builder.build_command_with_argfile()?; + file = Some(argfile); + error = command.exec() + } else { + let mut command = process_builder.build_command(); + error = command.exec(); + if process_builder.should_retry_with_argfile(&error) { + let (mut command, argfile) = process_builder.build_command_with_argfile()?; + file = Some(argfile); + error = command.exec() + } + } + if let Some(file) = file { + close_tempfile_and_log_error(file); + } + + Err(anyhow::Error::from(error).context(ProcessError::new( + &format!("could not execute process {}", process_builder), + None, + None, + ))) + } + + pub fn command_line_too_big(err: &io::Error) -> bool { + err.raw_os_error() == Some(libc::E2BIG) + } +} + +#[cfg(windows)] +mod imp { + use super::{ProcessBuilder, ProcessError}; + use anyhow::Result; + use std::io; + use windows_sys::Win32::Foundation::{BOOL, FALSE, TRUE}; + use windows_sys::Win32::System::Console::SetConsoleCtrlHandler; + + unsafe extern "system" fn ctrlc_handler(_: u32) -> BOOL { + // Do nothing; let the child process handle it. + TRUE + } + + pub fn exec_replace(process_builder: &ProcessBuilder) -> Result<()> { + unsafe { + if SetConsoleCtrlHandler(Some(ctrlc_handler), TRUE) == FALSE { + return Err(ProcessError::new("Could not set Ctrl-C handler.", None, None).into()); + } + } + + // Just execute the process as normal. + process_builder.exec() + } + + pub fn command_line_too_big(err: &io::Error) -> bool { + use windows_sys::Win32::Foundation::ERROR_FILENAME_EXCED_RANGE; + err.raw_os_error() == Some(ERROR_FILENAME_EXCED_RANGE as i32) + } +} + +#[cfg(test)] +mod tests { + use super::ProcessBuilder; + use std::fs; + + #[test] + fn argfile_build_succeeds() { + let mut cmd = ProcessBuilder::new("echo"); + cmd.args(["foo", "bar"].as_slice()); + let (cmd, argfile) = cmd.build_command_with_argfile().unwrap(); + + assert_eq!(cmd.get_program(), "echo"); + let cmd_args: Vec<_> = cmd.get_args().map(|s| s.to_str().unwrap()).collect(); + assert_eq!(cmd_args.len(), 1); + assert!(cmd_args[0].starts_with("@")); + assert!(cmd_args[0].contains("cargo-argfile.")); + + let buf = fs::read_to_string(argfile.path()).unwrap(); + assert_eq!(buf, "foo\nbar\n"); + } + + #[test] + fn argfile_build_fails_if_arg_contains_newline() { + let mut cmd = ProcessBuilder::new("echo"); + cmd.arg("foo\n"); + let err = cmd.build_command_with_argfile().unwrap_err(); + assert_eq!( + err.to_string(), + "argument for argfile contains newlines: `foo\n`" + ); + } + + #[test] + fn argfile_build_fails_if_arg_contains_invalid_utf8() { + let mut cmd = ProcessBuilder::new("echo"); + + #[cfg(windows)] + let invalid_arg = { + use std::os::windows::prelude::*; + std::ffi::OsString::from_wide(&[0x0066, 0x006f, 0xD800, 0x006f]) + }; + + #[cfg(unix)] + let invalid_arg = { + use std::os::unix::ffi::OsStrExt; + std::ffi::OsStr::from_bytes(&[0x66, 0x6f, 0x80, 0x6f]).to_os_string() + }; + + cmd.arg(invalid_arg); + let err = cmd.build_command_with_argfile().unwrap_err(); + assert_eq!( + err.to_string(), + "argument for argfile contains invalid UTF-8 characters: `fo�o`" + ); + } +} |