diff options
Diffstat (limited to '')
-rw-r--r-- | crates/cargo-util/Cargo.toml | 28 | ||||
l--------- | crates/cargo-util/LICENSE-APACHE | 1 | ||||
l--------- | crates/cargo-util/LICENSE-MIT | 1 | ||||
-rw-r--r-- | crates/cargo-util/src/lib.rs | 18 | ||||
-rw-r--r-- | crates/cargo-util/src/paths.rs | 788 | ||||
-rw-r--r-- | crates/cargo-util/src/process_builder.rs | 689 | ||||
-rw-r--r-- | crates/cargo-util/src/process_error.rs | 200 | ||||
-rw-r--r-- | crates/cargo-util/src/read2.rs | 178 | ||||
-rw-r--r-- | crates/cargo-util/src/registry.rs | 45 | ||||
-rw-r--r-- | crates/cargo-util/src/sha256.rs | 56 |
10 files changed, 2004 insertions, 0 deletions
diff --git a/crates/cargo-util/Cargo.toml b/crates/cargo-util/Cargo.toml new file mode 100644 index 0000000..aa25c13 --- /dev/null +++ b/crates/cargo-util/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "cargo-util" +version = "0.2.3" +edition = "2021" +license = "MIT OR Apache-2.0" +homepage = "https://github.com/rust-lang/cargo" +repository = "https://github.com/rust-lang/cargo" +description = "Miscellaneous support code used by Cargo." + +[dependencies] +anyhow = "1.0.34" +crypto-hash = "0.3.1" +filetime = "0.2.9" +hex = "0.4.2" +jobserver = "0.1.26" +libc = "0.2.88" +log = "0.4.6" +same-file = "1.0.6" +shell-escape = "0.1.4" +tempfile = "3.1.0" +walkdir = "2.3.1" + +[target.'cfg(target_os = "macos")'.dependencies] +core-foundation = { version = "0.9.0", features = ["mac_os_10_7_support"] } + +[target.'cfg(windows)'.dependencies] +miow = "0.5.0" +windows-sys = { version = "0.45.0", features = ["Win32_Storage_FileSystem", "Win32_Foundation", "Win32_System_Console"] } diff --git a/crates/cargo-util/LICENSE-APACHE b/crates/cargo-util/LICENSE-APACHE new file mode 120000 index 0000000..1cd601d --- /dev/null +++ b/crates/cargo-util/LICENSE-APACHE @@ -0,0 +1 @@ +../../LICENSE-APACHE
\ No newline at end of file diff --git a/crates/cargo-util/LICENSE-MIT b/crates/cargo-util/LICENSE-MIT new file mode 120000 index 0000000..b2cfbdc --- /dev/null +++ b/crates/cargo-util/LICENSE-MIT @@ -0,0 +1 @@ +../../LICENSE-MIT
\ No newline at end of file diff --git a/crates/cargo-util/src/lib.rs b/crates/cargo-util/src/lib.rs new file mode 100644 index 0000000..0cbc920 --- /dev/null +++ b/crates/cargo-util/src/lib.rs @@ -0,0 +1,18 @@ +//! Miscellaneous support code used by Cargo. + +pub use self::read2::read2; +pub use process_builder::ProcessBuilder; +pub use process_error::{exit_status_to_string, is_simple_exit_code, ProcessError}; +pub use sha256::Sha256; + +pub mod paths; +mod process_builder; +mod process_error; +mod read2; +pub mod registry; +mod sha256; + +/// Whether or not this is running in a Continuous Integration environment. +pub fn is_ci() -> bool { + std::env::var("CI").is_ok() || std::env::var("TF_BUILD").is_ok() +} diff --git a/crates/cargo-util/src/paths.rs b/crates/cargo-util/src/paths.rs new file mode 100644 index 0000000..69df7a2 --- /dev/null +++ b/crates/cargo-util/src/paths.rs @@ -0,0 +1,788 @@ +//! Various utilities for working with files and paths. + +use anyhow::{Context, Result}; +use filetime::FileTime; +use std::env; +use std::ffi::{OsStr, OsString}; +use std::fs::{self, File, OpenOptions}; +use std::io; +use std::io::prelude::*; +use std::iter; +use std::path::{Component, Path, PathBuf}; +use tempfile::Builder as TempFileBuilder; + +/// Joins paths into a string suitable for the `PATH` environment variable. +/// +/// This is equivalent to [`std::env::join_paths`], but includes a more +/// detailed error message. The given `env` argument is the name of the +/// environment variable this will be used for, which is included in the +/// error message. 
+pub fn join_paths<T: AsRef<OsStr>>(paths: &[T], env: &str) -> Result<OsString> { + env::join_paths(paths.iter()).with_context(|| { + let mut message = format!( + "failed to join paths from `${env}` together\n\n\ + Check if any of path segments listed below contain an \ + unterminated quote character or path separator:" + ); + for path in paths { + use std::fmt::Write; + write!(&mut message, "\n {:?}", Path::new(path)).unwrap(); + } + + message + }) +} + +/// Returns the name of the environment variable used for searching for +/// dynamic libraries. +pub fn dylib_path_envvar() -> &'static str { + if cfg!(windows) { + "PATH" + } else if cfg!(target_os = "macos") { + // When loading and linking a dynamic library or bundle, dlopen + // searches in LD_LIBRARY_PATH, DYLD_LIBRARY_PATH, PWD, and + // DYLD_FALLBACK_LIBRARY_PATH. + // In the Mach-O format, a dynamic library has an "install path." + // Clients linking against the library record this path, and the + // dynamic linker, dyld, uses it to locate the library. + // dyld searches DYLD_LIBRARY_PATH *before* the install path. + // dyld searches DYLD_FALLBACK_LIBRARY_PATH only if it cannot + // find the library in the install path. + // Setting DYLD_LIBRARY_PATH can easily have unintended + // consequences. + // + // Also, DYLD_LIBRARY_PATH appears to have significant performance + // penalty starting in 10.13. Cargo's testsuite ran more than twice as + // slow with it on CI. + "DYLD_FALLBACK_LIBRARY_PATH" + } else { + "LD_LIBRARY_PATH" + } +} + +/// Returns a list of directories that are searched for dynamic libraries. +/// +/// Note that some operating systems will have defaults if this is empty that +/// will need to be dealt with. +pub fn dylib_path() -> Vec<PathBuf> { + match env::var_os(dylib_path_envvar()) { + Some(var) => env::split_paths(&var).collect(), + None => Vec::new(), + } +} + +/// Normalize a path, removing things like `.` and `..`. 
+/// +/// CAUTION: This does not resolve symlinks (unlike +/// [`std::fs::canonicalize`]). This may cause incorrect or surprising +/// behavior at times. This should be used carefully. Unfortunately, +/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often +/// fail, or on Windows returns annoying device paths. This is a problem Cargo +/// needs to improve on. +pub fn normalize_path(path: &Path) -> PathBuf { + let mut components = path.components().peekable(); + let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { + components.next(); + PathBuf::from(c.as_os_str()) + } else { + PathBuf::new() + }; + + for component in components { + match component { + Component::Prefix(..) => unreachable!(), + Component::RootDir => { + ret.push(component.as_os_str()); + } + Component::CurDir => {} + Component::ParentDir => { + ret.pop(); + } + Component::Normal(c) => { + ret.push(c); + } + } + } + ret +} + +/// Returns the absolute path of where the given executable is located based +/// on searching the `PATH` environment variable. +/// +/// Returns an error if it cannot be found. +pub fn resolve_executable(exec: &Path) -> Result<PathBuf> { + if exec.components().count() == 1 { + let paths = env::var_os("PATH").ok_or_else(|| anyhow::format_err!("no PATH"))?; + let candidates = env::split_paths(&paths).flat_map(|path| { + let candidate = path.join(&exec); + let with_exe = if env::consts::EXE_EXTENSION.is_empty() { + None + } else { + Some(candidate.with_extension(env::consts::EXE_EXTENSION)) + }; + iter::once(candidate).chain(with_exe) + }); + for candidate in candidates { + if candidate.is_file() { + return Ok(candidate); + } + } + + anyhow::bail!("no executable for `{}` found in PATH", exec.display()) + } else { + Ok(exec.into()) + } +} + +/// Reads a file to a string. +/// +/// Equivalent to [`std::fs::read_to_string`] with better error messages. 
+pub fn read(path: &Path) -> Result<String> { + match String::from_utf8(read_bytes(path)?) { + Ok(s) => Ok(s), + Err(_) => anyhow::bail!("path at `{}` was not valid utf-8", path.display()), + } +} + +/// Reads a file into a bytes vector. +/// +/// Equivalent to [`std::fs::read`] with better error messages. +pub fn read_bytes(path: &Path) -> Result<Vec<u8>> { + fs::read(path).with_context(|| format!("failed to read `{}`", path.display())) +} + +/// Writes a file to disk. +/// +/// Equivalent to [`std::fs::write`] with better error messages. +pub fn write<P: AsRef<Path>, C: AsRef<[u8]>>(path: P, contents: C) -> Result<()> { + let path = path.as_ref(); + fs::write(path, contents.as_ref()) + .with_context(|| format!("failed to write `{}`", path.display())) +} + +/// Equivalent to [`write()`], but does not write anything if the file contents +/// are identical to the given contents. +pub fn write_if_changed<P: AsRef<Path>, C: AsRef<[u8]>>(path: P, contents: C) -> Result<()> { + (|| -> Result<()> { + let contents = contents.as_ref(); + let mut f = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&path)?; + let mut orig = Vec::new(); + f.read_to_end(&mut orig)?; + if orig != contents { + f.set_len(0)?; + f.seek(io::SeekFrom::Start(0))?; + f.write_all(contents)?; + } + Ok(()) + })() + .with_context(|| format!("failed to write `{}`", path.as_ref().display()))?; + Ok(()) +} + +/// Equivalent to [`write()`], but appends to the end instead of replacing the +/// contents. +pub fn append(path: &Path, contents: &[u8]) -> Result<()> { + (|| -> Result<()> { + let mut f = OpenOptions::new() + .write(true) + .append(true) + .create(true) + .open(path)?; + + f.write_all(contents)?; + Ok(()) + })() + .with_context(|| format!("failed to write `{}`", path.display()))?; + Ok(()) +} + +/// Creates a new file. 
+pub fn create<P: AsRef<Path>>(path: P) -> Result<File> { + let path = path.as_ref(); + File::create(path).with_context(|| format!("failed to create file `{}`", path.display())) +} + +/// Opens an existing file. +pub fn open<P: AsRef<Path>>(path: P) -> Result<File> { + let path = path.as_ref(); + File::open(path).with_context(|| format!("failed to open file `{}`", path.display())) +} + +/// Returns the last modification time of a file. +pub fn mtime(path: &Path) -> Result<FileTime> { + let meta = + fs::metadata(path).with_context(|| format!("failed to stat `{}`", path.display()))?; + Ok(FileTime::from_last_modification_time(&meta)) +} + +/// Returns the maximum mtime of the given path, recursing into +/// subdirectories, and following symlinks. +pub fn mtime_recursive(path: &Path) -> Result<FileTime> { + let meta = + fs::metadata(path).with_context(|| format!("failed to stat `{}`", path.display()))?; + if !meta.is_dir() { + return Ok(FileTime::from_last_modification_time(&meta)); + } + let max_meta = walkdir::WalkDir::new(path) + .follow_links(true) + .into_iter() + .filter_map(|e| match e { + Ok(e) => Some(e), + Err(e) => { + // Ignore errors while walking. If Cargo can't access it, the + // build script probably can't access it, either. + log::debug!("failed to determine mtime while walking directory: {}", e); + None + } + }) + .filter_map(|e| { + if e.path_is_symlink() { + // Use the mtime of both the symlink and its target, to + // handle the case where the symlink is modified to a + // different target. + let sym_meta = match std::fs::symlink_metadata(e.path()) { + Ok(m) => m, + Err(err) => { + // I'm not sure when this is really possible (maybe a + // race with unlinking?). Regardless, if Cargo can't + // read it, the build script probably can't either. 
+ log::debug!( + "failed to determine mtime while fetching symlink metadata of {}: {}", + e.path().display(), + err + ); + return None; + } + }; + let sym_mtime = FileTime::from_last_modification_time(&sym_meta); + // Walkdir follows symlinks. + match e.metadata() { + Ok(target_meta) => { + let target_mtime = FileTime::from_last_modification_time(&target_meta); + Some(sym_mtime.max(target_mtime)) + } + Err(err) => { + // Can't access the symlink target. If Cargo can't + // access it, the build script probably can't access + // it either. + log::debug!( + "failed to determine mtime of symlink target for {}: {}", + e.path().display(), + err + ); + Some(sym_mtime) + } + } + } else { + let meta = match e.metadata() { + Ok(m) => m, + Err(err) => { + // I'm not sure when this is really possible (maybe a + // race with unlinking?). Regardless, if Cargo can't + // read it, the build script probably can't either. + log::debug!( + "failed to determine mtime while fetching metadata of {}: {}", + e.path().display(), + err + ); + return None; + } + }; + Some(FileTime::from_last_modification_time(&meta)) + } + }) + .max() + // unwrap_or_else handles the case where there are no files in the directory. + .unwrap_or_else(|| FileTime::from_last_modification_time(&meta)); + Ok(max_meta) +} + +/// Record the current time on the filesystem (using the filesystem's clock) +/// using a file at the given directory. Returns the current time. +pub fn set_invocation_time(path: &Path) -> Result<FileTime> { + // note that if `FileTime::from_system_time(SystemTime::now());` is determined to be sufficient, + // then this can be removed. + let timestamp = path.join("invoked.timestamp"); + write( + &timestamp, + "This file has an mtime of when this was started.", + )?; + let ft = mtime(&timestamp)?; + log::debug!("invocation time for {:?} is {}", path, ft); + Ok(ft) +} + +/// Converts a path to UTF-8 bytes. 
+pub fn path2bytes(path: &Path) -> Result<&[u8]> { + #[cfg(unix)] + { + use std::os::unix::prelude::*; + Ok(path.as_os_str().as_bytes()) + } + #[cfg(windows)] + { + match path.as_os_str().to_str() { + Some(s) => Ok(s.as_bytes()), + None => Err(anyhow::format_err!( + "invalid non-unicode path: {}", + path.display() + )), + } + } +} + +/// Converts UTF-8 bytes to a path. +pub fn bytes2path(bytes: &[u8]) -> Result<PathBuf> { + #[cfg(unix)] + { + use std::os::unix::prelude::*; + Ok(PathBuf::from(OsStr::from_bytes(bytes))) + } + #[cfg(windows)] + { + use std::str; + match str::from_utf8(bytes) { + Ok(s) => Ok(PathBuf::from(s)), + Err(..) => Err(anyhow::format_err!("invalid non-unicode path")), + } + } +} + +/// Returns an iterator that walks up the directory hierarchy towards the root. +/// +/// Each item is a [`Path`]. It will start with the given path, finishing at +/// the root. If the `stop_root_at` parameter is given, it will stop at the +/// given path (which will be the last item). +pub fn ancestors<'a>(path: &'a Path, stop_root_at: Option<&Path>) -> PathAncestors<'a> { + PathAncestors::new(path, stop_root_at) +} + +pub struct PathAncestors<'a> { + current: Option<&'a Path>, + stop_at: Option<PathBuf>, +} + +impl<'a> PathAncestors<'a> { + fn new(path: &'a Path, stop_root_at: Option<&Path>) -> PathAncestors<'a> { + let stop_at = env::var("__CARGO_TEST_ROOT") + .ok() + .map(PathBuf::from) + .or_else(|| stop_root_at.map(|p| p.to_path_buf())); + PathAncestors { + current: Some(path), + //HACK: avoid reading `~/.cargo/config` when testing Cargo itself. 
+ stop_at, + } + } +} + +impl<'a> Iterator for PathAncestors<'a> { + type Item = &'a Path; + + fn next(&mut self) -> Option<&'a Path> { + if let Some(path) = self.current { + self.current = path.parent(); + + if let Some(ref stop_at) = self.stop_at { + if path == stop_at { + self.current = None; + } + } + + Some(path) + } else { + None + } + } +} + +/// Equivalent to [`std::fs::create_dir_all`] with better error messages. +pub fn create_dir_all(p: impl AsRef<Path>) -> Result<()> { + _create_dir_all(p.as_ref()) +} + +fn _create_dir_all(p: &Path) -> Result<()> { + fs::create_dir_all(p) + .with_context(|| format!("failed to create directory `{}`", p.display()))?; + Ok(()) +} + +/// Recursively remove all files and directories at the given directory. +/// +/// This does *not* follow symlinks. +pub fn remove_dir_all<P: AsRef<Path>>(p: P) -> Result<()> { + _remove_dir_all(p.as_ref()) +} + +fn _remove_dir_all(p: &Path) -> Result<()> { + if p.symlink_metadata() + .with_context(|| format!("could not get metadata for `{}` to remove", p.display()))? + .is_symlink() + { + return remove_file(p); + } + let entries = p + .read_dir() + .with_context(|| format!("failed to read directory `{}`", p.display()))?; + for entry in entries { + let entry = entry?; + let path = entry.path(); + if entry.file_type()?.is_dir() { + remove_dir_all(&path)?; + } else { + remove_file(&path)?; + } + } + remove_dir(&p) +} + +/// Equivalent to [`std::fs::remove_dir`] with better error messages. +pub fn remove_dir<P: AsRef<Path>>(p: P) -> Result<()> { + _remove_dir(p.as_ref()) +} + +fn _remove_dir(p: &Path) -> Result<()> { + fs::remove_dir(p).with_context(|| format!("failed to remove directory `{}`", p.display()))?; + Ok(()) +} + +/// Equivalent to [`std::fs::remove_file`] with better error messages. +/// +/// If the file is readonly, this will attempt to change the permissions to +/// force the file to be deleted. 
+pub fn remove_file<P: AsRef<Path>>(p: P) -> Result<()> { + _remove_file(p.as_ref()) +} + +fn _remove_file(p: &Path) -> Result<()> { + let mut err = match fs::remove_file(p) { + Ok(()) => return Ok(()), + Err(e) => e, + }; + + if err.kind() == io::ErrorKind::PermissionDenied && set_not_readonly(p).unwrap_or(false) { + match fs::remove_file(p) { + Ok(()) => return Ok(()), + Err(e) => err = e, + } + } + + Err(err).with_context(|| format!("failed to remove file `{}`", p.display()))?; + Ok(()) +} + +fn set_not_readonly(p: &Path) -> io::Result<bool> { + let mut perms = p.metadata()?.permissions(); + if !perms.readonly() { + return Ok(false); + } + perms.set_readonly(false); + fs::set_permissions(p, perms)?; + Ok(true) +} + +/// Hardlink (file) or symlink (dir) src to dst if possible, otherwise copy it. +/// +/// If the destination already exists, it is removed before linking. +pub fn link_or_copy(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { + let src = src.as_ref(); + let dst = dst.as_ref(); + _link_or_copy(src, dst) +} + +fn _link_or_copy(src: &Path, dst: &Path) -> Result<()> { + log::debug!("linking {} to {}", src.display(), dst.display()); + if same_file::is_same_file(src, dst).unwrap_or(false) { + return Ok(()); + } + + // NB: we can't use dst.exists(), as if dst is a broken symlink, + // dst.exists() will return false. This is problematic, as we still need to + // unlink dst in this case. symlink_metadata(dst).is_ok() will tell us + // whether dst exists *without* following symlinks, which is what we want. + if fs::symlink_metadata(dst).is_ok() { + remove_file(&dst)?; + } + + let link_result = if src.is_dir() { + #[cfg(target_os = "redox")] + use std::os::redox::fs::symlink; + #[cfg(unix)] + use std::os::unix::fs::symlink; + #[cfg(windows)] + // FIXME: This should probably panic or have a copy fallback. Symlinks + // are not supported in all windows environments. 
Currently symlinking + // is only used for .dSYM directories on macos, but this shouldn't be + // accidentally relied upon. + use std::os::windows::fs::symlink_dir as symlink; + + let dst_dir = dst.parent().unwrap(); + let src = if src.starts_with(dst_dir) { + src.strip_prefix(dst_dir).unwrap() + } else { + src + }; + symlink(src, dst) + } else if env::var_os("__CARGO_COPY_DONT_LINK_DO_NOT_USE_THIS").is_some() { + // This is a work-around for a bug in macOS 10.15. When running on + // APFS, there seems to be a strange race condition with + // Gatekeeper where it will forcefully kill a process launched via + // `cargo run` with SIGKILL. Copying seems to avoid the problem. + // This shouldn't affect anyone except Cargo's test suite because + // it is very rare, and only seems to happen under heavy load and + // rapidly creating lots of executables and running them. + // See https://github.com/rust-lang/cargo/issues/7821 for the + // gory details. + fs::copy(src, dst).map(|_| ()) + } else { + if cfg!(target_os = "macos") { + // This is a work-around for a bug on macos. There seems to be a race condition + // with APFS when hard-linking binaries. Gatekeeper does not have signing or + // hash information stored in kernel when running the process. Therefore killing it. + // This problem does not appear when copying files as kernel has time to process it. + // Note that: fs::copy on macos is using CopyOnWrite (syscall fclonefileat) which should be + // as fast as hardlinking. + // See https://github.com/rust-lang/cargo/issues/10060 for the details + fs::copy(src, dst).map(|_| ()) + } else { + fs::hard_link(src, dst) + } + }; + link_result + .or_else(|err| { + log::debug!("link failed {}. falling back to fs::copy", err); + fs::copy(src, dst).map(|_| ()) + }) + .with_context(|| { + format!( + "failed to link or copy `{}` to `{}`", + src.display(), + dst.display() + ) + })?; + Ok(()) +} + +/// Copies a file from one location to another. 
+/// +/// Equivalent to [`std::fs::copy`] with better error messages. +pub fn copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> Result<u64> { + let from = from.as_ref(); + let to = to.as_ref(); + fs::copy(from, to) + .with_context(|| format!("failed to copy `{}` to `{}`", from.display(), to.display())) +} + +/// Changes the filesystem mtime (and atime if possible) for the given file. +/// +/// This intentionally does not return an error, as this is sometimes not +/// supported on network filesystems. For the current uses in Cargo, this is a +/// "best effort" approach, and errors shouldn't be propagated. +pub fn set_file_time_no_err<P: AsRef<Path>>(path: P, time: FileTime) { + let path = path.as_ref(); + match filetime::set_file_times(path, time, time) { + Ok(()) => log::debug!("set file mtime {} to {}", path.display(), time), + Err(e) => log::warn!( + "could not set mtime of {} to {}: {:?}", + path.display(), + time, + e + ), + } +} + +/// Strips `base` from `path`. +/// +/// This canonicalizes both paths before stripping. This is useful if the +/// paths are obtained in different ways, and one or the other may or may not +/// have been normalized in some way. +pub fn strip_prefix_canonical<P: AsRef<Path>>( + path: P, + base: P, +) -> Result<PathBuf, std::path::StripPrefixError> { + // Not all filesystems support canonicalize. Just ignore if it doesn't work. + let safe_canonicalize = |path: &Path| match path.canonicalize() { + Ok(p) => p, + Err(e) => { + log::warn!("cannot canonicalize {:?}: {:?}", path, e); + path.to_path_buf() + } + }; + let canon_path = safe_canonicalize(path.as_ref()); + let canon_base = safe_canonicalize(base.as_ref()); + canon_path.strip_prefix(canon_base).map(|p| p.to_path_buf()) +} + +/// Creates an excluded from cache directory atomically with its parents as needed. +/// +/// The atomicity only covers creating the leaf directory and exclusion from cache. Any missing +/// parent directories will not be created in an atomic manner. 
+/// +/// This function is idempotent and in addition to that it won't exclude ``p`` from cache if it +/// already exists. +pub fn create_dir_all_excluded_from_backups_atomic(p: impl AsRef<Path>) -> Result<()> { + let path = p.as_ref(); + if path.is_dir() { + return Ok(()); + } + + let parent = path.parent().unwrap(); + let base = path.file_name().unwrap(); + create_dir_all(parent)?; + // We do this in two steps (first create a temporary directory and exclude + // it from backups, then rename it to the desired name). If we created the + // directory directly where it should be and then excluded it from backups + // we would risk a situation where cargo is interrupted right after the directory + // creation but before the exclusion; the directory would then remain non-excluded from + // backups because we only perform exclusion right after we created the directory + // ourselves. + // + // We need the tempdir created in parent instead of $TMP, because only then we can be + // easily sure that rename() will succeed (the new name needs to be on the same mount + // point as the old one). + let tempdir = TempFileBuilder::new().prefix(base).tempdir_in(parent)?; + exclude_from_backups(tempdir.path()); + exclude_from_content_indexing(tempdir.path()); + // Previously std::fs::create_dir_all() (through paths::create_dir_all()) was used + // here to create the directory directly and fs::create_dir_all() explicitly treats + // the directory being created concurrently by another thread or process as success, + // hence the check below to follow the existing behavior. If we get an error at + // rename() and suddenly the directory (which didn't exist a moment earlier) exists + // we can infer that another cargo process is doing the work. + if let Err(e) = fs::rename(tempdir.path(), path) { + if !path.exists() { + return Err(anyhow::Error::from(e)); + } + } + Ok(()) +} + +/// Mark an existing directory as excluded from backups and indexing. +/// +/// Errors in marking it are ignored. 
+pub fn exclude_from_backups_and_indexing(p: impl AsRef<Path>) { + let path = p.as_ref(); + exclude_from_backups(path); + exclude_from_content_indexing(path); +} + +/// Marks the directory as excluded from archives/backups. +/// +/// This is recommended to prevent derived/temporary files from bloating backups. There are two +/// mechanisms used to achieve this right now: +/// +/// * A dedicated resource property excluding from Time Machine backups on macOS +/// * CACHEDIR.TAG files supported by various tools in a platform-independent way +fn exclude_from_backups(path: &Path) { + exclude_from_time_machine(path); + let _ = std::fs::write( + path.join("CACHEDIR.TAG"), + "Signature: 8a477f597d28d172789f06886806bc55 +# This file is a cache directory tag created by cargo. +# For information about cache directory tags see https://bford.info/cachedir/ +", + ); + // Similarly to exclude_from_time_machine() we ignore errors here as it's an optional feature. +} + +/// Marks the directory as excluded from content indexing. +/// +/// This is recommended to prevent the content of derived/temporary files from being indexed. +/// This is very important for Windows users, as the live content indexing significantly slows +/// cargo's I/O operations. +/// +/// This is currently a no-op on non-Windows platforms. 
+fn exclude_from_content_indexing(path: &Path) { + #[cfg(windows)] + { + use std::iter::once; + use std::os::windows::prelude::OsStrExt; + use windows_sys::Win32::Storage::FileSystem::{ + GetFileAttributesW, SetFileAttributesW, FILE_ATTRIBUTE_NOT_CONTENT_INDEXED, + }; + + let path: Vec<u16> = path.as_os_str().encode_wide().chain(once(0)).collect(); + unsafe { + SetFileAttributesW( + path.as_ptr(), + GetFileAttributesW(path.as_ptr()) | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED, + ); + } + } + #[cfg(not(windows))] + { + let _ = path; + } +} + +#[cfg(not(target_os = "macos"))] +fn exclude_from_time_machine(_: &Path) {} + +#[cfg(target_os = "macos")] +/// Marks files or directories as excluded from Time Machine on macOS +fn exclude_from_time_machine(path: &Path) { + use core_foundation::base::TCFType; + use core_foundation::{number, string, url}; + use std::ptr; + + // For compatibility with 10.7 a string is used instead of global kCFURLIsExcludedFromBackupKey + let is_excluded_key: Result<string::CFString, _> = "NSURLIsExcludedFromBackupKey".parse(); + let path = url::CFURL::from_path(path, false); + if let (Some(path), Ok(is_excluded_key)) = (path, is_excluded_key) { + unsafe { + url::CFURLSetResourcePropertyForKey( + path.as_concrete_TypeRef(), + is_excluded_key.as_concrete_TypeRef(), + number::kCFBooleanTrue as *const _, + ptr::null_mut(), + ); + } + } + // Errors are ignored, since it's an optional feature and failure + // doesn't prevent Cargo from working +} + +#[cfg(test)] +mod tests { + use super::join_paths; + + #[test] + fn join_paths_lists_paths_on_error() { + let valid_paths = vec!["/testing/one", "/testing/two"]; + // does not fail on valid input + let _joined = join_paths(&valid_paths, "TESTING1").unwrap(); + + #[cfg(unix)] + { + let invalid_paths = vec!["/testing/one", "/testing/t:wo/three"]; + let err = join_paths(&invalid_paths, "TESTING2").unwrap_err(); + assert_eq!( + err.to_string(), + "failed to join paths from `$TESTING2` together\n\n\ + Check if any of 
path segments listed below contain an \ + unterminated quote character or path separator:\ + \n \"/testing/one\"\ + \n \"/testing/t:wo/three\"\ + " + ); + } + #[cfg(windows)] + { + let invalid_paths = vec!["/testing/one", "/testing/t\"wo/three"]; + let err = join_paths(&invalid_paths, "TESTING2").unwrap_err(); + assert_eq!( + err.to_string(), + "failed to join paths from `$TESTING2` together\n\n\ + Check if any of path segments listed below contain an \ + unterminated quote character or path separator:\ + \n \"/testing/one\"\ + \n \"/testing/t\\\"wo/three\"\ + " + ); + } + } +} diff --git a/crates/cargo-util/src/process_builder.rs b/crates/cargo-util/src/process_builder.rs new file mode 100644 index 0000000..76392f2 --- /dev/null +++ b/crates/cargo-util/src/process_builder.rs @@ -0,0 +1,689 @@ +use crate::process_error::ProcessError; +use crate::read2; + +use anyhow::{bail, Context, Result}; +use jobserver::Client; +use shell_escape::escape; +use tempfile::NamedTempFile; + +use std::collections::BTreeMap; +use std::env; +use std::ffi::{OsStr, OsString}; +use std::fmt; +use std::io::{self, Write}; +use std::iter::once; +use std::path::Path; +use std::process::{Command, ExitStatus, Output, Stdio}; + +/// A builder object for an external process, similar to [`std::process::Command`]. +#[derive(Clone, Debug)] +pub struct ProcessBuilder { + /// The program to execute. + program: OsString, + /// A list of arguments to pass to the program. + args: Vec<OsString>, + /// Any environment variables that should be set for the program. + env: BTreeMap<String, Option<OsString>>, + /// The directory to run the program from. + cwd: Option<OsString>, + /// A list of wrappers that wrap the original program when calling + /// [`ProcessBuilder::wrapped`]. The last one is the outermost one. + wrappers: Vec<OsString>, + /// The `make` jobserver. See the [jobserver crate] for + /// more information. 
+ /// + /// [jobserver crate]: https://docs.rs/jobserver/ + jobserver: Option<Client>, + /// `true` to include environment variable in display. + display_env_vars: bool, + /// `true` to retry with an argfile if hitting "command line too big" error. + /// See [`ProcessBuilder::retry_with_argfile`] for more information. + retry_with_argfile: bool, + /// Data to write to stdin. + stdin: Option<Vec<u8>>, +} + +impl fmt::Display for ProcessBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "`")?; + + if self.display_env_vars { + for (key, val) in self.env.iter() { + if let Some(val) = val { + let val = escape(val.to_string_lossy()); + if cfg!(windows) { + write!(f, "set {}={}&& ", key, val)?; + } else { + write!(f, "{}={} ", key, val)?; + } + } + } + } + + write!(f, "{}", self.get_program().to_string_lossy())?; + + for arg in self.get_args() { + write!(f, " {}", escape(arg.to_string_lossy()))?; + } + + write!(f, "`") + } +} + +impl ProcessBuilder { + /// Creates a new [`ProcessBuilder`] with the given executable path. + pub fn new<T: AsRef<OsStr>>(cmd: T) -> ProcessBuilder { + ProcessBuilder { + program: cmd.as_ref().to_os_string(), + args: Vec::new(), + cwd: None, + env: BTreeMap::new(), + wrappers: Vec::new(), + jobserver: None, + display_env_vars: false, + retry_with_argfile: false, + stdin: None, + } + } + + /// (chainable) Sets the executable for the process. + pub fn program<T: AsRef<OsStr>>(&mut self, program: T) -> &mut ProcessBuilder { + self.program = program.as_ref().to_os_string(); + self + } + + /// (chainable) Adds `arg` to the args list. + pub fn arg<T: AsRef<OsStr>>(&mut self, arg: T) -> &mut ProcessBuilder { + self.args.push(arg.as_ref().to_os_string()); + self + } + + /// (chainable) Adds multiple `args` to the args list. 
+ pub fn args<T: AsRef<OsStr>>(&mut self, args: &[T]) -> &mut ProcessBuilder { + self.args + .extend(args.iter().map(|t| t.as_ref().to_os_string())); + self + } + + /// (chainable) Replaces the args list with the given `args`. + pub fn args_replace<T: AsRef<OsStr>>(&mut self, args: &[T]) -> &mut ProcessBuilder { + if let Some(program) = self.wrappers.pop() { + // The user intends to replace all args, so we + // - use the outermost wrapper as the main program, and + // - clean up other inner wrappers. + self.program = program; + self.wrappers = Vec::new(); + } + self.args = args.iter().map(|t| t.as_ref().to_os_string()).collect(); + self + } + + /// (chainable) Sets the current working directory of the process. + pub fn cwd<T: AsRef<OsStr>>(&mut self, path: T) -> &mut ProcessBuilder { + self.cwd = Some(path.as_ref().to_os_string()); + self + } + + /// (chainable) Sets an environment variable for the process. + pub fn env<T: AsRef<OsStr>>(&mut self, key: &str, val: T) -> &mut ProcessBuilder { + self.env + .insert(key.to_string(), Some(val.as_ref().to_os_string())); + self + } + + /// (chainable) Unsets an environment variable for the process. + pub fn env_remove(&mut self, key: &str) -> &mut ProcessBuilder { + self.env.insert(key.to_string(), None); + self + } + + /// Gets the executable name. + pub fn get_program(&self) -> &OsString { + self.wrappers.last().unwrap_or(&self.program) + } + + /// Gets the program arguments. + pub fn get_args(&self) -> impl Iterator<Item = &OsString> { + self.wrappers + .iter() + .rev() + .chain(once(&self.program)) + .chain(self.args.iter()) + .skip(1) // Skip the main `program` + } + + /// Gets the current working directory for the process. + pub fn get_cwd(&self) -> Option<&Path> { + self.cwd.as_ref().map(Path::new) + } + + /// Gets an environment variable as the process will see it (will inherit from environment + /// unless explicitly unset). 
+ pub fn get_env(&self, var: &str) -> Option<OsString> { + self.env + .get(var) + .cloned() + .or_else(|| Some(env::var_os(var))) + .and_then(|s| s) + } + + /// Gets all environment variables explicitly set or unset for the process (not inherited + /// vars). + pub fn get_envs(&self) -> &BTreeMap<String, Option<OsString>> { + &self.env + } + + /// Sets the `make` jobserver. See the [jobserver crate][jobserver_docs] for + /// more information. + /// + /// [jobserver_docs]: https://docs.rs/jobserver/0.1.6/jobserver/ + pub fn inherit_jobserver(&mut self, jobserver: &Client) -> &mut Self { + self.jobserver = Some(jobserver.clone()); + self + } + + /// Enables environment variable display. + pub fn display_env_vars(&mut self) -> &mut Self { + self.display_env_vars = true; + self + } + + /// Enables retrying with an argfile if hitting "command line too big" error + /// + /// This is primarily for the `@path` arg of rustc and rustdoc, which treat + /// each line as an command-line argument, so `LF` and `CRLF` bytes are not + /// valid as an argument for argfile at this moment. + /// For example, `RUSTDOCFLAGS="--crate-version foo\nbar" cargo doc` is + /// valid when invoking from command-line but not from argfile. + /// + /// To sum up, the limitations of the argfile are: + /// + /// - Must be valid UTF-8 encoded. + /// - Must not contain any newlines in each argument. + /// + /// Ref: + /// + /// - <https://doc.rust-lang.org/rustdoc/command-line-arguments.html#path-load-command-line-flags-from-a-path> + /// - <https://doc.rust-lang.org/rustc/command-line-arguments.html#path-load-command-line-flags-from-a-path> + pub fn retry_with_argfile(&mut self, enabled: bool) -> &mut Self { + self.retry_with_argfile = enabled; + self + } + + /// Sets a value that will be written to stdin of the process on launch. 
+ pub fn stdin<T: Into<Vec<u8>>>(&mut self, stdin: T) -> &mut Self { + self.stdin = Some(stdin.into()); + self + } + + fn should_retry_with_argfile(&self, err: &io::Error) -> bool { + self.retry_with_argfile && imp::command_line_too_big(err) + } + + /// Like [`Command::status`] but with a better error message. + pub fn status(&self) -> Result<ExitStatus> { + self._status() + .with_context(|| ProcessError::could_not_execute(self)) + } + + fn _status(&self) -> io::Result<ExitStatus> { + if !debug_force_argfile(self.retry_with_argfile) { + let mut cmd = self.build_command(); + match cmd.spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(mut child) => return child.wait(), + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + let status = cmd.spawn()?.wait(); + close_tempfile_and_log_error(argfile); + status + } + + /// Runs the process, waiting for completion, and mapping non-success exit codes to an error. + pub fn exec(&self) -> Result<()> { + let exit = self.status()?; + if exit.success() { + Ok(()) + } else { + Err(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(exit), + None, + ) + .into()) + } + } + + /// Replaces the current process with the target process. + /// + /// On Unix, this executes the process using the Unix syscall `execvp`, which will block + /// this process, and will only return if there is an error. + /// + /// On Windows this isn't technically possible. Instead we emulate it to the best of our + /// ability. One aspect we fix here is that we specify a handler for the Ctrl-C handler. + /// In doing so (and by effectively ignoring it) we should emulate proxying Ctrl-C + /// handling to the application at hand, which will either terminate or handle it itself. + /// According to Microsoft's documentation at + /// <https://docs.microsoft.com/en-us/windows/console/ctrl-c-and-ctrl-break-signals>. 
+ /// the Ctrl-C signal is sent to all processes attached to a terminal, which should + /// include our child process. If the child terminates then we'll reap them in Cargo + /// pretty quickly, and if the child handles the signal then we won't terminate + /// (and we shouldn't!) until the process itself later exits. + pub fn exec_replace(&self) -> Result<()> { + imp::exec_replace(self) + } + + /// Like [`Command::output`] but with a better error message. + pub fn output(&self) -> Result<Output> { + self._output() + .with_context(|| ProcessError::could_not_execute(self)) + } + + fn _output(&self) -> io::Result<Output> { + if !debug_force_argfile(self.retry_with_argfile) { + let mut cmd = self.build_command(); + match piped(&mut cmd, self.stdin.is_some()).spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(mut child) => { + if let Some(stdin) = &self.stdin { + child.stdin.take().unwrap().write_all(stdin)?; + } + return child.wait_with_output(); + } + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + let mut child = piped(&mut cmd, self.stdin.is_some()).spawn()?; + if let Some(stdin) = &self.stdin { + child.stdin.take().unwrap().write_all(stdin)?; + } + let output = child.wait_with_output(); + close_tempfile_and_log_error(argfile); + output + } + + /// Executes the process, returning the stdio output, or an error if non-zero exit status. + pub fn exec_with_output(&self) -> Result<Output> { + let output = self.output()?; + if output.status.success() { + Ok(output) + } else { + Err(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(output.status), + Some(&output), + ) + .into()) + } + } + + /// Executes a command, passing each line of stdout and stderr to the supplied callbacks, which + /// can mutate the string data. + /// + /// If any invocations of these function return an error, it will be propagated. 
+ /// + /// If `capture_output` is true, then all the output will also be buffered + /// and stored in the returned `Output` object. If it is false, no caching + /// is done, and the callbacks are solely responsible for handling the + /// output. + pub fn exec_with_streaming( + &self, + on_stdout_line: &mut dyn FnMut(&str) -> Result<()>, + on_stderr_line: &mut dyn FnMut(&str) -> Result<()>, + capture_output: bool, + ) -> Result<Output> { + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + + let mut callback_error = None; + let mut stdout_pos = 0; + let mut stderr_pos = 0; + + let spawn = |mut cmd| { + if !debug_force_argfile(self.retry_with_argfile) { + match piped(&mut cmd, false).spawn() { + Err(ref e) if self.should_retry_with_argfile(e) => {} + Err(e) => return Err(e), + Ok(child) => return Ok((child, None)), + } + } + let (mut cmd, argfile) = self.build_command_with_argfile()?; + Ok((piped(&mut cmd, false).spawn()?, Some(argfile))) + }; + + let status = (|| { + let cmd = self.build_command(); + let (mut child, argfile) = spawn(cmd)?; + let out = child.stdout.take().unwrap(); + let err = child.stderr.take().unwrap(); + read2(out, err, &mut |is_out, data, eof| { + let pos = if is_out { + &mut stdout_pos + } else { + &mut stderr_pos + }; + let idx = if eof { + data.len() + } else { + match data[*pos..].iter().rposition(|b| *b == b'\n') { + Some(i) => *pos + i + 1, + None => { + *pos = data.len(); + return; + } + } + }; + + let new_lines = &data[..idx]; + + for line in String::from_utf8_lossy(new_lines).lines() { + if callback_error.is_some() { + break; + } + let callback_result = if is_out { + on_stdout_line(line) + } else { + on_stderr_line(line) + }; + if let Err(e) = callback_result { + callback_error = Some(e); + break; + } + } + + if capture_output { + let dst = if is_out { &mut stdout } else { &mut stderr }; + dst.extend(new_lines); + } + + data.drain(..idx); + *pos = 0; + })?; + let status = child.wait(); + if let Some(argfile) = argfile { + 
close_tempfile_and_log_error(argfile); + } + status + })() + .with_context(|| ProcessError::could_not_execute(self))?; + let output = Output { + status, + stdout, + stderr, + }; + + { + let to_print = if capture_output { Some(&output) } else { None }; + if let Some(e) = callback_error { + let cx = ProcessError::new( + &format!("failed to parse process output: {}", self), + Some(output.status), + to_print, + ); + bail!(anyhow::Error::new(cx).context(e)); + } else if !output.status.success() { + bail!(ProcessError::new( + &format!("process didn't exit successfully: {}", self), + Some(output.status), + to_print, + )); + } + } + + Ok(output) + } + + /// Builds the command with an `@<path>` argfile that contains all the + /// arguments. This is primarily served for rustc/rustdoc command family. + fn build_command_with_argfile(&self) -> io::Result<(Command, NamedTempFile)> { + use std::io::Write as _; + + let mut tmp = tempfile::Builder::new() + .prefix("cargo-argfile.") + .tempfile()?; + + let mut arg = OsString::from("@"); + arg.push(tmp.path()); + let mut cmd = self.build_command_without_args(); + cmd.arg(arg); + log::debug!("created argfile at {} for {self}", tmp.path().display()); + + let cap = self.get_args().map(|arg| arg.len() + 1).sum::<usize>(); + let mut buf = Vec::with_capacity(cap); + for arg in &self.args { + let arg = arg.to_str().ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + format!( + "argument for argfile contains invalid UTF-8 characters: `{}`", + arg.to_string_lossy() + ), + ) + })?; + if arg.contains('\n') { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("argument for argfile contains newlines: `{arg}`"), + )); + } + writeln!(buf, "{arg}")?; + } + tmp.write_all(&mut buf)?; + Ok((cmd, tmp)) + } + + /// Builds a command from `ProcessBuilder` for everything but not `args`. 
/// Forces the command to use `@path` argfile.
///
/// You should set `__CARGO_TEST_FORCE_ARGFILE` to enable this. The switch is
/// only honored when debug assertions are compiled in and `retry_enabled` is
/// `true`.
fn debug_force_argfile(retry_enabled: bool) -> bool {
    if !cfg!(debug_assertions) || !retry_enabled {
        return false;
    }
    env::var("__CARGO_TEST_FORCE_ARGFILE").is_ok()
}
/// Creates new pipes for stderr, stdout, and optionally stdin.
///
/// stdin is connected to a pipe only when `pipe_stdin` is `true`; otherwise
/// it is attached to the null device. The same `cmd` is returned for chaining.
fn piped(cmd: &mut Command, pipe_stdin: bool) -> &mut Command {
    let stdin = if pipe_stdin {
        Stdio::piped()
    } else {
        Stdio::null()
    };
    cmd.stdin(stdin)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
}
+ TRUE + } + + pub fn exec_replace(process_builder: &ProcessBuilder) -> Result<()> { + unsafe { + if SetConsoleCtrlHandler(Some(ctrlc_handler), TRUE) == FALSE { + return Err(ProcessError::new("Could not set Ctrl-C handler.", None, None).into()); + } + } + + // Just execute the process as normal. + process_builder.exec() + } + + pub fn command_line_too_big(err: &io::Error) -> bool { + use windows_sys::Win32::Foundation::ERROR_FILENAME_EXCED_RANGE; + err.raw_os_error() == Some(ERROR_FILENAME_EXCED_RANGE as i32) + } +} + +#[cfg(test)] +mod tests { + use super::ProcessBuilder; + use std::fs; + + #[test] + fn argfile_build_succeeds() { + let mut cmd = ProcessBuilder::new("echo"); + cmd.args(["foo", "bar"].as_slice()); + let (cmd, argfile) = cmd.build_command_with_argfile().unwrap(); + + assert_eq!(cmd.get_program(), "echo"); + let cmd_args: Vec<_> = cmd.get_args().map(|s| s.to_str().unwrap()).collect(); + assert_eq!(cmd_args.len(), 1); + assert!(cmd_args[0].starts_with("@")); + assert!(cmd_args[0].contains("cargo-argfile.")); + + let buf = fs::read_to_string(argfile.path()).unwrap(); + assert_eq!(buf, "foo\nbar\n"); + } + + #[test] + fn argfile_build_fails_if_arg_contains_newline() { + let mut cmd = ProcessBuilder::new("echo"); + cmd.arg("foo\n"); + let err = cmd.build_command_with_argfile().unwrap_err(); + assert_eq!( + err.to_string(), + "argument for argfile contains newlines: `foo\n`" + ); + } + + #[test] + fn argfile_build_fails_if_arg_contains_invalid_utf8() { + let mut cmd = ProcessBuilder::new("echo"); + + #[cfg(windows)] + let invalid_arg = { + use std::os::windows::prelude::*; + std::ffi::OsString::from_wide(&[0x0066, 0x006f, 0xD800, 0x006f]) + }; + + #[cfg(unix)] + let invalid_arg = { + use std::os::unix::ffi::OsStrExt; + std::ffi::OsStr::from_bytes(&[0x66, 0x6f, 0x80, 0x6f]).to_os_string() + }; + + cmd.arg(invalid_arg); + let err = cmd.build_command_with_argfile().unwrap_err(); + assert_eq!( + err.to_string(), + "argument for argfile contains invalid UTF-8 
characters: `fo�o`" + ); + } +} diff --git a/crates/cargo-util/src/process_error.rs b/crates/cargo-util/src/process_error.rs new file mode 100644 index 0000000..9b4a38c --- /dev/null +++ b/crates/cargo-util/src/process_error.rs @@ -0,0 +1,200 @@ +//! Error value for [`crate::ProcessBuilder`] when a process fails. + +use std::fmt; +use std::process::{ExitStatus, Output}; +use std::str; + +#[derive(Debug)] +pub struct ProcessError { + /// A detailed description to show to the user why the process failed. + pub desc: String, + + /// The exit status of the process. + /// + /// This can be `None` if the process failed to launch (like process not + /// found) or if the exit status wasn't a code but was instead something + /// like termination via a signal. + pub code: Option<i32>, + + /// The stdout from the process. + /// + /// This can be `None` if the process failed to launch, or the output was + /// not captured. + pub stdout: Option<Vec<u8>>, + + /// The stderr from the process. + /// + /// This can be `None` if the process failed to launch, or the output was + /// not captured. + pub stderr: Option<Vec<u8>>, +} + +impl fmt::Display for ProcessError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.desc.fmt(f) + } +} + +impl std::error::Error for ProcessError {} + +impl ProcessError { + /// Creates a new [`ProcessError`]. + /// + /// * `status` can be `None` if the process did not launch. + /// * `output` can be `None` if the process did not launch, or output was not captured. + pub fn new(msg: &str, status: Option<ExitStatus>, output: Option<&Output>) -> ProcessError { + let exit = match status { + Some(s) => exit_status_to_string(s), + None => "never executed".to_string(), + }; + + Self::new_raw( + msg, + status.and_then(|s| s.code()), + &exit, + output.map(|s| s.stdout.as_slice()), + output.map(|s| s.stderr.as_slice()), + ) + } + + /// Creates a new [`ProcessError`] with the raw output data. 
+ /// + /// * `code` can be `None` for situations like being killed by a signal on unix. + pub fn new_raw( + msg: &str, + code: Option<i32>, + status: &str, + stdout: Option<&[u8]>, + stderr: Option<&[u8]>, + ) -> ProcessError { + let mut desc = format!("{} ({})", msg, status); + + if let Some(out) = stdout { + match str::from_utf8(out) { + Ok(s) if !s.trim().is_empty() => { + desc.push_str("\n--- stdout\n"); + desc.push_str(s); + } + Ok(..) | Err(..) => {} + } + } + if let Some(out) = stderr { + match str::from_utf8(out) { + Ok(s) if !s.trim().is_empty() => { + desc.push_str("\n--- stderr\n"); + desc.push_str(s); + } + Ok(..) | Err(..) => {} + } + } + + ProcessError { + desc, + code, + stdout: stdout.map(|s| s.to_vec()), + stderr: stderr.map(|s| s.to_vec()), + } + } + + /// Creates a [`ProcessError`] with "could not execute process {cmd}". + /// + /// * `cmd` is usually but not limited to [`std::process::Command`]. + pub fn could_not_execute(cmd: impl fmt::Display) -> ProcessError { + ProcessError::new(&format!("could not execute process {cmd}"), None, None) + } +} + +/// Converts an [`ExitStatus`] to a human-readable string suitable for +/// displaying to a user. 
+pub fn exit_status_to_string(status: ExitStatus) -> String { + return status_to_string(status); + + #[cfg(unix)] + fn status_to_string(status: ExitStatus) -> String { + use std::os::unix::process::*; + + if let Some(signal) = status.signal() { + let name = match signal as libc::c_int { + libc::SIGABRT => ", SIGABRT: process abort signal", + libc::SIGALRM => ", SIGALRM: alarm clock", + libc::SIGFPE => ", SIGFPE: erroneous arithmetic operation", + libc::SIGHUP => ", SIGHUP: hangup", + libc::SIGILL => ", SIGILL: illegal instruction", + libc::SIGINT => ", SIGINT: terminal interrupt signal", + libc::SIGKILL => ", SIGKILL: kill", + libc::SIGPIPE => ", SIGPIPE: write on a pipe with no one to read", + libc::SIGQUIT => ", SIGQUIT: terminal quit signal", + libc::SIGSEGV => ", SIGSEGV: invalid memory reference", + libc::SIGTERM => ", SIGTERM: termination signal", + libc::SIGBUS => ", SIGBUS: access to undefined memory", + #[cfg(not(target_os = "haiku"))] + libc::SIGSYS => ", SIGSYS: bad system call", + libc::SIGTRAP => ", SIGTRAP: trace/breakpoint trap", + _ => "", + }; + format!("signal: {}{}", signal, name) + } else { + status.to_string() + } + } + + #[cfg(windows)] + fn status_to_string(status: ExitStatus) -> String { + use windows_sys::Win32::Foundation::*; + + let mut base = status.to_string(); + let extra = match status.code().unwrap() as i32 { + STATUS_ACCESS_VIOLATION => "STATUS_ACCESS_VIOLATION", + STATUS_IN_PAGE_ERROR => "STATUS_IN_PAGE_ERROR", + STATUS_INVALID_HANDLE => "STATUS_INVALID_HANDLE", + STATUS_INVALID_PARAMETER => "STATUS_INVALID_PARAMETER", + STATUS_NO_MEMORY => "STATUS_NO_MEMORY", + STATUS_ILLEGAL_INSTRUCTION => "STATUS_ILLEGAL_INSTRUCTION", + STATUS_NONCONTINUABLE_EXCEPTION => "STATUS_NONCONTINUABLE_EXCEPTION", + STATUS_INVALID_DISPOSITION => "STATUS_INVALID_DISPOSITION", + STATUS_ARRAY_BOUNDS_EXCEEDED => "STATUS_ARRAY_BOUNDS_EXCEEDED", + STATUS_FLOAT_DENORMAL_OPERAND => "STATUS_FLOAT_DENORMAL_OPERAND", + STATUS_FLOAT_DIVIDE_BY_ZERO => 
"STATUS_FLOAT_DIVIDE_BY_ZERO", + STATUS_FLOAT_INEXACT_RESULT => "STATUS_FLOAT_INEXACT_RESULT", + STATUS_FLOAT_INVALID_OPERATION => "STATUS_FLOAT_INVALID_OPERATION", + STATUS_FLOAT_OVERFLOW => "STATUS_FLOAT_OVERFLOW", + STATUS_FLOAT_STACK_CHECK => "STATUS_FLOAT_STACK_CHECK", + STATUS_FLOAT_UNDERFLOW => "STATUS_FLOAT_UNDERFLOW", + STATUS_INTEGER_DIVIDE_BY_ZERO => "STATUS_INTEGER_DIVIDE_BY_ZERO", + STATUS_INTEGER_OVERFLOW => "STATUS_INTEGER_OVERFLOW", + STATUS_PRIVILEGED_INSTRUCTION => "STATUS_PRIVILEGED_INSTRUCTION", + STATUS_STACK_OVERFLOW => "STATUS_STACK_OVERFLOW", + STATUS_DLL_NOT_FOUND => "STATUS_DLL_NOT_FOUND", + STATUS_ORDINAL_NOT_FOUND => "STATUS_ORDINAL_NOT_FOUND", + STATUS_ENTRYPOINT_NOT_FOUND => "STATUS_ENTRYPOINT_NOT_FOUND", + STATUS_CONTROL_C_EXIT => "STATUS_CONTROL_C_EXIT", + STATUS_DLL_INIT_FAILED => "STATUS_DLL_INIT_FAILED", + STATUS_FLOAT_MULTIPLE_FAULTS => "STATUS_FLOAT_MULTIPLE_FAULTS", + STATUS_FLOAT_MULTIPLE_TRAPS => "STATUS_FLOAT_MULTIPLE_TRAPS", + STATUS_REG_NAT_CONSUMPTION => "STATUS_REG_NAT_CONSUMPTION", + STATUS_HEAP_CORRUPTION => "STATUS_HEAP_CORRUPTION", + STATUS_STACK_BUFFER_OVERRUN => "STATUS_STACK_BUFFER_OVERRUN", + STATUS_ASSERTION_FAILURE => "STATUS_ASSERTION_FAILURE", + _ => return base, + }; + base.push_str(", "); + base.push_str(extra); + base + } +} + +/// Returns `true` if the given process exit code is something a normal +/// process would exit with. +/// +/// This helps differentiate from abnormal termination codes, such as +/// segmentation faults or signals. +pub fn is_simple_exit_code(code: i32) -> bool { + // Typical unix exit codes are 0 to 127. + // Windows doesn't have anything "typical", and is a + // 32-bit number (which appears signed here, but is really + // unsigned). However, most of the interesting NTSTATUS + // codes are very large. This is just a rough + // approximation of which codes are "normal" and which + // ones are abnormal termination. 
+ code >= 0 && code <= 127 +} diff --git a/crates/cargo-util/src/read2.rs b/crates/cargo-util/src/read2.rs new file mode 100644 index 0000000..742dc1d --- /dev/null +++ b/crates/cargo-util/src/read2.rs @@ -0,0 +1,178 @@ +pub use self::imp::read2; + +#[cfg(unix)] +mod imp { + use std::io; + use std::io::prelude::*; + use std::mem; + use std::os::unix::prelude::*; + use std::process::{ChildStderr, ChildStdout}; + + pub fn read2( + mut out_pipe: ChildStdout, + mut err_pipe: ChildStderr, + data: &mut dyn FnMut(bool, &mut Vec<u8>, bool), + ) -> io::Result<()> { + unsafe { + libc::fcntl(out_pipe.as_raw_fd(), libc::F_SETFL, libc::O_NONBLOCK); + libc::fcntl(err_pipe.as_raw_fd(), libc::F_SETFL, libc::O_NONBLOCK); + } + + let mut out_done = false; + let mut err_done = false; + let mut out = Vec::new(); + let mut err = Vec::new(); + + let mut fds: [libc::pollfd; 2] = unsafe { mem::zeroed() }; + fds[0].fd = out_pipe.as_raw_fd(); + fds[0].events = libc::POLLIN; + fds[1].fd = err_pipe.as_raw_fd(); + fds[1].events = libc::POLLIN; + let mut nfds = 2; + let mut errfd = 1; + + while nfds > 0 { + // wait for either pipe to become readable using `select` + let r = unsafe { libc::poll(fds.as_mut_ptr(), nfds, -1) }; + if r == -1 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + + // Read as much as we can from each pipe, ignoring EWOULDBLOCK or + // EAGAIN. If we hit EOF, then this will happen because the underlying + // reader will return Ok(0), in which case we'll see `Ok` ourselves. In + // this case we flip the other fd back into blocking mode and read + // whatever's leftover on that file descriptor. + let handle = |res: io::Result<_>| match res { + Ok(_) => Ok(true), + Err(e) => { + if e.kind() == io::ErrorKind::WouldBlock { + Ok(false) + } else { + Err(e) + } + } + }; + if !err_done && fds[errfd].revents != 0 && handle(err_pipe.read_to_end(&mut err))? 
{ + err_done = true; + nfds -= 1; + } + data(false, &mut err, err_done); + if !out_done && fds[0].revents != 0 && handle(out_pipe.read_to_end(&mut out))? { + out_done = true; + fds[0].fd = err_pipe.as_raw_fd(); + errfd = 0; + nfds -= 1; + } + data(true, &mut out, out_done); + } + Ok(()) + } +} + +#[cfg(windows)] +mod imp { + use std::io; + use std::os::windows::prelude::*; + use std::process::{ChildStderr, ChildStdout}; + use std::slice; + + use miow::iocp::{CompletionPort, CompletionStatus}; + use miow::pipe::NamedPipe; + use miow::Overlapped; + use windows_sys::Win32::Foundation::ERROR_BROKEN_PIPE; + + struct Pipe<'a> { + dst: &'a mut Vec<u8>, + overlapped: Overlapped, + pipe: NamedPipe, + done: bool, + } + + pub fn read2( + out_pipe: ChildStdout, + err_pipe: ChildStderr, + data: &mut dyn FnMut(bool, &mut Vec<u8>, bool), + ) -> io::Result<()> { + let mut out = Vec::new(); + let mut err = Vec::new(); + + let port = CompletionPort::new(1)?; + port.add_handle(0, &out_pipe)?; + port.add_handle(1, &err_pipe)?; + + unsafe { + let mut out_pipe = Pipe::new(out_pipe, &mut out); + let mut err_pipe = Pipe::new(err_pipe, &mut err); + + out_pipe.read()?; + err_pipe.read()?; + + let mut status = [CompletionStatus::zero(), CompletionStatus::zero()]; + + while !out_pipe.done || !err_pipe.done { + for status in port.get_many(&mut status, None)? 
{ + if status.token() == 0 { + out_pipe.complete(status); + data(true, out_pipe.dst, out_pipe.done); + out_pipe.read()?; + } else { + err_pipe.complete(status); + data(false, err_pipe.dst, err_pipe.done); + err_pipe.read()?; + } + } + } + + Ok(()) + } + } + + impl<'a> Pipe<'a> { + unsafe fn new<P: IntoRawHandle>(p: P, dst: &'a mut Vec<u8>) -> Pipe<'a> { + Pipe { + dst, + pipe: NamedPipe::from_raw_handle(p.into_raw_handle()), + overlapped: Overlapped::zero(), + done: false, + } + } + + unsafe fn read(&mut self) -> io::Result<()> { + let dst = slice_to_end(self.dst); + match self.pipe.read_overlapped(dst, self.overlapped.raw()) { + Ok(_) => Ok(()), + Err(e) => { + if e.raw_os_error() == Some(ERROR_BROKEN_PIPE as i32) { + self.done = true; + Ok(()) + } else { + Err(e) + } + } + } + } + + unsafe fn complete(&mut self, status: &CompletionStatus) { + let prev = self.dst.len(); + self.dst.set_len(prev + status.bytes_transferred() as usize); + if status.bytes_transferred() == 0 { + self.done = true; + } + } + } + + unsafe fn slice_to_end(v: &mut Vec<u8>) -> &mut [u8] { + if v.capacity() == 0 { + v.reserve(16); + } + if v.capacity() == v.len() { + v.reserve(1); + } + slice::from_raw_parts_mut(v.as_mut_ptr().add(v.len()), v.capacity() - v.len()) + } +} diff --git a/crates/cargo-util/src/registry.rs b/crates/cargo-util/src/registry.rs new file mode 100644 index 0000000..6b1ccd2 --- /dev/null +++ b/crates/cargo-util/src/registry.rs @@ -0,0 +1,45 @@ +/// Make a path to a dependency, which aligns to +/// +/// - [index from of Cargo's index on filesystem][1], and +/// - [index from Crates.io][2]. 
/// Make a path to a dependency, which aligns to
///
/// - [index form of Cargo's index on filesystem][1], and
/// - [index from Crates.io][2].
///
/// The directory prefix depends on the byte length of `dep_name`: `1`, `2`,
/// `3/<first byte>`, or `<bytes 0..2>/<bytes 2..4>` for longer names. When
/// `prefix_only` is `false`, `/<dep_name>` is appended to that prefix.
///
/// # Panics
///
/// Panics if `dep_name` is empty, or if one of the byte offsets used for the
/// prefix does not fall on a character boundary.
///
/// [1]: https://docs.rs/cargo/latest/cargo/sources/registry/index.html#the-format-of-the-index
/// [2]: https://github.com/rust-lang/crates.io-index
pub fn make_dep_path(dep_name: &str, prefix_only: bool) -> String {
    let mut path = match dep_name.len() {
        1 => String::from("1"),
        2 => String::from("2"),
        3 => format!("3/{}", &dep_name[..1]),
        _ => format!("{}/{}", &dep_name[0..2], &dep_name[2..4]),
    };
    if !prefix_only {
        path.push('/');
        path.push_str(dep_name);
    }
    path
}
[0; 64 * 1024]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break Ok(self); + } + self.update(&buf[..n]); + } + } + + pub fn update_path<P: AsRef<Path>>(&mut self, path: P) -> Result<&mut Sha256> { + let path = path.as_ref(); + let file = paths::open(path)?; + self.update_file(&file) + .with_context(|| format!("failed to read `{}`", path.display()))?; + Ok(self) + } + + pub fn finish(&mut self) -> [u8; 32] { + let mut ret = [0u8; 32]; + let data = self.0.finish(); + ret.copy_from_slice(&data[..]); + ret + } + + pub fn finish_hex(&mut self) -> String { + hex::encode(self.finish()) + } +} + +impl Default for Sha256 { + fn default() -> Self { + Self::new() + } +} |