From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- third_party/rust/walkdir/src/dent.rs | 375 ++++++++ third_party/rust/walkdir/src/error.rs | 265 ++++++ third_party/rust/walkdir/src/lib.rs | 1161 +++++++++++++++++++++++ third_party/rust/walkdir/src/tests/mod.rs | 4 + third_party/rust/walkdir/src/tests/recursive.rs | 1023 ++++++++++++++++++++ third_party/rust/walkdir/src/tests/util.rs | 252 +++++ third_party/rust/walkdir/src/util.rs | 25 + 7 files changed, 3105 insertions(+) create mode 100644 third_party/rust/walkdir/src/dent.rs create mode 100644 third_party/rust/walkdir/src/error.rs create mode 100644 third_party/rust/walkdir/src/lib.rs create mode 100644 third_party/rust/walkdir/src/tests/mod.rs create mode 100644 third_party/rust/walkdir/src/tests/recursive.rs create mode 100644 third_party/rust/walkdir/src/tests/util.rs create mode 100644 third_party/rust/walkdir/src/util.rs (limited to 'third_party/rust/walkdir/src') diff --git a/third_party/rust/walkdir/src/dent.rs b/third_party/rust/walkdir/src/dent.rs new file mode 100644 index 0000000000..a28ed3dea0 --- /dev/null +++ b/third_party/rust/walkdir/src/dent.rs @@ -0,0 +1,375 @@ +use std::ffi::OsStr; +use std::fmt; +use std::fs::{self, FileType}; +use std::path::{Path, PathBuf}; + +use crate::error::Error; +use crate::Result; + +/// A directory entry. +/// +/// This is the type of value that is yielded from the iterators defined in +/// this crate. +/// +/// On Unix systems, this type implements the [`DirEntryExt`] trait, which +/// provides efficient access to the inode number of the directory entry. +/// +/// # Differences with `std::fs::DirEntry` +/// +/// This type mostly mirrors the type by the same name in [`std::fs`]. There +/// are some differences however: +/// +/// * All recursive directory iterators must inspect the entry's type. 
+/// Therefore, the value is stored and its access is guaranteed to be cheap and +/// successful. +/// * [`path`] and [`file_name`] return borrowed variants. +/// * If [`follow_links`] was enabled on the originating iterator, then all +/// operations except for [`path`] operate on the link target. Otherwise, all +/// operations operate on the symbolic link. +/// +/// [`std::fs`]: https://doc.rust-lang.org/stable/std/fs/index.html +/// [`path`]: #method.path +/// [`file_name`]: #method.file_name +/// [`follow_links`]: struct.WalkDir.html#method.follow_links +/// [`DirEntryExt`]: trait.DirEntryExt.html +pub struct DirEntry { + /// The path as reported by the [`fs::ReadDir`] iterator (even if it's a + /// symbolic link). + /// + /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html + path: PathBuf, + /// The file type. Necessary for recursive iteration, so store it. + ty: FileType, + /// Is set when this entry was created from a symbolic link and the user + /// expects the iterator to follow symbolic links. + follow_link: bool, + /// The depth at which this entry was generated relative to the root. + depth: usize, + /// The underlying inode number (Unix only). + #[cfg(unix)] + ino: u64, + /// The underlying metadata (Windows only). We store this on Windows + /// because this comes for free while reading a directory. + /// + /// We use this to determine whether an entry is a directory or not, which + /// works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + metadata: fs::Metadata, +} + +impl DirEntry { + /// The full path that this entry represents. + /// + /// The full path is created by joining the parents of this entry up to the + /// root initially given to [`WalkDir::new`] with the file name of this + /// entry. + /// + /// Note that this *always* returns the path reported by the underlying + /// directory entry, even when symbolic links are followed. 
To get the + /// target path, use [`path_is_symlink`] to (cheaply) check if this entry + /// corresponds to a symbolic link, and [`std::fs::read_link`] to resolve + /// the target. + /// + /// [`WalkDir::new`]: struct.WalkDir.html#method.new + /// [`path_is_symlink`]: struct.DirEntry.html#method.path_is_symlink + /// [`std::fs::read_link`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html + pub fn path(&self) -> &Path { + &self.path + } + + /// The full path that this entry represents. + /// + /// Analogous to [`path`], but moves ownership of the path. + /// + /// [`path`]: struct.DirEntry.html#method.path + pub fn into_path(self) -> PathBuf { + self.path + } + + /// Returns `true` if and only if this entry was created from a symbolic + /// link. This is unaffected by the [`follow_links`] setting. + /// + /// When `true`, the value returned by the [`path`] method is a + /// symbolic link name. To get the full target path, you must call + /// [`std::fs::read_link(entry.path())`]. + /// + /// [`path`]: struct.DirEntry.html#method.path + /// [`follow_links`]: struct.WalkDir.html#method.follow_links + /// [`std::fs::read_link(entry.path())`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html + pub fn path_is_symlink(&self) -> bool { + self.ty.is_symlink() || self.follow_link + } + + /// Return the metadata for the file that this entry points to. + /// + /// This will follow symbolic links if and only if the [`WalkDir`] value + /// has [`follow_links`] enabled. + /// + /// # Platform behavior + /// + /// This always calls [`std::fs::symlink_metadata`]. + /// + /// If this entry is a symbolic link and [`follow_links`] is enabled, then + /// [`std::fs::metadata`] is called instead. + /// + /// # Errors + /// + /// Similar to [`std::fs::metadata`], returns errors for path values that + /// the program does not have permissions to access or if the path does not + /// exist. 
+ /// + /// [`WalkDir`]: struct.WalkDir.html + /// [`follow_links`]: struct.WalkDir.html#method.follow_links + /// [`std::fs::metadata`]: https://doc.rust-lang.org/std/fs/fn.metadata.html + /// [`std::fs::symlink_metadata`]: https://doc.rust-lang.org/stable/std/fs/fn.symlink_metadata.html + pub fn metadata(&self) -> Result<fs::Metadata> { + self.metadata_internal() + } + + #[cfg(windows)] + fn metadata_internal(&self) -> Result<fs::Metadata> { + if self.follow_link { + fs::metadata(&self.path) + } else { + Ok(self.metadata.clone()) + } + .map_err(|err| Error::from_entry(self, err)) + } + + #[cfg(not(windows))] + fn metadata_internal(&self) -> Result<fs::Metadata> { + if self.follow_link { + fs::metadata(&self.path) + } else { + fs::symlink_metadata(&self.path) + } + .map_err(|err| Error::from_entry(self, err)) + } + + /// Return the file type for the file that this entry points to. + /// + /// If this is a symbolic link and [`follow_links`] is `true`, then this + /// returns the type of the target. + /// + /// This never makes any system calls. + /// + /// [`follow_links`]: struct.WalkDir.html#method.follow_links + pub fn file_type(&self) -> fs::FileType { + self.ty + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.path.file_name().unwrap_or_else(|| self.path.as_os_str()) + } + + /// Returns the depth at which this entry was created relative to the root. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on `WalkDir`. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + pub fn depth(&self) -> usize { + self.depth + } + + /// Returns true if and only if this entry points to a directory. 
+ /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + pub(crate) fn is_dir(&self) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + self.metadata.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + } + + /// Returns true if and only if this entry points to a directory. + #[cfg(not(windows))] + pub(crate) fn is_dir(&self) -> bool { + self.ty.is_dir() + } + + #[cfg(windows)] + pub(crate) fn from_entry( + depth: usize, + ent: &fs::DirEntry, + ) -> Result<DirEntry> { + let path = ent.path(); + let ty = ent + .file_type() + .map_err(|err| Error::from_path(depth, path.clone(), err))?; + let md = ent + .metadata() + .map_err(|err| Error::from_path(depth, path.clone(), err))?; + Ok(DirEntry { + path: path, + ty: ty, + follow_link: false, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + pub(crate) fn from_entry( + depth: usize, + ent: &fs::DirEntry, + ) -> Result<DirEntry> { + use std::os::unix::fs::DirEntryExt; + + let ty = ent + .file_type() + .map_err(|err| Error::from_path(depth, ent.path(), err))?; + Ok(DirEntry { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + ino: ent.ino(), + }) + } + + #[cfg(not(any(unix, windows)))] + pub(crate) fn from_entry( + depth: usize, + ent: &fs::DirEntry, + ) -> Result<DirEntry> { + let ty = ent + .file_type() + .map_err(|err| Error::from_path(depth, ent.path(), err))?; + Ok(DirEntry { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + }) + } + + #[cfg(windows)] + pub(crate) fn from_path( + depth: usize, + pb: PathBuf, + follow: bool, + ) -> Result<DirEntry> { + let md = if follow { + fs::metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? + } else { + fs::symlink_metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? 
+ }; + Ok(DirEntry { + path: pb, + ty: md.file_type(), + follow_link: follow, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + pub(crate) fn from_path( + depth: usize, + pb: PathBuf, + follow: bool, + ) -> Result<DirEntry> { + use std::os::unix::fs::MetadataExt; + + let md = if follow { + fs::metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? + } else { + fs::symlink_metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? + }; + Ok(DirEntry { + path: pb, + ty: md.file_type(), + follow_link: follow, + depth: depth, + ino: md.ino(), + }) + } + + #[cfg(not(any(unix, windows)))] + pub(crate) fn from_path( + depth: usize, + pb: PathBuf, + follow: bool, + ) -> Result<DirEntry> { + let md = if follow { + fs::metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? + } else { + fs::symlink_metadata(&pb) + .map_err(|err| Error::from_path(depth, pb.clone(), err))? + }; + Ok(DirEntry { + path: pb, + ty: md.file_type(), + follow_link: follow, + depth: depth, + }) + } +} + +impl Clone for DirEntry { + #[cfg(windows)] + fn clone(&self) -> DirEntry { + DirEntry { + path: self.path.clone(), + ty: self.ty, + follow_link: self.follow_link, + depth: self.depth, + metadata: self.metadata.clone(), + } + } + + #[cfg(unix)] + fn clone(&self) -> DirEntry { + DirEntry { + path: self.path.clone(), + ty: self.ty, + follow_link: self.follow_link, + depth: self.depth, + ino: self.ino, + } + } + + #[cfg(not(any(unix, windows)))] + fn clone(&self) -> DirEntry { + DirEntry { + path: self.path.clone(), + ty: self.ty, + follow_link: self.follow_link, + depth: self.depth, + } + } +} + +impl fmt::Debug for DirEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "DirEntry({:?})", self.path) + } +} + +/// Unix-specific extension methods for `walkdir::DirEntry` +#[cfg(unix)] +pub trait DirEntryExt { + /// Returns the underlying `d_ino` field in the contained `dirent` + /// structure. 
+ fn ino(&self) -> u64; +} + +#[cfg(unix)] +impl DirEntryExt for DirEntry { + /// Returns the underlying `d_ino` field in the contained `dirent` + /// structure. + fn ino(&self) -> u64 { + self.ino + } +} diff --git a/third_party/rust/walkdir/src/error.rs b/third_party/rust/walkdir/src/error.rs new file mode 100644 index 0000000000..9e25a075ec --- /dev/null +++ b/third_party/rust/walkdir/src/error.rs @@ -0,0 +1,265 @@ +use std::error; +use std::fmt; +use std::io; +use std::path::{Path, PathBuf}; + +use crate::DirEntry; + +/// An error produced by recursively walking a directory. +/// +/// This error type is a light wrapper around [`std::io::Error`]. In +/// particular, it adds the following information: +/// +/// * The depth at which the error occurred in the file tree, relative to the +/// root. +/// * The path, if any, associated with the IO error. +/// * An indication that a loop occurred when following symbolic links. In this +/// case, there is no underlying IO error. +/// +/// To maintain good ergonomics, this type has a +/// [`impl From<Error> for std::io::Error`][impl] defined which preserves the original context. +/// This allows you to use an [`io::Result`] with methods in this crate if you don't care about +/// accessing the underlying error data in a structured form. +/// +/// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html +/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html +/// [impl]: struct.Error.html#impl-From%3CError%3E +#[derive(Debug)] +pub struct Error { + depth: usize, + inner: ErrorInner, +} + +#[derive(Debug)] +enum ErrorInner { + Io { path: Option<PathBuf>, err: io::Error }, + Loop { ancestor: PathBuf, child: PathBuf }, +} + +impl Error { + /// Returns the path associated with this error if one exists. + /// + /// For example, if an error occurred while opening a directory handle, + /// the error will include the path passed to [`std::fs::read_dir`]. 
+ /// + /// [`std::fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html + pub fn path(&self) -> Option<&Path> { + match self.inner { + ErrorInner::Io { path: None, .. } => None, + ErrorInner::Io { path: Some(ref path), .. } => Some(path), + ErrorInner::Loop { ref child, .. } => Some(child), + } + } + + /// Returns the path at which a cycle was detected. + /// + /// If no cycle was detected, [`None`] is returned. + /// + /// A cycle is detected when a directory entry is equivalent to one of + /// its ancestors. + /// + /// To get the path to the child directory entry in the cycle, use the + /// [`path`] method. + /// + /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None + /// [`path`]: struct.Error.html#method.path + pub fn loop_ancestor(&self) -> Option<&Path> { + match self.inner { + ErrorInner::Loop { ref ancestor, .. } => Some(ancestor), + _ => None, + } + } + + /// Returns the depth at which this error occurred relative to the root. + /// + /// The smallest depth is `0` and always corresponds to the path given to + /// the [`new`] function on [`WalkDir`]. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// [`new`]: struct.WalkDir.html#method.new + /// [`WalkDir`]: struct.WalkDir.html + pub fn depth(&self) -> usize { + self.depth + } + + /// Inspect the original [`io::Error`] if there is one. + /// + /// [`None`] is returned if the [`Error`] doesn't correspond to an + /// [`io::Error`]. This might happen, for example, when the error was + /// produced because a cycle was found in the directory tree while + /// following symbolic links. + /// + /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To + /// obtain an owned value, the [`into_io_error`] can be used instead. 
+ /// + /// > This is the original [`io::Error`] and is _not_ the same as + /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the + /// error. + /// + /// # Example + /// + /// ```rust,no_run + /// use std::io; + /// use std::path::Path; + /// + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo") { + /// match entry { + /// Ok(entry) => println!("{}", entry.path().display()), + /// Err(err) => { + /// let path = err.path().unwrap_or(Path::new("")).display(); + /// println!("failed to access entry {}", path); + /// if let Some(inner) = err.io_error() { + /// match inner.kind() { + /// io::ErrorKind::InvalidData => { + /// println!( + /// "entry contains invalid data: {}", + /// inner) + /// } + /// io::ErrorKind::PermissionDenied => { + /// println!( + /// "Missing permission to read entry: {}", + /// inner) + /// } + /// _ => { + /// println!( + /// "Unexpected error occurred: {}", + /// inner) + /// } + /// } + /// } + /// } + /// } + /// } + /// ``` + /// + /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None + /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html + /// [`Error`]: struct.Error.html + /// [`into_io_error`]: struct.Error.html#method.into_io_error + /// [impl]: struct.Error.html#impl-From%3CError%3E + pub fn io_error(&self) -> Option<&io::Error> { + match self.inner { + ErrorInner::Io { ref err, .. } => Some(err), + ErrorInner::Loop { .. } => None, + } + } + + /// Similar to [`io_error`] except consumes self to convert to the original + /// [`io::Error`] if one exists. + /// + /// [`io_error`]: struct.Error.html#method.io_error + /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + pub fn into_io_error(self) -> Option<io::Error> { + match self.inner { + ErrorInner::Io { err, .. } => Some(err), + ErrorInner::Loop { .. 
} => None, + } + } + + pub(crate) fn from_path( + depth: usize, + pb: PathBuf, + err: io::Error, + ) -> Self { + Error { + depth: depth, + inner: ErrorInner::Io { path: Some(pb), err: err }, + } + } + + pub(crate) fn from_entry(dent: &DirEntry, err: io::Error) -> Self { + Error { + depth: dent.depth(), + inner: ErrorInner::Io { + path: Some(dent.path().to_path_buf()), + err: err, + }, + } + } + + pub(crate) fn from_io(depth: usize, err: io::Error) -> Self { + Error { depth: depth, inner: ErrorInner::Io { path: None, err: err } } + } + + pub(crate) fn from_loop( + depth: usize, + ancestor: &Path, + child: &Path, + ) -> Self { + Error { + depth: depth, + inner: ErrorInner::Loop { + ancestor: ancestor.to_path_buf(), + child: child.to_path_buf(), + }, + } + } +} + +impl error::Error for Error { + #[allow(deprecated)] + fn description(&self) -> &str { + match self.inner { + ErrorInner::Io { ref err, .. } => err.description(), + ErrorInner::Loop { .. } => "file system loop found", + } + } + + fn cause(&self) -> Option<&dyn error::Error> { + self.source() + } + + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self.inner { + ErrorInner::Io { ref err, .. } => Some(err), + ErrorInner::Loop { .. } => None, + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.inner { + ErrorInner::Io { path: None, ref err } => err.fmt(f), + ErrorInner::Io { path: Some(ref path), ref err } => write!( + f, + "IO error for operation on {}: {}", + path.display(), + err + ), + ErrorInner::Loop { ref ancestor, ref child } => write!( + f, + "File system loop found: \ + {} points to an ancestor {}", + child.display(), + ancestor.display() + ), + } + } +} + +impl From<Error> for io::Error { + /// Convert the [`Error`] to an [`io::Error`], preserving the original + /// [`Error`] as the ["inner error"]. Note that this also makes the display + /// of the error include the context. 
+ /// + /// This is different from [`into_io_error`] which returns the original + /// [`io::Error`]. + /// + /// [`Error`]: struct.Error.html + /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + /// ["inner error"]: https://doc.rust-lang.org/std/io/struct.Error.html#method.into_inner + /// [`into_io_error`]: struct.Error.html#method.into_io_error + fn from(walk_err: Error) -> io::Error { + let kind = match walk_err { + Error { inner: ErrorInner::Io { ref err, .. }, .. } => err.kind(), + Error { inner: ErrorInner::Loop { .. }, .. } => { + io::ErrorKind::Other + } + }; + io::Error::new(kind, walk_err) + } +} diff --git a/third_party/rust/walkdir/src/lib.rs b/third_party/rust/walkdir/src/lib.rs new file mode 100644 index 0000000000..929c5655e4 --- /dev/null +++ b/third_party/rust/walkdir/src/lib.rs @@ -0,0 +1,1161 @@ +/*! +Crate `walkdir` provides an efficient and cross platform implementation +of recursive directory traversal. Several options are exposed to control +iteration, such as whether to follow symbolic links (default off), limit the +maximum number of simultaneous open file descriptors and the ability to +efficiently skip descending into directories. + +To use this crate, add `walkdir` as a dependency to your project's +`Cargo.toml`: + +```toml +[dependencies] +walkdir = "2" +``` + +# From the top + +The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values +yielded by the iterator. Finally, the [`Error`] type is a small wrapper around +[`std::io::Error`] with additional information, such as if a loop was detected +while following symbolic links (not enabled by default). 
+ +[`WalkDir`]: struct.WalkDir.html +[`DirEntry`]: struct.DirEntry.html +[`Error`]: struct.Error.html +[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + +# Example + +The following code recursively iterates over the directory given and prints +the path for each entry: + +```no_run +use walkdir::WalkDir; +# use walkdir::Error; + +# fn try_main() -> Result<(), Error> { +for entry in WalkDir::new("foo") { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +Or, if you'd like to iterate over all entries and ignore any errors that +may arise, use [`filter_map`]. (e.g., This code below will silently skip +directories that the owner of the running process does not have permission to +access.) + +```no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { + println!("{}", entry.path().display()); +} +``` + +[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map + +# Example: follow symbolic links + +The same code as above, except [`follow_links`] is enabled: + +```no_run +use walkdir::WalkDir; +# use walkdir::Error; + +# fn try_main() -> Result<(), Error> { +for entry in WalkDir::new("foo").follow_links(true) { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +[`follow_links`]: struct.WalkDir.html#method.follow_links + +# Example: skip hidden files and directories on unix + +This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files +and directories efficiently (i.e. 
without recursing into hidden directories): + +```no_run +use walkdir::{DirEntry, WalkDir}; +# use walkdir::Error; + +fn is_hidden(entry: &DirEntry) -> bool { + entry.file_name() + .to_str() + .map(|s| s.starts_with(".")) + .unwrap_or(false) +} + +# fn try_main() -> Result<(), Error> { +let walker = WalkDir::new("foo").into_iter(); +for entry in walker.filter_entry(|e| !is_hidden(e)) { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +[`filter_entry`]: struct.IntoIter.html#method.filter_entry +*/ + +#![deny(missing_docs)] +#![allow(unknown_lints)] + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +use std::cmp::{min, Ordering}; +use std::fmt; +use std::fs::{self, ReadDir}; +use std::io; +use std::path::{Path, PathBuf}; +use std::result; +use std::vec; + +use same_file::Handle; + +pub use crate::dent::DirEntry; +#[cfg(unix)] +pub use crate::dent::DirEntryExt; +pub use crate::error::Error; + +mod dent; +mod error; +#[cfg(test)] +mod tests; +mod util; + +/// Like try, but for iterators that return [`Option<Result<_, _>>`]. +/// +/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html +macro_rules! itry { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(err) => return Some(Err(From::from(err))), + } + }; +} + +/// A result type for walkdir operations. +/// +/// Note that this result type embeds the error type in this crate. This +/// is only useful if you care about the additional information provided by +/// the error (such as the path associated with the error or whether a loop +/// was detected). If you want things to Just Work, then you can use +/// [`io::Result`] instead since the error type in this package will +/// automatically convert to an [`io::Result`] when using the [`try!`] macro. 
+/// +/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html +/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html +pub type Result<T> = ::std::result::Result<T, Error>; + +/// A builder to create an iterator for recursively walking a directory. +/// +/// Results are returned in depth first fashion, with directories yielded +/// before their contents. If [`contents_first`] is true, contents are yielded +/// before their directories. The order is unspecified but if [`sort_by`] is +/// given, directory entries are sorted according to this function. Directory +/// entries `.` and `..` are always omitted. +/// +/// If an error occurs at any point during iteration, then it is returned in +/// place of its corresponding directory entry and iteration continues as +/// normal. If an error occurs while opening a directory for reading, then it +/// is not descended into (but the error is still yielded by the iterator). +/// Iteration may be stopped at any time. When the iterator is destroyed, all +/// resources associated with it are freed. +/// +/// [`contents_first`]: struct.WalkDir.html#method.contents_first +/// [`sort_by`]: struct.WalkDir.html#method.sort_by +/// +/// # Usage +/// +/// This type implements [`IntoIterator`] so that it may be used as the subject +/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want +/// to use iterator adapters such as [`filter_entry`]. +/// +/// Idiomatic use of this type should use method chaining to set desired +/// options. 
For example, this only shows entries with a depth of `1`, `2` or +/// `3` (relative to `foo`): +/// +/// ```no_run +/// use walkdir::WalkDir; +/// # use walkdir::Error; +/// +/// # fn try_main() -> Result<(), Error> { +/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { +/// println!("{}", entry?.path().display()); +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html +/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter +/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry +/// +/// Note that the iterator by default includes the top-most directory. Since +/// this is the only directory yielded with depth `0`, it is easy to ignore it +/// with the [`min_depth`] setting: +/// +/// ```no_run +/// use walkdir::WalkDir; +/// # use walkdir::Error; +/// +/// # fn try_main() -> Result<(), Error> { +/// for entry in WalkDir::new("foo").min_depth(1) { +/// println!("{}", entry?.path().display()); +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// [`min_depth`]: struct.WalkDir.html#method.min_depth +/// +/// This will only return descendents of the `foo` directory and not `foo` +/// itself. +/// +/// # Loops +/// +/// This iterator (like most/all recursive directory iterators) assumes that +/// no loops can be made with *hard* links on your file system. In particular, +/// this would require creating a hard link to a directory such that it creates +/// a loop. On most platforms, this operation is illegal. +/// +/// Note that when following symbolic/soft links, loops are detected and an +/// error is reported. 
+#[derive(Debug)] +pub struct WalkDir { + opts: WalkDirOptions, + root: PathBuf, +} + +struct WalkDirOptions { + follow_links: bool, + max_open: usize, + min_depth: usize, + max_depth: usize, + sorter: Option< + Box< + dyn FnMut(&DirEntry, &DirEntry) -> Ordering + + Send + + Sync + + 'static, + >, + >, + contents_first: bool, + same_file_system: bool, +} + +impl fmt::Debug for WalkDirOptions { + fn fmt( + &self, + f: &mut fmt::Formatter<'_>, + ) -> result::Result<(), fmt::Error> { + let sorter_str = if self.sorter.is_some() { + // FnMut isn't `Debug` + "Some(...)" + } else { + "None" + }; + f.debug_struct("WalkDirOptions") + .field("follow_links", &self.follow_links) + .field("max_open", &self.max_open) + .field("min_depth", &self.min_depth) + .field("max_depth", &self.max_depth) + .field("sorter", &sorter_str) + .field("contents_first", &self.contents_first) + .field("same_file_system", &self.same_file_system) + .finish() + } +} + +impl WalkDir { + /// Create a builder for a recursive directory iterator starting at the + /// file path `root`. If `root` is a directory, then it is the first item + /// yielded by the iterator. If `root` is a file, then it is the first + /// and only item yielded by the iterator. If `root` is a symlink, then it + /// is always followed for the purposes of directory traversal. (A root + /// `DirEntry` still obeys its documentation with respect to symlinks and + /// the `follow_links` setting.) + pub fn new<P: AsRef<Path>>(root: P) -> Self { + WalkDir { + opts: WalkDirOptions { + follow_links: false, + max_open: 10, + min_depth: 0, + max_depth: ::std::usize::MAX, + sorter: None, + contents_first: false, + same_file_system: false, + }, + root: root.as_ref().to_path_buf(), + } + } + + /// Set the minimum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. 
Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + pub fn min_depth(mut self, depth: usize) -> Self { + self.opts.min_depth = depth; + if self.opts.min_depth > self.opts.max_depth { + self.opts.min_depth = self.opts.max_depth; + } + self + } + + /// Set the maximum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// Note that this will not simply filter the entries of the iterator, but + /// it will actually avoid descending into directories when the depth is + /// exceeded. + pub fn max_depth(mut self, depth: usize) -> Self { + self.opts.max_depth = depth; + if self.opts.max_depth < self.opts.min_depth { + self.opts.max_depth = self.opts.min_depth; + } + self + } + + /// Follow symbolic links. By default, this is disabled. + /// + /// When `yes` is `true`, symbolic links are followed as if they were + /// normal directories and files. If a symbolic link is broken or is + /// involved in a loop, an error is yielded. + /// + /// When enabled, the yielded [`DirEntry`] values represent the target of + /// the link while the path corresponds to the link. See the [`DirEntry`] + /// type for more details. + /// + /// [`DirEntry`]: struct.DirEntry.html + pub fn follow_links(mut self, yes: bool) -> Self { + self.opts.follow_links = yes; + self + } + + /// Set the maximum number of simultaneously open file descriptors used + /// by the iterator. + /// + /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set + /// to `1` automatically. If this is not set, then it defaults to some + /// reasonably low number. + /// + /// This setting has no impact on the results yielded by the iterator + /// (even when `n` is `1`). 
Instead, this setting represents a trade off + /// between scarce resources (file descriptors) and memory. Namely, when + /// the maximum number of file descriptors is reached and a new directory + /// needs to be opened to continue iteration, then a previous directory + /// handle is closed and has its unyielded entries stored in memory. In + /// practice, this is a satisfying trade off because it scales with respect + /// to the *depth* of your file tree. Therefore, low values (even `1`) are + /// acceptable. + /// + /// Note that this value does not impact the number of system calls made by + /// an exhausted iterator. + /// + /// # Platform behavior + /// + /// On Windows, if `follow_links` is enabled, then this limit is not + /// respected. In particular, the maximum number of file descriptors opened + /// is proportional to the depth of the directory tree traversed. + pub fn max_open(mut self, mut n: usize) -> Self { + if n == 0 { + n = 1; + } + self.opts.max_open = n; + self + } + + /// Set a function for sorting directory entries with a comparator + /// function. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. + /// + /// ```rust,no_run + /// use std::cmp; + /// use std::ffi::OsString; + /// use walkdir::WalkDir; + /// + /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); + /// ``` + pub fn sort_by<F>(mut self, cmp: F) -> Self + where + F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, + { + self.opts.sorter = Some(Box::new(cmp)); + self + } + + /// Set a function for sorting directory entries with a key extraction + /// function. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. 
+ /// + /// ```rust,no_run + /// use std::cmp; + /// use std::ffi::OsString; + /// use walkdir::WalkDir; + /// + /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned()); + /// ``` + pub fn sort_by_key(self, mut cmp: F) -> Self + where + F: FnMut(&DirEntry) -> K + Send + Sync + 'static, + K: Ord, + { + self.sort_by(move |a, b| cmp(a).cmp(&cmp(b))) + } + + /// Sort directory entries by file name, to ensure a deterministic order. + /// + /// This is a convenience function for calling `Self::sort_by()`. + /// + /// ```rust,no_run + /// use walkdir::WalkDir; + /// + /// WalkDir::new("foo").sort_by_file_name(); + /// ``` + pub fn sort_by_file_name(self) -> Self { + self.sort_by(|a, b| a.file_name().cmp(b.file_name())) + } + + /// Yield a directory's contents before the directory itself. By default, + /// this is disabled. + /// + /// When `yes` is `false` (as is the default), the directory is yielded + /// before its contents are read. This is useful when, e.g. you want to + /// skip processing of some directories. + /// + /// When `yes` is `true`, the iterator yields the contents of a directory + /// before yielding the directory itself. This is useful when, e.g. you + /// want to recursively delete a directory. 
+ /// + /// # Example + /// + /// Assume the following directory tree: + /// + /// ```text + /// foo/ + /// abc/ + /// qrs + /// tuv + /// def/ + /// ``` + /// + /// With contents_first disabled (the default), the following code visits + /// the directory tree in depth-first order: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo") { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo + /// // foo/abc + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/def + /// ``` + /// + /// With contents_first enabled: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo").contents_first(true) { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/abc + /// // foo/def + /// // foo + /// ``` + pub fn contents_first(mut self, yes: bool) -> Self { + self.opts.contents_first = yes; + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(mut self, yes: bool) -> Self { + self.opts.same_file_system = yes; + self + } +} + +impl IntoIterator for WalkDir { + type Item = Result; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + IntoIter { + opts: self.opts, + start: Some(self.root), + stack_list: vec![], + stack_path: vec![], + oldest_opened: 0, + depth: 0, + deferred_dirs: vec![], + root_device: None, + } + } +} + +/// An iterator for recursively descending into a directory. 
+/// +/// A value with this type must be constructed with the [`WalkDir`] type, which +/// uses a builder pattern to set options such as min/max depth, max open file +/// descriptors and whether the iterator should follow symbolic links. After +/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. +/// +/// The order of elements yielded by this iterator is unspecified. +/// +/// [`WalkDir`]: struct.WalkDir.html +/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v +#[derive(Debug)] +pub struct IntoIter { + /// Options specified in the builder. Depths, max fds, etc. + opts: WalkDirOptions, + /// The start path. + /// + /// This is only `Some(...)` at the beginning. After the first iteration, + /// this is always `None`. + start: Option, + /// A stack of open (up to max fd) or closed handles to directories. + /// An open handle is a plain [`fs::ReadDir`] while a closed handle is + /// a `Vec` corresponding to the as-of-yet consumed entries. + /// + /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html + stack_list: Vec, + /// A stack of file paths. + /// + /// This is *only* used when [`follow_links`] is enabled. In all other + /// cases this stack is empty. + /// + /// [`follow_links`]: struct.WalkDir.html#method.follow_links + stack_path: Vec, + /// An index into `stack_list` that points to the oldest open directory + /// handle. If the maximum fd limit is reached and a new directory needs to + /// be read, the handle at this index is closed before the new directory is + /// opened. + oldest_opened: usize, + /// The current depth of iteration (the length of the stack at the + /// beginning of each iteration). + depth: usize, + /// A list of DirEntries corresponding to directories, that are + /// yielded after their contents has been fully yielded. This is only + /// used when `contents_first` is enabled. + deferred_dirs: Vec, + /// The device of the root file path when the first call to `next` was + /// made. 
+ /// + /// If the `same_file_system` option isn't enabled, then this is always + /// `None`. Conversely, if it is enabled, this is always `Some(...)` after + /// handling the root path. + root_device: Option, +} + +/// An ancestor is an item in the directory tree traversed by walkdir, and is +/// used to check for loops in the tree when traversing symlinks. +#[derive(Debug)] +struct Ancestor { + /// The path of this ancestor. + path: PathBuf, + /// An open file to this ancesor. This is only used on Windows where + /// opening a file handle appears to be quite expensive, so we choose to + /// cache it. This comes at the cost of not respecting the file descriptor + /// limit set by the user. + #[cfg(windows)] + handle: Handle, +} + +impl Ancestor { + /// Create a new ancestor from the given directory path. + #[cfg(windows)] + fn new(dent: &DirEntry) -> io::Result { + let handle = Handle::from_path(dent.path())?; + Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle }) + } + + /// Create a new ancestor from the given directory path. + #[cfg(not(windows))] + fn new(dent: &DirEntry) -> io::Result { + Ok(Ancestor { path: dent.path().to_path_buf() }) + } + + /// Returns true if and only if the given open file handle corresponds to + /// the same directory as this ancestor. + #[cfg(windows)] + fn is_same(&self, child: &Handle) -> io::Result { + Ok(child == &self.handle) + } + + /// Returns true if and only if the given open file handle corresponds to + /// the same directory as this ancestor. + #[cfg(not(windows))] + fn is_same(&self, child: &Handle) -> io::Result { + Ok(child == &Handle::from_path(&self.path)?) + } +} + +/// A sequence of unconsumed directory entries. +/// +/// This represents the opened or closed state of a directory handle. When +/// open, future entries are read by iterating over the raw `fs::ReadDir`. +/// When closed, all future entries are read into memory. Iteration then +/// proceeds over a [`Vec`]. 
+/// +/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html +/// [`Vec`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html +#[derive(Debug)] +enum DirList { + /// An opened handle. + /// + /// This includes the depth of the handle itself. + /// + /// If there was an error with the initial [`fs::read_dir`] call, then it + /// is stored here. (We use an [`Option<...>`] to make yielding the error + /// exactly once simpler.) + /// + /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html + /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html + Opened { depth: usize, it: result::Result> }, + /// A closed handle. + /// + /// All remaining directory entries are read into memory. + Closed(vec::IntoIter>), +} + +impl Iterator for IntoIter { + type Item = Result; + /// Advances the iterator and returns the next value. + /// + /// # Errors + /// + /// If the iterator fails to retrieve the next value, this method returns + /// an error value. The error will be wrapped in an Option::Some. + fn next(&mut self) -> Option> { + if let Some(start) = self.start.take() { + if self.opts.same_file_system { + let result = util::device_num(&start) + .map_err(|e| Error::from_path(0, start.clone(), e)); + self.root_device = Some(itry!(result)); + } + let dent = itry!(DirEntry::from_path(0, start, false)); + if let Some(result) = self.handle_entry(dent) { + return Some(result); + } + } + while !self.stack_list.is_empty() { + self.depth = self.stack_list.len(); + if let Some(dentry) = self.get_deferred_dir() { + return Some(Ok(dentry)); + } + if self.depth > self.opts.max_depth { + // If we've exceeded the max depth, pop the current dir + // so that we don't descend. 
+ self.pop(); + continue; + } + // Unwrap is safe here because we've verified above that + // `self.stack_list` is not empty + let next = self + .stack_list + .last_mut() + .expect("BUG: stack should be non-empty") + .next(); + match next { + None => self.pop(), + Some(Err(err)) => return Some(Err(err)), + Some(Ok(dent)) => { + if let Some(result) = self.handle_entry(dent) { + return Some(result); + } + } + } + } + if self.opts.contents_first { + self.depth = self.stack_list.len(); + if let Some(dentry) = self.get_deferred_dir() { + return Some(Ok(dentry)); + } + } + None + } +} + +impl IntoIter { + /// Skips the current directory. + /// + /// This causes the iterator to stop traversing the contents of the least + /// recently yielded directory. This means any remaining entries in that + /// directory will be skipped (including sub-directories). + /// + /// Note that the ergonomics of this method are questionable since it + /// borrows the iterator mutably. Namely, you must write out the looping + /// condition manually. For example, to skip hidden entries efficiently on + /// unix systems: + /// + /// ```no_run + /// use walkdir::{DirEntry, WalkDir}; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// let mut it = WalkDir::new("foo").into_iter(); + /// loop { + /// let entry = match it.next() { + /// None => break, + /// Some(Err(err)) => panic!("ERROR: {}", err), + /// Some(Ok(entry)) => entry, + /// }; + /// if is_hidden(&entry) { + /// if entry.file_type().is_dir() { + /// it.skip_current_dir(); + /// } + /// continue; + /// } + /// println!("{}", entry.path().display()); + /// } + /// ``` + /// + /// You may find it more convenient to use the [`filter_entry`] iterator + /// adapter. (See its documentation for the same example functionality as + /// above.) 
///
/// [`filter_entry`]: #method.filter_entry
pub fn skip_current_dir(&mut self) {
    // Nothing to skip when no directory is currently being traversed.
    if !self.stack_list.is_empty() {
        self.pop();
    }
}

/// Yields only entries which satisfy the given predicate and skips
/// descending into directories that do not satisfy the given predicate.
///
/// The predicate is applied to all entries. If the predicate is
/// true, iteration carries on as normal. If the predicate is false, the
/// entry is ignored and if it is a directory, it is not descended into.
///
/// This is often more convenient to use than [`skip_current_dir`]. For
/// example, to skip hidden files and directories efficiently on unix
/// systems:
///
/// ```no_run
/// use walkdir::{DirEntry, WalkDir};
/// # use walkdir::Error;
///
/// fn is_hidden(entry: &DirEntry) -> bool {
///     entry.file_name()
///          .to_str()
///          .map(|s| s.starts_with("."))
///          .unwrap_or(false)
/// }
///
/// # fn try_main() -> Result<(), Error> {
/// for entry in WalkDir::new("foo")
///                      .into_iter()
///                      .filter_entry(|e| !is_hidden(e)) {
///     println!("{}", entry?.path().display());
/// }
/// # Ok(())
/// # }
/// ```
///
/// Note that the iterator will still yield errors for reading entries that
/// may not satisfy the predicate.
///
/// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
/// passed to this predicate.
///
/// Note that if the iterator has `contents_first` enabled, then this
/// method is no different than calling the standard `Iterator::filter`
/// method (because directory entries are yielded after they've been
/// descended into).
///
/// [`skip_current_dir`]: #method.skip_current_dir
/// [`min_depth`]: struct.WalkDir.html#method.min_depth
/// [`max_depth`]: struct.WalkDir.html#method.max_depth
pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
where
    P: FnMut(&DirEntry) -> bool,
{
    FilterEntry { it: self, predicate }
}

/// Processes one freshly read entry.
///
/// Resolves the entry through `follow` when symlinks are being followed,
/// pushes it onto the directory stack when it must be descended into, and
/// then decides whether it is yielded now (`Some(Ok(..))`), deferred until
/// its contents were yielded (`contents_first`), or suppressed because the
/// current depth is outside the configured range (`None`). Errors from any
/// step are returned as `Some(Err(..))`.
fn handle_entry(
    &mut self,
    mut dent: DirEntry,
) -> Option<Result<DirEntry>> {
    if self.opts.follow_links && dent.file_type().is_symlink() {
        dent = itry!(self.follow(dent));
    }
    let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
    if is_normal_dir {
        if self.opts.same_file_system && dent.depth() > 0 {
            // Only descend when the directory is on the root's device.
            if itry!(self.is_same_file_system(&dent)) {
                itry!(self.push(&dent));
            }
        } else {
            itry!(self.push(&dent));
        }
    } else if dent.depth() == 0 && dent.file_type().is_symlink() {
        // As a special case, if we are processing a root entry, then we
        // always follow it even if it's a symlink and follow_links is
        // false. We are careful to not let this change the semantics of
        // the DirEntry however. Namely, the DirEntry should still respect
        // the follow_links setting. When it's disabled, it should report
        // itself as a symlink. When it's enabled, it should always report
        // itself as the target.
        let md = itry!(fs::metadata(dent.path()).map_err(|err| {
            Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
        }));
        if md.file_type().is_dir() {
            itry!(self.push(&dent));
        }
    }
    if is_normal_dir && self.opts.contents_first {
        // Hold the directory back; `get_deferred_dir` hands it out later.
        self.deferred_dirs.push(dent);
        None
    } else if self.skippable() {
        None
    } else {
        Some(Ok(dent))
    }
}

/// Pops the most recently deferred directory when `contents_first` is
/// enabled and the current depth is below the number of deferred
/// directories.
///
/// The popped entry is returned unless the current depth is outside the
/// configured min/max range, in which case it is dropped and `None` is
/// returned.
fn get_deferred_dir(&mut self) -> Option<DirEntry> {
    if self.opts.contents_first && self.depth < self.deferred_dirs.len() {
        // The `expect` cannot fire: `depth` is unsigned and strictly less
        // than `deferred_dirs.len()`, so the list holds at least one entry.
        let deferred: DirEntry = self
            .deferred_dirs
            .pop()
            .expect("BUG: deferred_dirs should be non-empty");
        if !self.skippable() {
            return Some(deferred);
        }
    }
    None
}

/// Starts reading the directory `dent` and pushes it onto the stack.
///
/// A failure of `fs::read_dir` is not returned from here; it is stored in
/// the pushed `DirList` and yielded when that list is first advanced. An
/// error is returned only when creating the `Ancestor` fails (which is
/// attempted only when following links), and then nothing is pushed.
fn push(&mut self, dent: &DirEntry) -> Result<()> {
    // Make room for another open file descriptor if we've hit the max.
    // Entries from `oldest_opened` upward are the ones that may be open.
    let open_handles =
        self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
    let at_limit = open_handles == self.opts.max_open;
    if at_limit {
        self.stack_list[self.oldest_opened].close();
    }
    // Open a handle to reading the directory's entries.
    let rd = fs::read_dir(dent.path()).map_err(|err| {
        Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
    });
    let mut list = DirList::Opened { depth: self.depth, it: rd };
    if let Some(ref mut cmp) = self.opts.sorter {
        // Sorting needs every entry in memory. Errors sort before
        // successfully read entries and compare equal to each other.
        let mut entries: Vec<_> = list.collect();
        entries.sort_by(|a, b| match (a, b) {
            (Ok(a), Ok(b)) => cmp(a, b),
            (Err(_), Err(_)) => Ordering::Equal,
            (Ok(_), Err(_)) => Ordering::Greater,
            (Err(_), Ok(_)) => Ordering::Less,
        });
        list = DirList::Closed(entries.into_iter());
    }
    if self.opts.follow_links {
        let ancestor = Ancestor::new(dent)
            .map_err(|err| Error::from_io(self.depth, err))?;
        self.stack_path.push(ancestor);
    }
    // We push this after stack_path since creating the Ancestor can fail.
    // If it fails, then we return the error and won't descend.
    self.stack_list.push(list);
    // If we had to close out a previous directory stream, then we need to
    // increment our index to the oldest still-open stream. We do this only
    // after adding to our stack, in order to ensure that the oldest_opened
    // index remains valid. The worst that can happen is that an already
    // closed stream will be closed again, which is a no-op.
    //
    // We could move the close of the stream above into this if-body, but
    // then we would have more than the maximum number of file descriptors
    // open at a particular point in time.
    if at_limit {
        // Unwrap is safe here because self.oldest_opened is guaranteed to
        // never be greater than `self.stack_list.len()`, which implies
        // that the subtraction won't underflow and that adding 1 will
        // never overflow.
        self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
    }
    Ok(())
}

/// Removes the directory on top of the stack, together with its matching
/// `stack_path` entry when links are being followed.
///
/// Panics if the stack is empty; callers check for that first.
fn pop(&mut self) {
    self.stack_list.pop().expect("BUG: cannot pop from empty stack");
    if self.opts.follow_links {
        self.stack_path.pop().expect("BUG: list/path stacks out of sync");
    }
    // If everything in the stack is already closed, then there is
    // room for at least one more open descriptor and it will
    // always be at the top of the stack.
    self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
}

/// Rebuilds `dent` from its path with the follow flag set, and rejects it
/// with a loop error if the result is a directory that is also one of the
/// ancestors currently on the stack.
fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
    dent =
        DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
    // The only way a symlink can cause a loop is if it points
    // to a directory. Otherwise, it always points to a leaf
    // and we can omit any loop checks.
    if dent.is_dir() {
        self.check_loop(dent.path())?;
    }
    Ok(dent)
}

/// Returns a loop error if `child` refers to the same directory as any
/// ancestor on `stack_path`, checking the nearest ancestor first.
///
/// Failing to open a handle for `child`, or to compare it with an
/// ancestor, is reported as an I/O error at the current depth.
fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
    let hchild = Handle::from_path(&child)
        .map_err(|err| Error::from_io(self.depth, err))?;
    for ancestor in self.stack_path.iter().rev() {
        let is_same = ancestor
            .is_same(&hchild)
            .map_err(|err| Error::from_io(self.depth, err))?;
        if is_same {
            return Err(Error::from_loop(
                self.depth,
                &ancestor.path,
                child.as_ref(),
            ));
        }
    }
    Ok(())
}

/// Returns whether `dent` is on the same device as the root path.
///
/// Panics if the root device was never recorded, i.e. if this is called
/// without `same_file_system` having been handled for the root.
fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
    let dent_device = util::device_num(dent.path())
        .map_err(|err| Error::from_entry(dent, err))?;
    Ok(self
        .root_device
        .map(|d| d == dent_device)
        .expect("BUG: called is_same_file_system without root device"))
}

/// Returns true when the current depth lies outside the configured
/// `[min_depth, max_depth]` range.
fn skippable(&self) -> bool {
    self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
}
} // closes the enclosing `impl` of the iterator methods above

impl DirList {
    /// Converts an opened list into a closed one by draining every
    /// remaining entry into memory. Calling this on an already closed list
    /// does nothing.
    fn close(&mut self) {
        if let DirList::Opened { .. } = *self {
            *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
        }
    }
}

impl Iterator for DirList {
    type Item = Result<DirEntry>;

    /// Yields the next entry of this directory.
    ///
    /// A closed list replays its buffered entries. An opened list either
    /// yields the stored `read_dir` error exactly once (the `Option` is
    /// emptied by `take`) or converts the next raw entry, reporting it at
    /// one level below the depth recorded for this list.
    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry>> {
        match *self {
            DirList::Closed(ref mut it) => it.next(),
            DirList::Opened { depth, ref mut it } => match *it {
                Err(ref mut err) => err.take().map(Err),
                Ok(ref mut rd) => rd.next().map(|r| match r {
                    Ok(r) => DirEntry::from_entry(depth + 1, &r),
                    Err(err) => Err(Error::from_io(depth + 1, err)),
                }),
            },
        }
    }
}

/// A recursive directory iterator that skips entries.
///
/// Values of this type are created by calling [`.filter_entry()`] on an
/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
///
/// Directories that fail the predicate `P` are skipped. Namely, they are
/// never yielded and never descended into.
///
/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
/// are not passed through this filter.
///
/// If opening a handle to a directory resulted in an error, then it is yielded
/// and no corresponding call to the predicate is made.
///
/// Type parameter `I` refers to the underlying iterator and `P` refers to the
/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
///
/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
/// [`min_depth`]: struct.WalkDir.html#method.min_depth
/// [`max_depth`]: struct.WalkDir.html#method.max_depth
#[derive(Debug)]
pub struct FilterEntry<I, P> {
    it: I,
    predicate: P,
}

impl<P> Iterator for FilterEntry<IntoIter, P>
where
    P: FnMut(&DirEntry) -> bool,
{
    type Item = Result<DirEntry>;

    /// Advances the iterator and returns the next value.
    ///
    /// # Errors
    ///
    /// If the iterator fails to retrieve the next value, this method returns
    /// an error value. The error will be wrapped in an `Option::Some`.
    fn next(&mut self) -> Option<Result<DirEntry>> {
        loop {
            let dent = match self.it.next() {
                None => return None,
                Some(result) => itry!(result),
            };
            if !(self.predicate)(&dent) {
                // A rejected directory must not be descended into either.
                if dent.is_dir() {
                    self.it.skip_current_dir();
                }
                continue;
            }
            return Some(Ok(dent));
        }
    }
}

impl<P> FilterEntry<IntoIter, P>
where
    P: FnMut(&DirEntry) -> bool,
{
    /// Yields only entries which satisfy the given predicate and skips
    /// descending into directories that do not satisfy the given predicate.
    ///
    /// The predicate is applied to all entries. If the predicate is
    /// true, iteration carries on as normal. If the predicate is false, the
    /// entry is ignored and if it is a directory, it is not descended into.
    ///
    /// This is often more convenient to use than [`skip_current_dir`]. For
    /// example, to skip hidden files and directories efficiently on unix
    /// systems:
    ///
    /// ```no_run
    /// use walkdir::{DirEntry, WalkDir};
    /// # use walkdir::Error;
    ///
    /// fn is_hidden(entry: &DirEntry) -> bool {
    ///     entry.file_name()
    ///          .to_str()
    ///          .map(|s| s.starts_with("."))
    ///          .unwrap_or(false)
    /// }
    ///
    /// # fn try_main() -> Result<(), Error> {
    /// for entry in WalkDir::new("foo")
    ///                      .into_iter()
    ///                      .filter_entry(|e| !is_hidden(e)) {
    ///     println!("{}", entry?.path().display());
    /// }
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// Note that the iterator will still yield errors for reading entries that
    /// may not satisfy the predicate.
    ///
    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
    /// passed to this predicate.
    ///
    /// Note that if the iterator has `contents_first` enabled, then this
    /// method is no different than calling the standard `Iterator::filter`
    /// method (because directory entries are yielded after they've been
    /// descended into).
    ///
    /// [`skip_current_dir`]: #method.skip_current_dir
    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
    pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
        FilterEntry { it: self, predicate }
    }

    /// Skips the current directory.
    ///
    /// This causes the iterator to stop traversing the contents of the most
    /// recently yielded directory. This means any remaining entries in that
    /// directory will be skipped (including sub-directories).
    ///
    /// Note that the ergonomics of this method are questionable since it
    /// borrows the iterator mutably. Namely, you must write out the looping
    /// condition manually. For example, to skip hidden entries efficiently on
    /// unix systems:
    ///
    /// ```no_run
    /// use walkdir::{DirEntry, WalkDir};
    ///
    /// fn is_hidden(entry: &DirEntry) -> bool {
    ///     entry.file_name()
    ///          .to_str()
    ///          .map(|s| s.starts_with("."))
    ///          .unwrap_or(false)
    /// }
    ///
    /// let mut it = WalkDir::new("foo").into_iter();
    /// loop {
    ///     let entry = match it.next() {
    ///         None => break,
    ///         Some(Err(err)) => panic!("ERROR: {}", err),
    ///         Some(Ok(entry)) => entry,
    ///     };
    ///     if is_hidden(&entry) {
    ///         if entry.file_type().is_dir() {
    ///             it.skip_current_dir();
    ///         }
    ///         continue;
    ///     }
    ///     println!("{}", entry.path().display());
    /// }
    /// ```
    ///
    /// You may find it more convenient to use the [`filter_entry`] iterator
    /// adapter. (See its documentation for the same example functionality as
    /// above.)
+ /// + /// [`filter_entry`]: #method.filter_entry + pub fn skip_current_dir(&mut self) { + self.it.skip_current_dir(); + } +} diff --git a/third_party/rust/walkdir/src/tests/mod.rs b/third_party/rust/walkdir/src/tests/mod.rs new file mode 100644 index 0000000000..ebf952dfc5 --- /dev/null +++ b/third_party/rust/walkdir/src/tests/mod.rs @@ -0,0 +1,4 @@ +#[macro_use] +mod util; + +mod recursive; diff --git a/third_party/rust/walkdir/src/tests/recursive.rs b/third_party/rust/walkdir/src/tests/recursive.rs new file mode 100644 index 0000000000..4119f461d3 --- /dev/null +++ b/third_party/rust/walkdir/src/tests/recursive.rs @@ -0,0 +1,1023 @@ +use std::fs; +use std::path::PathBuf; + +use crate::tests::util::Dir; +use crate::WalkDir; + +#[test] +fn send_sync_traits() { + use crate::{FilterEntry, IntoIter}; + + fn assert_send() {} + fn assert_sync() {} + + assert_send::(); + assert_sync::(); + assert_send::(); + assert_sync::(); + assert_send::>(); + assert_sync::>(); +} + +#[test] +fn empty() { + let dir = Dir::tmp(); + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + assert_eq!(1, r.ents().len()); + let ent = &r.ents()[0]; + assert!(ent.file_type().is_dir()); + assert!(!ent.path_is_symlink()); + assert_eq!(0, ent.depth()); + assert_eq!(dir.path(), ent.path()); + assert_eq!(dir.path().file_name().unwrap(), ent.file_name()); +} + +#[test] +fn empty_follow() { + let dir = Dir::tmp(); + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + assert_eq!(1, r.ents().len()); + let ent = &r.ents()[0]; + assert!(ent.file_type().is_dir()); + assert!(!ent.path_is_symlink()); + assert_eq!(0, ent.depth()); + assert_eq!(dir.path(), ent.path()); + assert_eq!(dir.path().file_name().unwrap(), ent.file_name()); +} + +#[test] +fn empty_file() { + let dir = Dir::tmp(); + dir.touch("a"); + + let wd = WalkDir::new(dir.path().join("a")); + let r = dir.run_recursive(wd); + 
r.assert_no_errors(); + + assert_eq!(1, r.ents().len()); + let ent = &r.ents()[0]; + assert!(ent.file_type().is_file()); + assert!(!ent.path_is_symlink()); + assert_eq!(0, ent.depth()); + assert_eq!(dir.join("a"), ent.path()); + assert_eq!("a", ent.file_name()); +} + +#[test] +fn empty_file_follow() { + let dir = Dir::tmp(); + dir.touch("a"); + + let wd = WalkDir::new(dir.path().join("a")).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + assert_eq!(1, r.ents().len()); + let ent = &r.ents()[0]; + assert!(ent.file_type().is_file()); + assert!(!ent.path_is_symlink()); + assert_eq!(0, ent.depth()); + assert_eq!(dir.join("a"), ent.path()); + assert_eq!("a", ent.file_name()); +} + +#[test] +fn one_dir() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.ents(); + assert_eq!(2, ents.len()); + let ent = &ents[1]; + assert_eq!(dir.join("a"), ent.path()); + assert_eq!(1, ent.depth()); + assert_eq!("a", ent.file_name()); + assert!(ent.file_type().is_dir()); +} + +#[test] +fn one_file() { + let dir = Dir::tmp(); + dir.touch("a"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.ents(); + assert_eq!(2, ents.len()); + let ent = &ents[1]; + assert_eq!(dir.join("a"), ent.path()); + assert_eq!(1, ent.depth()); + assert_eq!("a", ent.file_name()); + assert!(ent.file_type().is_file()); +} + +#[test] +fn one_dir_one_file() { + let dir = Dir::tmp(); + dir.mkdirp("foo"); + dir.touch("foo/a"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("foo"), + dir.join("foo").join("a"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn many_files() { + let dir = Dir::tmp(); + dir.mkdirp("foo"); + dir.touch_all(&["foo/a", "foo/b", "foo/c"]); + + let wd = 
WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("foo"), + dir.join("foo").join("a"), + dir.join("foo").join("b"), + dir.join("foo").join("c"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn many_dirs() { + let dir = Dir::tmp(); + dir.mkdirp("foo/a"); + dir.mkdirp("foo/b"); + dir.mkdirp("foo/c"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("foo"), + dir.join("foo").join("a"), + dir.join("foo").join("b"), + dir.join("foo").join("c"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn many_mixed() { + let dir = Dir::tmp(); + dir.mkdirp("foo/a"); + dir.mkdirp("foo/c"); + dir.mkdirp("foo/e"); + dir.touch_all(&["foo/b", "foo/d", "foo/f"]); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("foo"), + dir.join("foo").join("a"), + dir.join("foo").join("b"), + dir.join("foo").join("c"), + dir.join("foo").join("d"), + dir.join("foo").join("e"), + dir.join("foo").join("f"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn nested() { + let nested = + PathBuf::from("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z"); + let dir = Dir::tmp(); + dir.mkdirp(&nested); + dir.touch(nested.join("A")); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("a"), + dir.join("a/b"), + dir.join("a/b/c"), + dir.join("a/b/c/d"), + dir.join("a/b/c/d/e"), + dir.join("a/b/c/d/e/f"), + dir.join("a/b/c/d/e/f/g"), + dir.join("a/b/c/d/e/f/g/h"), + dir.join("a/b/c/d/e/f/g/h/i"), + dir.join("a/b/c/d/e/f/g/h/i/j"), + dir.join("a/b/c/d/e/f/g/h/i/j/k"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m"), + 
dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z"), + dir.join(&nested).join("A"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn nested_small_max_open() { + let nested = + PathBuf::from("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z"); + let dir = Dir::tmp(); + dir.mkdirp(&nested); + dir.touch(nested.join("A")); + + let wd = WalkDir::new(dir.path()).max_open(1); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("a"), + dir.join("a/b"), + dir.join("a/b/c"), + dir.join("a/b/c/d"), + dir.join("a/b/c/d/e"), + dir.join("a/b/c/d/e/f"), + dir.join("a/b/c/d/e/f/g"), + dir.join("a/b/c/d/e/f/g/h"), + dir.join("a/b/c/d/e/f/g/h/i"), + dir.join("a/b/c/d/e/f/g/h/i/j"), + dir.join("a/b/c/d/e/f/g/h/i/j/k"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v"), + 
dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y"), + dir.join("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z"), + dir.join(&nested).join("A"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn siblings() { + let dir = Dir::tmp(); + dir.mkdirp("foo"); + dir.mkdirp("bar"); + dir.touch_all(&["foo/a", "foo/b"]); + dir.touch_all(&["bar/a", "bar/b"]); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![ + dir.path().to_path_buf(), + dir.join("bar"), + dir.join("bar").join("a"), + dir.join("bar").join("b"), + dir.join("foo"), + dir.join("foo").join("a"), + dir.join("foo").join("b"), + ]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn sym_root_file_nofollow() { + let dir = Dir::tmp(); + dir.touch("a"); + dir.symlink_file("a", "a-link"); + + let wd = WalkDir::new(dir.join("a-link")); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(1, ents.len()); + let link = &ents[0]; + + assert_eq!(dir.join("a-link"), link.path()); + + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(0, link.depth()); + + assert!(link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); +} + +#[test] +fn sym_root_file_follow() { + let dir = Dir::tmp(); + dir.touch("a"); + dir.symlink_file("a", "a-link"); + + let wd = WalkDir::new(dir.join("a-link")).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + let link = &ents[0]; + + assert_eq!(dir.join("a-link"), link.path()); + + 
assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(0, link.depth()); + + assert!(!link.file_type().is_symlink()); + assert!(link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(!link.metadata().unwrap().file_type().is_symlink()); + assert!(link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); +} + +#[test] +fn sym_root_dir_nofollow() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.symlink_dir("a", "a-link"); + dir.touch("a/zzz"); + + let wd = WalkDir::new(dir.join("a-link")); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(2, ents.len()); + let link = &ents[0]; + + assert_eq!(dir.join("a-link"), link.path()); + + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(0, link.depth()); + + assert!(link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); + + let link_zzz = &ents[1]; + assert_eq!(dir.join("a-link").join("zzz"), link_zzz.path()); + assert!(!link_zzz.path_is_symlink()); +} + +#[test] +fn sym_root_dir_follow() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.symlink_dir("a", "a-link"); + dir.touch("a/zzz"); + + let wd = WalkDir::new(dir.join("a-link")).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(2, ents.len()); + let link = &ents[0]; + + assert_eq!(dir.join("a-link"), link.path()); + + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(0, link.depth()); + + assert!(!link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + 
assert!(link.file_type().is_dir()); + + assert!(!link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(link.metadata().unwrap().is_dir()); + + let link_zzz = &ents[1]; + assert_eq!(dir.join("a-link").join("zzz"), link_zzz.path()); + assert!(!link_zzz.path_is_symlink()); +} + +#[test] +fn sym_file_nofollow() { + let dir = Dir::tmp(); + dir.touch("a"); + dir.symlink_file("a", "a-link"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(3, ents.len()); + let (src, link) = (&ents[1], &ents[2]); + + assert_eq!(dir.join("a"), src.path()); + assert_eq!(dir.join("a-link"), link.path()); + + assert!(!src.path_is_symlink()); + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(1, src.depth()); + assert_eq!(1, link.depth()); + + assert!(src.file_type().is_file()); + assert!(link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(src.metadata().unwrap().is_file()); + assert!(link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); +} + +#[test] +fn sym_file_follow() { + let dir = Dir::tmp(); + dir.touch("a"); + dir.symlink_file("a", "a-link"); + + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(3, ents.len()); + let (src, link) = (&ents[1], &ents[2]); + + assert_eq!(dir.join("a"), src.path()); + assert_eq!(dir.join("a-link"), link.path()); + + assert!(!src.path_is_symlink()); + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(1, src.depth()); + assert_eq!(1, link.depth()); + + assert!(src.file_type().is_file()); + 
assert!(!link.file_type().is_symlink()); + assert!(link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(src.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().file_type().is_symlink()); + assert!(link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); +} + +#[test] +fn sym_dir_nofollow() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.symlink_dir("a", "a-link"); + dir.touch("a/zzz"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(4, ents.len()); + let (src, link) = (&ents[1], &ents[3]); + + assert_eq!(dir.join("a"), src.path()); + assert_eq!(dir.join("a-link"), link.path()); + + assert!(!src.path_is_symlink()); + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(1, src.depth()); + assert_eq!(1, link.depth()); + + assert!(src.file_type().is_dir()); + assert!(link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + assert!(!link.file_type().is_dir()); + + assert!(src.metadata().unwrap().is_dir()); + assert!(link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(!link.metadata().unwrap().is_dir()); +} + +#[test] +fn sym_dir_follow() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.symlink_dir("a", "a-link"); + dir.touch("a/zzz"); + + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(5, ents.len()); + let (src, link) = (&ents[1], &ents[3]); + + assert_eq!(dir.join("a"), src.path()); + assert_eq!(dir.join("a-link"), link.path()); + + assert!(!src.path_is_symlink()); + assert!(link.path_is_symlink()); + + assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap()); + + assert_eq!(1, src.depth()); + assert_eq!(1, link.depth()); + + 
assert!(src.file_type().is_dir()); + assert!(!link.file_type().is_symlink()); + assert!(!link.file_type().is_file()); + assert!(link.file_type().is_dir()); + + assert!(src.metadata().unwrap().is_dir()); + assert!(!link.metadata().unwrap().file_type().is_symlink()); + assert!(!link.metadata().unwrap().is_file()); + assert!(link.metadata().unwrap().is_dir()); + + let (src_zzz, link_zzz) = (&ents[2], &ents[4]); + assert_eq!(dir.join("a").join("zzz"), src_zzz.path()); + assert_eq!(dir.join("a-link").join("zzz"), link_zzz.path()); + assert!(!src_zzz.path_is_symlink()); + assert!(!link_zzz.path_is_symlink()); +} + +#[test] +fn sym_noloop() { + let dir = Dir::tmp(); + dir.mkdirp("a/b/c"); + dir.symlink_dir("a", "a/b/c/a-link"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + // There's no loop if we aren't following symlinks. + r.assert_no_errors(); + + assert_eq!(5, r.ents().len()); +} + +#[test] +fn sym_loop_detect() { + let dir = Dir::tmp(); + dir.mkdirp("a/b/c"); + dir.symlink_dir("a", "a/b/c/a-link"); + + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + + let (ents, errs) = (r.sorted_ents(), r.errs()); + assert_eq!(4, ents.len()); + assert_eq!(1, errs.len()); + + let err = &errs[0]; + + let expected = dir.join("a/b/c/a-link"); + assert_eq!(Some(&*expected), err.path()); + + let expected = dir.join("a"); + assert_eq!(Some(&*expected), err.loop_ancestor()); + + assert_eq!(4, err.depth()); + assert!(err.io_error().is_none()); +} + +#[test] +fn sym_self_loop_no_error() { + let dir = Dir::tmp(); + dir.symlink_file("a", "a"); + + let wd = WalkDir::new(dir.path()); + let r = dir.run_recursive(wd); + // No errors occur because even though the symlink points to nowhere, it + // is never followed, and thus no error occurs. 
+ r.assert_no_errors(); + assert_eq!(2, r.ents().len()); + + let ent = &r.ents()[1]; + assert_eq!(dir.join("a"), ent.path()); + assert!(ent.path_is_symlink()); + + assert!(ent.file_type().is_symlink()); + assert!(!ent.file_type().is_file()); + assert!(!ent.file_type().is_dir()); + + assert!(ent.metadata().unwrap().file_type().is_symlink()); + assert!(!ent.metadata().unwrap().file_type().is_file()); + assert!(!ent.metadata().unwrap().file_type().is_dir()); +} + +#[test] +fn sym_file_self_loop_io_error() { + let dir = Dir::tmp(); + dir.symlink_file("a", "a"); + + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + + let (ents, errs) = (r.sorted_ents(), r.errs()); + assert_eq!(1, ents.len()); + assert_eq!(1, errs.len()); + + let err = &errs[0]; + + let expected = dir.join("a"); + assert_eq!(Some(&*expected), err.path()); + assert_eq!(1, err.depth()); + assert!(err.loop_ancestor().is_none()); + assert!(err.io_error().is_some()); +} + +#[test] +fn sym_dir_self_loop_io_error() { + let dir = Dir::tmp(); + dir.symlink_dir("a", "a"); + + let wd = WalkDir::new(dir.path()).follow_links(true); + let r = dir.run_recursive(wd); + + let (ents, errs) = (r.sorted_ents(), r.errs()); + assert_eq!(1, ents.len()); + assert_eq!(1, errs.len()); + + let err = &errs[0]; + + let expected = dir.join("a"); + assert_eq!(Some(&*expected), err.path()); + assert_eq!(1, err.depth()); + assert!(err.loop_ancestor().is_none()); + assert!(err.io_error().is_some()); +} + +#[test] +fn min_depth_1() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + + let wd = WalkDir::new(dir.path()).min_depth(1); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![dir.join("a"), dir.join("a").join("b")]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn min_depth_2() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + + let wd = WalkDir::new(dir.path()).min_depth(2); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = 
vec![dir.join("a").join("b")]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn max_depth_0() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + + let wd = WalkDir::new(dir.path()).max_depth(0); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![dir.path().to_path_buf()]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn max_depth_1() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + + let wd = WalkDir::new(dir.path()).max_depth(1); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = vec![dir.path().to_path_buf(), dir.join("a")]; + assert_eq!(expected, r.sorted_paths()); +} + +#[test] +fn max_depth_2() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + + let wd = WalkDir::new(dir.path()).max_depth(2); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let expected = + vec![dir.path().to_path_buf(), dir.join("a"), dir.join("a").join("b")]; + assert_eq!(expected, r.sorted_paths()); +} + +// FIXME: This test seems wrong. It should return nothing! 
/// Sets `min_depth(3)` and then `max_depth(2)` on a tree three levels deep;
/// the assertion pins the walk to yielding only `a/b/c`.
#[test]
fn min_max_depth_diff_nada() {
    let tmp = Dir::tmp();
    tmp.mkdirp("a/b/c");

    let results = tmp
        .run_recursive(WalkDir::new(tmp.path()).min_depth(3).max_depth(2));
    results.assert_no_errors();

    let want = vec![tmp.join("a/b/c")];
    assert_eq!(want, results.sorted_paths());
}

/// With `min_depth == max_depth == 2`, only the depth-2 entry is yielded.
#[test]
fn min_max_depth_diff_0() {
    let tmp = Dir::tmp();
    tmp.mkdirp("a/b/c");

    let results = tmp
        .run_recursive(WalkDir::new(tmp.path()).min_depth(2).max_depth(2));
    results.assert_no_errors();

    let want = vec![tmp.join("a/b")];
    assert_eq!(want, results.sorted_paths());
}

/// With `min_depth(1)` and `max_depth(2)`, depths 1 and 2 are yielded and
/// both the root and `a/b/c` are left out.
#[test]
fn min_max_depth_diff_1() {
    let tmp = Dir::tmp();
    tmp.mkdirp("a/b/c");

    let results = tmp
        .run_recursive(WalkDir::new(tmp.path()).min_depth(1).max_depth(2));
    results.assert_no_errors();

    let want = vec![tmp.join("a"), tmp.join("a/b")];
    assert_eq!(want, results.sorted_paths());
}

/// With `contents_first(true)`, the file `a` is yielded before the root
/// directory that contains it (checked against the unsorted paths).
#[test]
fn contents_first() {
    let tmp = Dir::tmp();
    tmp.touch("a");

    let results =
        tmp.run_recursive(WalkDir::new(tmp.path()).contents_first(true));
    results.assert_no_errors();

    let root = tmp.path().to_path_buf();
    let want = vec![tmp.join("a"), root];
    assert_eq!(want, results.paths());
}

/// Calling `skip_current_dir` right after `bar` is yielded keeps `baz` out
/// of the results while the sibling `quux` is still visited.
#[test]
fn skip_current_dir() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz");
    tmp.mkdirp("quux");

    let mut visited = Vec::new();
    let mut walker = WalkDir::new(tmp.path()).into_iter();
    // A `for` loop cannot be used here: the iterator itself is needed
    // inside the loop body to call `skip_current_dir`.
    loop {
        let entry = match walker.next() {
            Some(found) => found.unwrap(),
            None => break,
        };
        let prune = entry.file_name() == "bar";
        visited.push(entry.into_path());
        if prune {
            walker.skip_current_dir();
        }
    }
    visited.sort();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("quux"),
    ];
    assert_eq!(want, visited);
}

/// A `filter_entry` predicate that rejects `baz` removes `baz` and its
/// child `abc` from the results.
#[test]
fn filter_entry() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz/abc");
    tmp.mkdirp("quux");

    let filtered = WalkDir::new(tmp.path())
        .into_iter()
        .filter_entry(|entry| entry.file_name() != "baz");
    let results = tmp.run_recursive(filtered);
    results.assert_no_errors();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("quux"),
    ];
    assert_eq!(want, results.sorted_paths());
}

/// `sort_by` with a reversed file-name comparison yields `quux` before
/// `foo` (checked against the unsorted paths).
#[test]
fn sort_by() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz/abc");
    tmp.mkdirp("quux");

    // Comparing `rhs` against `lhs` gives descending file-name order.
    let walker = WalkDir::new(tmp.path())
        .sort_by(|lhs, rhs| rhs.file_name().cmp(lhs.file_name()));
    let results = tmp.run_recursive(walker);
    results.assert_no_errors();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("quux"),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("foo/bar/baz"),
        tmp.join("foo/bar/baz/abc"),
    ];
    assert_eq!(want, results.paths());
}

/// `sort_by_key` keyed on the owned file name yields `foo` (and everything
/// below it) before `quux`.
#[test]
fn sort_by_key() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz/abc");
    tmp.mkdirp("quux");

    let walker = WalkDir::new(tmp.path())
        .sort_by_key(|entry| entry.file_name().to_os_string());
    let results = tmp.run_recursive(walker);
    results.assert_no_errors();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("foo/bar/baz"),
        tmp.join("foo/bar/baz/abc"),
        tmp.join("quux"),
    ];
    assert_eq!(want, results.paths());
}

/// `sort_by_file_name` yields the same order as sorting by the file-name
/// key: `foo` and its descendants first, then `quux`.
#[test]
fn sort_by_file_name() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz/abc");
    tmp.mkdirp("quux");

    let results =
        tmp.run_recursive(WalkDir::new(tmp.path()).sort_by_file_name());
    results.assert_no_errors();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("foo/bar/baz"),
        tmp.join("foo/bar/baz/abc"),
        tmp.join("quux"),
    ];
    assert_eq!(want, results.paths());
}

/// The reversed `sort_by` order is unchanged when `max_open(1)` is set.
#[test]
fn sort_max_open() {
    let tmp = Dir::tmp();
    tmp.mkdirp("foo/bar/baz/abc");
    tmp.mkdirp("quux");

    let walker = WalkDir::new(tmp.path())
        .max_open(1)
        .sort_by(|lhs, rhs| rhs.file_name().cmp(lhs.file_name()));
    let results = tmp.run_recursive(walker);
    results.assert_no_errors();

    let want = vec![
        tmp.path().to_path_buf(),
        tmp.join("quux"),
        tmp.join("foo"),
        tmp.join("foo/bar"),
        tmp.join("foo/bar/baz"),
        tmp.join("foo/bar/baz/abc"),
    ];
    assert_eq!(want, results.paths());
}

/// With `same_file_system(true)` and `follow_links(true)`, a symlink to
/// `/sys` is reported but nothing beneath it is yielded.
#[cfg(target_os = "linux")]
#[test]
fn same_file_system() {
    use std::path::Path;

    // Setting up a distinct mounted volume inside a test is of doubtful
    // value, so an existing one is probed for instead. Without `/sys`
    // there is nothing to test.
    if !Path::new("/sys").is_dir() {
        return;
    }

    let tmp = Dir::tmp();
    tmp.touch("a");
    tmp.symlink_dir("/sys", "sys-link");

    // Both walks below are expected to produce exactly these entries.
    let want =
        vec![tmp.path().to_path_buf(), tmp.join("a"), tmp.join("sys-link")];

    // Sanity check first: things work when symlinks are not followed.
    let plain = tmp.run_recursive(WalkDir::new(tmp.path()));
    plain.assert_no_errors();
    assert_eq!(want, plain.sorted_paths());

    // ... now follow symlinks and ensure we don't descend into /sys.
    let following = tmp.run_recursive(
        WalkDir::new(tmp.path()).same_file_system(true).follow_links(true),
    );
    following.assert_no_errors();
    assert_eq!(want, following.sorted_paths());
}

// Tests that skip_current_dir doesn't destroy internal invariants.
+// +// See: https://github.com/BurntSushi/walkdir/issues/118 +#[test] +fn regression_skip_current_dir() { + let dir = Dir::tmp(); + dir.mkdirp("foo/a/b"); + dir.mkdirp("foo/1/2"); + + let mut wd = WalkDir::new(dir.path()).max_open(1).into_iter(); + wd.next(); + wd.next(); + wd.next(); + wd.next(); + + wd.skip_current_dir(); + wd.skip_current_dir(); + wd.next(); +} diff --git a/third_party/rust/walkdir/src/tests/util.rs b/third_party/rust/walkdir/src/tests/util.rs new file mode 100644 index 0000000000..fdf06f555e --- /dev/null +++ b/third_party/rust/walkdir/src/tests/util.rs @@ -0,0 +1,252 @@ +use std::env; +use std::error; +use std::fs::{self, File}; +use std::io; +use std::path::{Path, PathBuf}; +use std::result; + +use crate::{DirEntry, Error}; + +/// Create an error from a format!-like syntax. +#[macro_export] +macro_rules! err { + ($($tt:tt)*) => { + Box::::from(format!($($tt)*)) + } +} + +/// A convenient result type alias. +pub type Result = result::Result>; + +/// The result of running a recursive directory iterator on a single directory. +#[derive(Debug)] +pub struct RecursiveResults { + ents: Vec, + errs: Vec, +} + +impl RecursiveResults { + /// Return all of the errors encountered during traversal. + pub fn errs(&self) -> &[Error] { + &self.errs + } + + /// Assert that no errors have occurred. + pub fn assert_no_errors(&self) { + assert!( + self.errs.is_empty(), + "expected to find no errors, but found: {:?}", + self.errs + ); + } + + /// Return all the successfully retrieved directory entries in the order + /// in which they were retrieved. + pub fn ents(&self) -> &[DirEntry] { + &self.ents + } + + /// Return all paths from all successfully retrieved directory entries. + /// + /// This does not include paths that correspond to an error. + pub fn paths(&self) -> Vec { + self.ents.iter().map(|d| d.path().to_path_buf()).collect() + } + + /// Return all the successfully retrieved directory entries, sorted + /// lexicographically by their full file path. 
+ pub fn sorted_ents(&self) -> Vec { + let mut ents = self.ents.clone(); + ents.sort_by(|e1, e2| e1.path().cmp(e2.path())); + ents + } + + /// Return all paths from all successfully retrieved directory entries, + /// sorted lexicographically. + /// + /// This does not include paths that correspond to an error. + pub fn sorted_paths(&self) -> Vec { + self.sorted_ents().into_iter().map(|d| d.into_path()).collect() + } +} + +/// A helper for managing a directory in which to run tests. +/// +/// When manipulating paths within this directory, paths are interpreted +/// relative to this directory. +#[derive(Debug)] +pub struct Dir { + dir: TempDir, +} + +impl Dir { + /// Create a new empty temporary directory. + pub fn tmp() -> Dir { + let dir = TempDir::new().unwrap(); + Dir { dir } + } + + /// Return the path to this directory. + pub fn path(&self) -> &Path { + self.dir.path() + } + + /// Return a path joined to the path to this directory. + pub fn join>(&self, path: P) -> PathBuf { + self.path().join(path) + } + + /// Run the given iterator and return the result as a distinct collection + /// of directory entries and errors. + pub fn run_recursive(&self, it: I) -> RecursiveResults + where + I: IntoIterator>, + { + let mut results = RecursiveResults { ents: vec![], errs: vec![] }; + for result in it { + match result { + Ok(ent) => results.ents.push(ent), + Err(err) => results.errs.push(err), + } + } + results + } + + /// Create a directory at the given path, while creating all intermediate + /// directories as needed. + pub fn mkdirp>(&self, path: P) { + let full = self.join(path); + fs::create_dir_all(&full) + .map_err(|e| { + err!("failed to create directory {}: {}", full.display(), e) + }) + .unwrap(); + } + + /// Create an empty file at the given path. All ancestor directories must + /// already exists. 
+ pub fn touch>(&self, path: P) { + let full = self.join(path); + File::create(&full) + .map_err(|e| { + err!("failed to create file {}: {}", full.display(), e) + }) + .unwrap(); + } + + /// Create empty files at the given paths. All ancestor directories must + /// already exists. + pub fn touch_all>(&self, paths: &[P]) { + for p in paths { + self.touch(p); + } + } + + /// Create a file symlink to the given src with the given link name. + pub fn symlink_file, P2: AsRef>( + &self, + src: P1, + link_name: P2, + ) { + #[cfg(windows)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::windows::fs::symlink_file; + symlink_file(src, link_name) + } + + #[cfg(unix)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::unix::fs::symlink; + symlink(src, link_name) + } + + let (src, link_name) = (self.join(src), self.join(link_name)); + imp(&src, &link_name) + .map_err(|e| { + err!( + "failed to symlink file {} with target {}: {}", + src.display(), + link_name.display(), + e + ) + }) + .unwrap() + } + + /// Create a directory symlink to the given src with the given link name. + pub fn symlink_dir, P2: AsRef>( + &self, + src: P1, + link_name: P2, + ) { + #[cfg(windows)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::windows::fs::symlink_dir; + symlink_dir(src, link_name) + } + + #[cfg(unix)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::unix::fs::symlink; + symlink(src, link_name) + } + + let (src, link_name) = (self.join(src), self.join(link_name)); + imp(&src, &link_name) + .map_err(|e| { + err!( + "failed to symlink directory {} with target {}: {}", + src.display(), + link_name.display(), + e + ) + }) + .unwrap() + } +} + +/// A simple wrapper for creating a temporary directory that is automatically +/// deleted when it's dropped. +/// +/// We use this in lieu of tempfile because tempfile brings in too many +/// dependencies. 
+#[derive(Debug)] +pub struct TempDir(PathBuf); + +impl Drop for TempDir { + fn drop(&mut self) { + fs::remove_dir_all(&self.0).unwrap(); + } +} + +impl TempDir { + /// Create a new empty temporary directory under the system's configured + /// temporary directory. + pub fn new() -> Result { + #[allow(deprecated)] + use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; + + static TRIES: usize = 100; + #[allow(deprecated)] + static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; + + let tmpdir = env::temp_dir(); + for _ in 0..TRIES { + let count = COUNTER.fetch_add(1, Ordering::SeqCst); + let path = tmpdir.join("rust-walkdir").join(count.to_string()); + if path.is_dir() { + continue; + } + fs::create_dir_all(&path).map_err(|e| { + err!("failed to create {}: {}", path.display(), e) + })?; + return Ok(TempDir(path)); + } + Err(err!("failed to create temp dir after {} tries", TRIES)) + } + + /// Return the underlying path to this temporary directory. + pub fn path(&self) -> &Path { + &self.0 + } +} diff --git a/third_party/rust/walkdir/src/util.rs b/third_party/rust/walkdir/src/util.rs new file mode 100644 index 0000000000..b9fcad8bf2 --- /dev/null +++ b/third_party/rust/walkdir/src/util.rs @@ -0,0 +1,25 @@ +use std::io; +use std::path::Path; + +#[cfg(unix)] +pub fn device_num>(path: P) -> io::Result { + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + +#[cfg(windows)] +pub fn device_num>(path: P) -> io::Result { + use winapi_util::{file, Handle}; + + let h = Handle::from_path_any(path)?; + file::information(h).map(|info| info.volume_serial_number()) +} + +#[cfg(not(any(unix, windows)))] +pub fn device_num>(_: P) -> io::Result { + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} -- cgit v1.2.3