author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:47:55 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:47:55 +0000
commit     2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch)
tree       033cc839730fda84ff08db877037977be94e5e3a /vendor/ignore/src
parent     Initial commit. (diff)
download   cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.tar.xz
           cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.zip

Adding upstream version 0.70.1+ds1. (upstream/0.70.1+ds1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ignore/src')
-rw-r--r--  vendor/ignore/src/default_types.rs   316
-rw-r--r--  vendor/ignore/src/dir.rs            1188
-rw-r--r--  vendor/ignore/src/gitignore.rs       789
-rw-r--r--  vendor/ignore/src/lib.rs             550
-rw-r--r--  vendor/ignore/src/overrides.rs       263
-rw-r--r--  vendor/ignore/src/pathutil.rs        142
-rw-r--r--  vendor/ignore/src/types.rs           583
-rw-r--r--  vendor/ignore/src/walk.rs           2251
8 files changed, 6082 insertions, 0 deletions
diff --git a/vendor/ignore/src/default_types.rs b/vendor/ignore/src/default_types.rs
new file mode 100644
index 0000000..e6a3a8f
--- /dev/null
+++ b/vendor/ignore/src/default_types.rs
@@ -0,0 +1,316 @@
+/// This list represents the default file types that ripgrep ships with. In
+/// general, any file format is fair game, although it should generally be
+/// limited to reasonably popular open formats. For other cases, you can add
+/// types to each invocation of ripgrep with the '--type-add' flag.
+///
+/// If you would like to add or improve this list, please file a PR:
+/// <https://github.com/BurntSushi/ripgrep>.
+///
+/// Please try to keep this list sorted lexicographically and wrapped to 79
+/// columns (inclusive).
+#[rustfmt::skip]
+pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
+ ("agda", &["*.agda", "*.lagda"]),
+ ("aidl", &["*.aidl"]),
+ ("amake", &["*.mk", "*.bp"]),
+ ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
+ ("asm", &["*.asm", "*.s", "*.S"]),
+ ("asp", &[
+ "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs",
+ "*.ascx.vb", "*.asp"
+ ]),
+ ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
+ ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
+ ("awk", &["*.awk"]),
+ ("bazel", &[
+ "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
+ "WORKSPACE", "WORKSPACE.bazel",
+ ]),
+ ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
+ ("brotli", &["*.br"]),
+ ("buildstream", &["*.bst"]),
+ ("bzip2", &["*.bz2", "*.tbz2"]),
+ ("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
+ ("cabal", &["*.cabal"]),
+ ("candid", &["*.did"]),
+ ("carp", &["*.carp"]),
+ ("cbor", &["*.cbor"]),
+ ("ceylon", &["*.ceylon"]),
+ ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
+ ("cmake", &["*.cmake", "CMakeLists.txt"]),
+ ("coffeescript", &["*.coffee"]),
+ ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
+ ("coq", &["*.v"]),
+ ("cpp", &[
+ "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
+ "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
+ ]),
+ ("creole", &["*.creole"]),
+ ("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
+ ("cs", &["*.cs"]),
+ ("csharp", &["*.cs"]),
+ ("cshtml", &["*.cshtml"]),
+ ("css", &["*.css", "*.scss"]),
+ ("csv", &["*.csv"]),
+ ("cuda", &["*.cu", "*.cuh"]),
+ ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
+ ("d", &["*.d"]),
+ ("dart", &["*.dart"]),
+ ("devicetree", &["*.dts", "*.dtsi"]),
+ ("dhall", &["*.dhall"]),
+ ("diff", &["*.patch", "*.diff"]),
+ ("docker", &["*Dockerfile*"]),
+ ("dts", &["*.dts", "*.dtsi"]),
+ ("dvc", &["Dvcfile", "*.dvc"]),
+ ("ebuild", &["*.ebuild"]),
+ ("edn", &["*.edn"]),
+ ("elisp", &["*.el"]),
+ ("elixir", &["*.ex", "*.eex", "*.exs"]),
+ ("elm", &["*.elm"]),
+ ("erb", &["*.erb"]),
+ ("erlang", &["*.erl", "*.hrl"]),
+ ("fennel", &["*.fnl"]),
+ ("fidl", &["*.fidl"]),
+ ("fish", &["*.fish"]),
+ ("flatbuffers", &["*.fbs"]),
+ ("fortran", &[
+ "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
+ "*.f90", "*.F90", "*.f95", "*.F95",
+ ]),
+ ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
+ ("fut", &["*.fut"]),
+ ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
+ ("gn", &["*.gn", "*.gni"]),
+ ("go", &["*.go"]),
+ ("gradle", &["*.gradle"]),
+ ("groovy", &["*.groovy", "*.gradle"]),
+ ("gzip", &["*.gz", "*.tgz"]),
+ ("h", &["*.h", "*.hh", "*.hpp"]),
+ ("haml", &["*.haml"]),
+ ("hare", &["*.ha"]),
+ ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
+ ("hbs", &["*.hbs"]),
+ ("hs", &["*.hs", "*.lhs"]),
+ ("html", &["*.htm", "*.html", "*.ejs"]),
+ ("hy", &["*.hy"]),
+ ("idris", &["*.idr", "*.lidr"]),
+ ("janet", &["*.janet"]),
+ ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
+ ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
+ ("jl", &["*.jl"]),
+ ("js", &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]),
+ ("json", &["*.json", "composer.lock"]),
+ ("jsonl", &["*.jsonl"]),
+ ("julia", &["*.jl"]),
+ ("jupyter", &["*.ipynb", "*.jpynb"]),
+ ("k", &["*.k"]),
+ ("kotlin", &["*.kt", "*.kts"]),
+ ("less", &["*.less"]),
+ ("license", &[
+ // General
+ "COPYING", "COPYING[.-]*",
+ "COPYRIGHT", "COPYRIGHT[.-]*",
+ "EULA", "EULA[.-]*",
+ "licen[cs]e", "licen[cs]e.*",
+ "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
+ "NOTICE", "NOTICE[.-]*",
+ "PATENTS", "PATENTS[.-]*",
+ "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
+ // GPL (gpl.txt, etc.)
+ "agpl[.-]*",
+ "gpl[.-]*",
+ "lgpl[.-]*",
+ // Other license-specific (APACHE-2.0.txt, etc.)
+ "AGPL-*[0-9]*",
+ "APACHE-*[0-9]*",
+ "BSD-*[0-9]*",
+ "CC-BY-*",
+ "GFDL-*[0-9]*",
+ "GNU-*[0-9]*",
+ "GPL-*[0-9]*",
+ "LGPL-*[0-9]*",
+ "MIT-*[0-9]*",
+ "MPL-*[0-9]*",
+ "OFL-*[0-9]*",
+ ]),
+ ("lilypond", &["*.ly", "*.ily"]),
+ ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
+ ("lock", &["*.lock", "package-lock.json"]),
+ ("log", &["*.log"]),
+ ("lua", &["*.lua"]),
+ ("lz4", &["*.lz4"]),
+ ("lzma", &["*.lzma"]),
+ ("m4", &["*.ac", "*.m4"]),
+ ("make", &[
+ "[Gg][Nn][Uu]makefile", "[Mm]akefile",
+ "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
+ "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
+ "*.mk", "*.mak"
+ ]),
+ ("mako", &["*.mako", "*.mao"]),
+ ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
+ ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
+ ("matlab", &["*.m"]),
+ ("md", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
+ ("meson", &["meson.build", "meson_options.txt"]),
+ ("minified", &["*.min.html", "*.min.css", "*.min.js"]),
+ ("mint", &["*.mint"]),
+ ("mk", &["mkfile"]),
+ ("ml", &["*.ml"]),
+ ("motoko", &["*.mo"]),
+ ("msbuild", &[
+ "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
+ ]),
+ ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
+ ("nix", &["*.nix"]),
+ ("objc", &["*.h", "*.m"]),
+ ("objcpp", &["*.h", "*.mm"]),
+ ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
+ ("org", &["*.org", "*.org_archive"]),
+ ("pants", &["BUILD"]),
+ ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
+ ("pdf", &["*.pdf"]),
+ ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
+ ("php", &[
+ // note that PHP 6 doesn't exist
+ // See: https://wiki.php.net/rfc/php6
+ "*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8",
+ "*.pht", "*.phtml"
+ ]),
+ ("po", &["*.po"]),
+ ("pod", &["*.pod"]),
+ ("postscript", &["*.eps", "*.ps"]),
+ ("protobuf", &["*.proto"]),
+ ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
+ ("puppet", &["*.epp", "*.erb", "*.pp", "*.rb"]),
+ ("purs", &["*.purs"]),
+ ("py", &["*.py"]),
+ ("qmake", &["*.pro", "*.pri", "*.prf"]),
+ ("qml", &["*.qml"]),
+ ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
+ ("racket", &["*.rkt"]),
+ ("rdoc", &["*.rdoc"]),
+ ("readme", &["README*", "*README"]),
+ ("reasonml", &["*.re", "*.rei"]),
+ ("red", &["*.r", "*.red", "*.reds"]),
+ ("rescript", &["*.res", "*.resi"]),
+ ("robot", &["*.robot"]),
+ ("rst", &["*.rst"]),
+ ("ruby", &[
+ // Idiomatic files
+ "config.ru", "Gemfile", ".irbrc", "Rakefile",
+ // Extensions
+ "*.gemspec", "*.rb", "*.rbw"
+ ]),
+ ("rust", &["*.rs"]),
+ ("sass", &["*.sass", "*.scss"]),
+ ("scala", &["*.scala", "*.sbt"]),
+ ("sh", &[
+ // Portable/misc. init files
+ ".login", ".logout", ".profile", "profile",
+ // bash-specific init files
+ ".bash_login", "bash_login",
+ ".bash_logout", "bash_logout",
+ ".bash_profile", "bash_profile",
+ ".bashrc", "bashrc", "*.bashrc",
+ // csh-specific init files
+ ".cshrc", "*.cshrc",
+ // ksh-specific init files
+ ".kshrc", "*.kshrc",
+ // tcsh-specific init files
+ ".tcshrc",
+ // zsh-specific init files
+ ".zshenv", "zshenv",
+ ".zlogin", "zlogin",
+ ".zlogout", "zlogout",
+ ".zprofile", "zprofile",
+ ".zshrc", "zshrc",
+ // Extensions
+ "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
+ ]),
+ ("slim", &["*.skim", "*.slim", "*.slime"]),
+ ("smarty", &["*.tpl"]),
+ ("sml", &["*.sml", "*.sig"]),
+ ("solidity", &["*.sol"]),
+ ("soy", &["*.soy"]),
+ ("spark", &["*.spark"]),
+ ("spec", &["*.spec"]),
+ ("sql", &["*.sql", "*.psql"]),
+ ("stylus", &["*.styl"]),
+ ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
+ ("svg", &["*.svg"]),
+ ("swift", &["*.swift"]),
+ ("swig", &["*.def", "*.i"]),
+ ("systemd", &[
+ "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
+ "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
+ "*.timer",
+ ]),
+ ("taskpaper", &["*.taskpaper"]),
+ ("tcl", &["*.tcl"]),
+ ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
+ ("texinfo", &["*.texi"]),
+ ("textile", &["*.textile"]),
+ ("tf", &["*.tf"]),
+ ("thrift", &["*.thrift"]),
+ ("toml", &["*.toml", "Cargo.lock"]),
+ ("ts", &["*.ts", "*.tsx", "*.cts", "*.mts"]),
+ ("twig", &["*.twig"]),
+ ("txt", &["*.txt"]),
+ ("typoscript", &["*.typoscript", "*.ts"]),
+ ("vala", &["*.vala"]),
+ ("vb", &["*.vb"]),
+ ("vcl", &["*.vcl"]),
+ ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
+ ("vhdl", &["*.vhd", "*.vhdl"]),
+ ("vim", &[
+ "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
+ ]),
+ ("vimscript", &[
+ "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
+ ]),
+ ("webidl", &["*.idl", "*.webidl", "*.widl"]),
+ ("wiki", &["*.mediawiki", "*.wiki"]),
+ ("xml", &[
+ "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
+ "*.rng", "*.sch", "*.xhtml",
+ ]),
+ ("xz", &["*.xz", "*.txz"]),
+ ("yacc", &["*.y"]),
+ ("yaml", &["*.yaml", "*.yml"]),
+ ("yang", &["*.yang"]),
+ ("z", &["*.Z"]),
+ ("zig", &["*.zig"]),
+ ("zsh", &[
+ ".zshenv", "zshenv",
+ ".zlogin", "zlogin",
+ ".zlogout", "zlogout",
+ ".zprofile", "zprofile",
+ ".zshrc", "zshrc",
+ "*.zsh",
+ ]),
+ ("zstd", &["*.zst", "*.zstd"]),
+];
+
+#[cfg(test)]
+mod tests {
+ use super::DEFAULT_TYPES;
+
+ #[test]
+ fn default_types_are_sorted() {
+ let mut names = DEFAULT_TYPES.iter().map(|(name, _exts)| name);
+
+ let Some(mut previous_name) = names.next() else { return; };
+
+ for name in names {
+ assert!(
+ name > previous_name,
+ r#""{}" should be sorted before "{}" in `DEFAULT_TYPES`"#,
+ name,
+ previous_name
+ );
+
+ previous_name = name;
+ }
+ }
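+
+ // Illustrative lookup sketch: `DEFAULT_TYPES` maps a type name to its glob
+ // patterns, so resolving a name is a linear scan over the slice. This test
+ // only demonstrates usage; it relies on the `rust` entry defined above.
+ #[test]
+ fn default_types_lookup_example() {
+     let globs = DEFAULT_TYPES
+         .iter()
+         .find(|&&(name, _)| name == "rust")
+         .map(|&(_, globs)| globs)
+         .expect("the `rust` type should be present");
+     assert_eq!(globs, &["*.rs"]);
+ }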
+}
diff --git a/vendor/ignore/src/dir.rs b/vendor/ignore/src/dir.rs
new file mode 100644
index 0000000..2577665
--- /dev/null
+++ b/vendor/ignore/src/dir.rs
@@ -0,0 +1,1188 @@
+// This module provides a data structure, `Ignore`, that connects "directory
+// traversal" with "ignore matchers." Specifically, it knows about gitignore
+// semantics and precedence, and is organized based on directory hierarchy.
+// Namely, every matcher logically corresponds to ignore rules from a single
+// directory, and points to the matcher for its corresponding parent directory.
+// In this sense, `Ignore` is a *persistent* data structure.
+//
+// This design was specifically chosen to make it possible to use this data
+// structure in a parallel directory iterator.
+//
+// My initial intention was to expose this module as part of this crate's
+// public API, but I think the data structure's public API is too complicated
+// with non-obvious failure modes. Alas, such things haven't been documented
+// well.
+
+use std::collections::HashMap;
+use std::ffi::{OsStr, OsString};
+use std::fs::{File, FileType};
+use std::io::{self, BufRead};
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, RwLock};
+
+use crate::gitignore::{self, Gitignore, GitignoreBuilder};
+use crate::overrides::{self, Override};
+use crate::pathutil::{is_hidden, strip_prefix};
+use crate::types::{self, Types};
+use crate::walk::DirEntry;
+use crate::{Error, Match, PartialErrorBuilder};
+
+/// IgnoreMatch represents information about where a match came from when using
+/// the `Ignore` matcher.
+#[derive(Clone, Debug)]
+pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
+
+/// IgnoreMatchInner describes precisely where the match information came from.
+/// This is private to allow expansion to more matchers in the future.
+#[derive(Clone, Debug)]
+enum IgnoreMatchInner<'a> {
+ Override(overrides::Glob<'a>),
+ Gitignore(&'a gitignore::Glob),
+ Types(types::Glob<'a>),
+ Hidden,
+}
+
+impl<'a> IgnoreMatch<'a> {
+ fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> {
+ IgnoreMatch(IgnoreMatchInner::Override(x))
+ }
+
+ fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> {
+ IgnoreMatch(IgnoreMatchInner::Gitignore(x))
+ }
+
+ fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> {
+ IgnoreMatch(IgnoreMatchInner::Types(x))
+ }
+
+ fn hidden() -> IgnoreMatch<'static> {
+ IgnoreMatch(IgnoreMatchInner::Hidden)
+ }
+}
+
+/// Options for the ignore matcher, shared between the matcher itself and the
+/// builder.
+#[derive(Clone, Copy, Debug)]
+struct IgnoreOptions {
+ /// Whether to ignore hidden file paths or not.
+ hidden: bool,
+ /// Whether to read .ignore files.
+ ignore: bool,
+ /// Whether to respect any ignore files in parent directories.
+ parents: bool,
+ /// Whether to read git's global gitignore file.
+ git_global: bool,
+ /// Whether to read .gitignore files.
+ git_ignore: bool,
+ /// Whether to read .git/info/exclude files.
+ git_exclude: bool,
+ /// Whether to ignore files case insensitively
+ ignore_case_insensitive: bool,
+ /// Whether a git repository must be present in order to apply any
+ /// git-related ignore rules.
+ require_git: bool,
+}
+
+/// Ignore is a matcher useful for recursively walking one or more directories.
+#[derive(Clone, Debug)]
+pub struct Ignore(Arc<IgnoreInner>);
+
+#[derive(Clone, Debug)]
+struct IgnoreInner {
+ /// A map of all existing directories that have already been
+ /// compiled into matchers.
+ ///
+ /// Note that this is never used during matching, only when adding new
+ /// parent directory matchers. This avoids needing to rebuild glob sets for
+ /// parent directories if many paths are being searched.
+ compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
+ /// The path to the directory that this matcher was built from.
+ dir: PathBuf,
+ /// An override matcher (default is empty).
+ overrides: Arc<Override>,
+ /// A file type matcher.
+ types: Arc<Types>,
+ /// The parent directory to match next.
+ ///
+ /// If this is the root directory or there are otherwise no more
+ /// directories to match, then `parent` is `None`.
+ parent: Option<Ignore>,
+ /// Whether this is an absolute parent matcher, as added by `add_parents`.
+ is_absolute_parent: bool,
+ /// The absolute base path of this matcher. Populated only if parent
+ /// directories are added.
+ absolute_base: Option<Arc<PathBuf>>,
+ /// Explicit global ignore matchers specified by the caller.
+ explicit_ignores: Arc<Vec<Gitignore>>,
+ /// Ignore files used in addition to `.ignore`
+ custom_ignore_filenames: Arc<Vec<OsString>>,
+ /// The matcher for custom ignore files
+ custom_ignore_matcher: Gitignore,
+ /// The matcher for .ignore files.
+ ignore_matcher: Gitignore,
+ /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore.
+ git_global_matcher: Arc<Gitignore>,
+ /// The matcher for .gitignore files.
+ git_ignore_matcher: Gitignore,
+ /// Special matcher for `.git/info/exclude` files.
+ git_exclude_matcher: Gitignore,
+ /// Whether this directory contains a .git sub-directory.
+ has_git: bool,
+ /// Ignore config.
+ opts: IgnoreOptions,
+}
+
+impl Ignore {
+ /// Return the directory path of this matcher.
+ pub fn path(&self) -> &Path {
+ &self.0.dir
+ }
+
+ /// Return true if this matcher has no parent.
+ pub fn is_root(&self) -> bool {
+ self.0.parent.is_none()
+ }
+
+ /// Returns true if this matcher was added via the `add_parents` method.
+ pub fn is_absolute_parent(&self) -> bool {
+ self.0.is_absolute_parent
+ }
+
+ /// Return this matcher's parent, if one exists.
+ pub fn parent(&self) -> Option<Ignore> {
+ self.0.parent.clone()
+ }
+
+ /// Create a new `Ignore` matcher with the parent directories of `dir`.
+ ///
+ /// Note that this can only be called on an `Ignore` matcher with no
+ /// parents (i.e., `is_root` returns `true`). This will panic otherwise.
+ pub fn add_parents<P: AsRef<Path>>(
+ &self,
+ path: P,
+ ) -> (Ignore, Option<Error>) {
+ if !self.0.opts.parents
+ && !self.0.opts.git_ignore
+ && !self.0.opts.git_exclude
+ && !self.0.opts.git_global
+ {
+ // If we never need info from parent directories, then don't do
+ // anything.
+ return (self.clone(), None);
+ }
+ if !self.is_root() {
+ panic!("Ignore::add_parents called on non-root matcher");
+ }
+ let absolute_base = match path.as_ref().canonicalize() {
+ Ok(path) => Arc::new(path),
+ Err(_) => {
+ // There's not much we can do here, so just return our
+ // existing matcher. We drop the error to be consistent
+ // with our general pattern of ignoring I/O errors when
+ // processing ignore files.
+ return (self.clone(), None);
+ }
+ };
+ // List of parents, from child to root.
+ let mut parents = vec![];
+ let mut path = &**absolute_base;
+ while let Some(parent) = path.parent() {
+ parents.push(parent);
+ path = parent;
+ }
+ let mut errs = PartialErrorBuilder::default();
+ let mut ig = self.clone();
+ for parent in parents.into_iter().rev() {
+ let mut compiled = self.0.compiled.write().unwrap();
+ if let Some(prebuilt) = compiled.get(parent.as_os_str()) {
+ ig = prebuilt.clone();
+ continue;
+ }
+ let (mut igtmp, err) = ig.add_child_path(parent);
+ errs.maybe_push(err);
+ igtmp.is_absolute_parent = true;
+ igtmp.absolute_base = Some(absolute_base.clone());
+ igtmp.has_git =
+ if self.0.opts.require_git && self.0.opts.git_ignore {
+ parent.join(".git").exists()
+ } else {
+ false
+ };
+ ig = Ignore(Arc::new(igtmp));
+ compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
+ }
+ (ig, errs.into_error_option())
+ }
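+
+ // Usage sketch for `add_parents` together with `add_child` (defined below);
+ // the directory layout here is hypothetical. This mirrors the
+ // `absolute_parent` test at the bottom of this file: parents are added
+ // first so that ignore rules above the search root are respected, then
+ // children are added while descending.
+ //
+ //     let ig0 = IgnoreBuilder::new().build();
+ //     let (ig1, err) = ig0.add_parents("/repo/src");
+ //     assert!(err.is_none());
+ //     let (ig2, err) = ig1.add_child("/repo/src");
+ //     assert!(err.is_none());
+ //     // `ig2` now consults ignore files in `/repo` as well as `/repo/src`.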
+
+ /// Create a new `Ignore` matcher for the given child directory.
+ ///
+ /// Since building the matcher may require reading from multiple
+ /// files, it's possible that this method partially succeeds. Therefore,
+ /// a matcher is always returned (which may match nothing) and an error is
+ /// returned if it exists.
+ ///
+ /// Note that all I/O errors are completely ignored.
+ pub fn add_child<P: AsRef<Path>>(
+ &self,
+ dir: P,
+ ) -> (Ignore, Option<Error>) {
+ let (ig, err) = self.add_child_path(dir.as_ref());
+ (Ignore(Arc::new(ig)), err)
+ }
+
+ /// Like add_child, but takes a full path and returns an IgnoreInner.
+ fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
+ let git_type = if self.0.opts.require_git
+ && (self.0.opts.git_ignore || self.0.opts.git_exclude)
+ {
+ dir.join(".git").metadata().ok().map(|md| md.file_type())
+ } else {
+ None
+ };
+ let has_git = git_type.map(|_| true).unwrap_or(false);
+
+ let mut errs = PartialErrorBuilder::default();
+ let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
+ Gitignore::empty()
+ } else {
+ let (m, err) = create_gitignore(
+ &dir,
+ &dir,
+ &self.0.custom_ignore_filenames,
+ self.0.opts.ignore_case_insensitive,
+ );
+ errs.maybe_push(err);
+ m
+ };
+ let ig_matcher = if !self.0.opts.ignore {
+ Gitignore::empty()
+ } else {
+ let (m, err) = create_gitignore(
+ &dir,
+ &dir,
+ &[".ignore"],
+ self.0.opts.ignore_case_insensitive,
+ );
+ errs.maybe_push(err);
+ m
+ };
+ let gi_matcher = if !self.0.opts.git_ignore {
+ Gitignore::empty()
+ } else {
+ let (m, err) = create_gitignore(
+ &dir,
+ &dir,
+ &[".gitignore"],
+ self.0.opts.ignore_case_insensitive,
+ );
+ errs.maybe_push(err);
+ m
+ };
+ let gi_exclude_matcher = if !self.0.opts.git_exclude {
+ Gitignore::empty()
+ } else {
+ match resolve_git_commondir(dir, git_type) {
+ Ok(git_dir) => {
+ let (m, err) = create_gitignore(
+ &dir,
+ &git_dir,
+ &["info/exclude"],
+ self.0.opts.ignore_case_insensitive,
+ );
+ errs.maybe_push(err);
+ m
+ }
+ Err(err) => {
+ errs.maybe_push(err);
+ Gitignore::empty()
+ }
+ }
+ };
+ let ig = IgnoreInner {
+ compiled: self.0.compiled.clone(),
+ dir: dir.to_path_buf(),
+ overrides: self.0.overrides.clone(),
+ types: self.0.types.clone(),
+ parent: Some(self.clone()),
+ is_absolute_parent: false,
+ absolute_base: self.0.absolute_base.clone(),
+ explicit_ignores: self.0.explicit_ignores.clone(),
+ custom_ignore_filenames: self.0.custom_ignore_filenames.clone(),
+ custom_ignore_matcher: custom_ig_matcher,
+ ignore_matcher: ig_matcher,
+ git_global_matcher: self.0.git_global_matcher.clone(),
+ git_ignore_matcher: gi_matcher,
+ git_exclude_matcher: gi_exclude_matcher,
+ has_git,
+ opts: self.0.opts,
+ };
+ (ig, errs.into_error_option())
+ }
+
+ /// Returns true if at least one type of ignore rule should be matched.
+ fn has_any_ignore_rules(&self) -> bool {
+ let opts = self.0.opts;
+ let has_custom_ignore_files =
+ !self.0.custom_ignore_filenames.is_empty();
+ let has_explicit_ignores = !self.0.explicit_ignores.is_empty();
+
+ opts.ignore
+ || opts.git_global
+ || opts.git_ignore
+ || opts.git_exclude
+ || has_custom_ignore_files
+ || has_explicit_ignores
+ }
+
+ /// Like `matched`, but works with a directory entry instead.
+ pub fn matched_dir_entry<'a>(
+ &'a self,
+ dent: &DirEntry,
+ ) -> Match<IgnoreMatch<'a>> {
+ let m = self.matched(dent.path(), dent.is_dir());
+ if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
+ return Match::Ignore(IgnoreMatch::hidden());
+ }
+ m
+ }
+
+ /// Returns a match indicating whether the given file path should be
+ /// ignored or not.
+ ///
+ /// The match contains information about its origin.
+ fn matched<'a, P: AsRef<Path>>(
+ &'a self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<IgnoreMatch<'a>> {
+ // We need to be careful with our path. If it has a leading ./, then
+ // strip it because it causes nothing but trouble.
+ let mut path = path.as_ref();
+ if let Some(p) = strip_prefix("./", path) {
+ path = p;
+ }
+ // Match against the override patterns. If an override matches
+ // regardless of whether it's whitelist/ignore, then we quit and
+ // return that result immediately. Overrides have the highest
+ // precedence.
+ if !self.0.overrides.is_empty() {
+ let mat = self
+ .0
+ .overrides
+ .matched(path, is_dir)
+ .map(IgnoreMatch::overrides);
+ if !mat.is_none() {
+ return mat;
+ }
+ }
+ let mut whitelisted = Match::None;
+ if self.has_any_ignore_rules() {
+ let mat = self.matched_ignore(path, is_dir);
+ if mat.is_ignore() {
+ return mat;
+ } else if mat.is_whitelist() {
+ whitelisted = mat;
+ }
+ }
+ if !self.0.types.is_empty() {
+ let mat =
+ self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
+ if mat.is_ignore() {
+ return mat;
+ } else if mat.is_whitelist() {
+ whitelisted = mat;
+ }
+ }
+ whitelisted
+ }
+
+ /// Performs matching only on the ignore files for this directory and
+ /// all parent directories.
+ fn matched_ignore<'a>(
+ &'a self,
+ path: &Path,
+ is_dir: bool,
+ ) -> Match<IgnoreMatch<'a>> {
+ let (
+ mut m_custom_ignore,
+ mut m_ignore,
+ mut m_gi,
+ mut m_gi_exclude,
+ mut m_explicit,
+ ) = (Match::None, Match::None, Match::None, Match::None, Match::None);
+ let any_git =
+ !self.0.opts.require_git || self.parents().any(|ig| ig.0.has_git);
+ let mut saw_git = false;
+ for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
+ if m_custom_ignore.is_none() {
+ m_custom_ignore =
+ ig.0.custom_ignore_matcher
+ .matched(path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if m_ignore.is_none() {
+ m_ignore =
+ ig.0.ignore_matcher
+ .matched(path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if any_git && !saw_git && m_gi.is_none() {
+ m_gi =
+ ig.0.git_ignore_matcher
+ .matched(path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if any_git && !saw_git && m_gi_exclude.is_none() {
+ m_gi_exclude =
+ ig.0.git_exclude_matcher
+ .matched(path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ saw_git = saw_git || ig.0.has_git;
+ }
+ if self.0.opts.parents {
+ if let Some(abs_parent_path) = self.absolute_base() {
+ let path = abs_parent_path.join(path);
+ for ig in
+ self.parents().skip_while(|ig| !ig.0.is_absolute_parent)
+ {
+ if m_custom_ignore.is_none() {
+ m_custom_ignore =
+ ig.0.custom_ignore_matcher
+ .matched(&path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if m_ignore.is_none() {
+ m_ignore =
+ ig.0.ignore_matcher
+ .matched(&path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if any_git && !saw_git && m_gi.is_none() {
+ m_gi =
+ ig.0.git_ignore_matcher
+ .matched(&path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ if any_git && !saw_git && m_gi_exclude.is_none() {
+ m_gi_exclude =
+ ig.0.git_exclude_matcher
+ .matched(&path, is_dir)
+ .map(IgnoreMatch::gitignore);
+ }
+ saw_git = saw_git || ig.0.has_git;
+ }
+ }
+ }
+ for gi in self.0.explicit_ignores.iter().rev() {
+ if !m_explicit.is_none() {
+ break;
+ }
+ m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
+ }
+ let m_global = if any_git {
+ self.0
+ .git_global_matcher
+ .matched(&path, is_dir)
+ .map(IgnoreMatch::gitignore)
+ } else {
+ Match::None
+ };
+
+ m_custom_ignore
+ .or(m_ignore)
+ .or(m_gi)
+ .or(m_gi_exclude)
+ .or(m_global)
+ .or(m_explicit)
+ }
+
+ /// Returns an iterator over parent ignore matchers, including this one.
+ pub fn parents(&self) -> Parents<'_> {
+ Parents(Some(self))
+ }
+
+ /// Returns the absolute base path of this matcher, if one exists.
+ fn absolute_base(&self) -> Option<&Path> {
+ self.0.absolute_base.as_ref().map(|p| &***p)
+ }
+}
+
+/// An iterator over all parents of an ignore matcher, including itself.
+///
+/// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher.
+pub struct Parents<'a>(Option<&'a Ignore>);
+
+impl<'a> Iterator for Parents<'a> {
+ type Item = &'a Ignore;
+
+ fn next(&mut self) -> Option<&'a Ignore> {
+ match self.0.take() {
+ None => None,
+ Some(ig) => {
+ self.0 = ig.0.parent.as_ref();
+ Some(ig)
+ }
+ }
+ }
+}
+
+/// A builder for creating an Ignore matcher.
+#[derive(Clone, Debug)]
+pub struct IgnoreBuilder {
+ /// The root directory path for this ignore matcher.
+ dir: PathBuf,
+ /// An override matcher (default is empty).
+ overrides: Arc<Override>,
+ /// A type matcher (default is empty).
+ types: Arc<Types>,
+ /// Explicit global ignore matchers.
+ explicit_ignores: Vec<Gitignore>,
+ /// Ignore files in addition to .ignore.
+ custom_ignore_filenames: Vec<OsString>,
+ /// Ignore config.
+ opts: IgnoreOptions,
+}
+
+impl IgnoreBuilder {
+ /// Create a new builder for an `Ignore` matcher.
+ ///
+ /// All relative file paths are resolved with respect to the current
+ /// working directory.
+ pub fn new() -> IgnoreBuilder {
+ IgnoreBuilder {
+ dir: Path::new("").to_path_buf(),
+ overrides: Arc::new(Override::empty()),
+ types: Arc::new(Types::empty()),
+ explicit_ignores: vec![],
+ custom_ignore_filenames: vec![],
+ opts: IgnoreOptions {
+ hidden: true,
+ ignore: true,
+ parents: true,
+ git_global: true,
+ git_ignore: true,
+ git_exclude: true,
+ ignore_case_insensitive: false,
+ require_git: true,
+ },
+ }
+ }
+
+ /// Builds a new `Ignore` matcher.
+ ///
+ /// The matcher returned won't match anything until ignore rules from
+ /// directories are added to it.
+ pub fn build(&self) -> Ignore {
+ let git_global_matcher = if !self.opts.git_global {
+ Gitignore::empty()
+ } else {
+ let mut builder = GitignoreBuilder::new("");
+ builder
+ .case_insensitive(self.opts.ignore_case_insensitive)
+ .unwrap();
+ let (gi, err) = builder.build_global();
+ if let Some(err) = err {
+ log::debug!("{}", err);
+ }
+ gi
+ };
+
+ Ignore(Arc::new(IgnoreInner {
+ compiled: Arc::new(RwLock::new(HashMap::new())),
+ dir: self.dir.clone(),
+ overrides: self.overrides.clone(),
+ types: self.types.clone(),
+ parent: None,
+ is_absolute_parent: true,
+ absolute_base: None,
+ explicit_ignores: Arc::new(self.explicit_ignores.clone()),
+ custom_ignore_filenames: Arc::new(
+ self.custom_ignore_filenames.clone(),
+ ),
+ custom_ignore_matcher: Gitignore::empty(),
+ ignore_matcher: Gitignore::empty(),
+ git_global_matcher: Arc::new(git_global_matcher),
+ git_ignore_matcher: Gitignore::empty(),
+ git_exclude_matcher: Gitignore::empty(),
+ has_git: false,
+ opts: self.opts,
+ }))
+ }
+
+ /// Add an override matcher.
+ ///
+ /// By default, no override matcher is used.
+ ///
+ /// This overrides any previous setting.
+ pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder {
+ self.overrides = Arc::new(overrides);
+ self
+ }
+
+ /// Add a file type matcher.
+ ///
+ /// By default, no file type matcher is used.
+ ///
+ /// This overrides any previous setting.
+ pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder {
+ self.types = Arc::new(types);
+ self
+ }
+
+ /// Adds a new global ignore matcher from the ignore file path given.
+ pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder {
+ self.explicit_ignores.push(ig);
+ self
+ }
+
+ /// Add a custom ignore file name
+ ///
+ /// These ignore files have higher precedence than all other ignore files.
+ ///
+ /// When specifying multiple names, earlier names have lower precedence than
+ /// later names.
+ pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
+ &mut self,
+ file_name: S,
+ ) -> &mut IgnoreBuilder {
+ self.custom_ignore_filenames.push(file_name.as_ref().to_os_string());
+ self
+ }
+
+ /// Enables ignoring hidden files.
+ ///
+ /// This is enabled by default.
+ pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.hidden = yes;
+ self
+ }
+
+ /// Enables reading `.ignore` files.
+ ///
+ /// `.ignore` files have the same semantics as `gitignore` files and are
+ /// supported by search tools such as ripgrep and The Silver Searcher.
+ ///
+ /// This is enabled by default.
+ pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.ignore = yes;
+ self
+ }
+
+ /// Enables reading ignore files from parent directories.
+ ///
+ /// If this is enabled, then .gitignore files in parent directories of each
+ /// file path given are respected. Otherwise, they are ignored.
+ ///
+ /// This is enabled by default.
+ pub fn parents(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.parents = yes;
+ self
+ }
+
+ /// Add a global gitignore matcher.
+ ///
+ /// Its precedence is lower than both normal `.gitignore` files and
+ /// `.git/info/exclude` files.
+ ///
+ /// This overwrites any previous global gitignore setting.
+ ///
+ /// This is enabled by default.
+ pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.git_global = yes;
+ self
+ }
+
+ /// Enables reading `.gitignore` files.
+ ///
+ /// `.gitignore` files have match semantics as described in the `gitignore`
+ /// man page.
+ ///
+ /// This is enabled by default.
+ pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.git_ignore = yes;
+ self
+ }
+
+ /// Enables reading `.git/info/exclude` files.
+ ///
+ /// `.git/info/exclude` files have match semantics as described in the
+ /// `gitignore` man page.
+ ///
+ /// This is enabled by default.
+ pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.git_exclude = yes;
+ self
+ }
+
+ /// Whether a git repository is required to apply git-related ignore
+ /// rules (global rules, .gitignore and local exclude rules).
+ ///
+ /// When disabled, git-related ignore rules are applied even when searching
+ /// outside a git repository.
+ pub fn require_git(&mut self, yes: bool) -> &mut IgnoreBuilder {
+ self.opts.require_git = yes;
+ self
+ }
+
+ /// Process ignore files case insensitively
+ ///
+ /// This is disabled by default.
+ pub fn ignore_case_insensitive(
+ &mut self,
+ yes: bool,
+ ) -> &mut IgnoreBuilder {
+ self.opts.ignore_case_insensitive = yes;
+ self
+ }
+}
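+
+// Minimal builder sketch (the directory path is hypothetical). The defaults
+// set in `IgnoreBuilder::new` enable hidden-file filtering, `.ignore` files,
+// parent traversal and all git-related rules; each can be toggled before
+// `build`:
+//
+//     let (ig, err) = IgnoreBuilder::new()
+//         .hidden(false)
+//         .require_git(false)
+//         .build()
+//         .add_child("./some/project");
+//     assert!(err.is_none());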
+
+/// Creates a new gitignore matcher for the directory given.
+///
+/// The matcher is meant to match files below `dir`.
+/// Ignore globs are extracted from each of the file names relative to
+/// `dir_for_ignorefile` in the order given (earlier names have lower
+/// precedence than later names).
+///
+/// I/O errors are ignored.
+pub fn create_gitignore<T: AsRef<OsStr>>(
+ dir: &Path,
+ dir_for_ignorefile: &Path,
+ names: &[T],
+ case_insensitive: bool,
+) -> (Gitignore, Option<Error>) {
+ let mut builder = GitignoreBuilder::new(dir);
+ let mut errs = PartialErrorBuilder::default();
+ builder.case_insensitive(case_insensitive).unwrap();
+ for name in names {
+ let gipath = dir_for_ignorefile.join(name.as_ref());
+ // This check is not necessary, but is added for performance. Namely,
+ // a simple stat call checking for existence can often be just a bit
+ // quicker than actually trying to open a file. Since the number of
+ // directories without ignore files likely greatly exceeds the number
+ // with ignore files, this check generally makes sense.
+ //
+ // However, until demonstrated otherwise, we speculatively do not do
+ // this on Windows since Windows is notorious for having slow file
+ // system operations. Namely, it's not clear whether this analysis
+ // makes sense on Windows.
+ //
+ // For more details: https://github.com/BurntSushi/ripgrep/pull/1381
+ if cfg!(windows) || gipath.exists() {
+ errs.maybe_push_ignore_io(builder.add(gipath));
+ }
+ }
+ let gi = match builder.build() {
+ Ok(gi) => gi,
+ Err(err) => {
+ errs.push(err);
+ GitignoreBuilder::new(dir).build().unwrap()
+ }
+ };
+ (gi, errs.into_error_option())
+}
+
+/// Find the GIT_COMMON_DIR for the given git worktree.
+///
+/// This is the directory that may contain a private ignore file
+/// "info/exclude". Unlike git, this function does *not* read environment
+/// variables GIT_DIR and GIT_COMMON_DIR, because it is not clear how to use
+/// them when multiple repositories are searched.
+///
+/// Some I/O errors are ignored.
+fn resolve_git_commondir(
+ dir: &Path,
+ git_type: Option<FileType>,
+) -> Result<PathBuf, Option<Error>> {
+ let git_dir_path = || dir.join(".git");
+ let git_dir = git_dir_path();
+ if !git_type.map_or(false, |ft| ft.is_file()) {
+ return Ok(git_dir);
+ }
+ let file = match File::open(git_dir) {
+ Ok(file) => io::BufReader::new(file),
+ Err(err) => {
+ return Err(Some(Error::Io(err).with_path(git_dir_path())));
+ }
+ };
+ let dot_git_line = match file.lines().next() {
+ Some(Ok(line)) => line,
+ Some(Err(err)) => {
+ return Err(Some(Error::Io(err).with_path(git_dir_path())));
+ }
+ None => return Err(None),
+ };
+ if !dot_git_line.starts_with("gitdir: ") {
+ return Err(None);
+ }
+ let real_git_dir = PathBuf::from(&dot_git_line["gitdir: ".len()..]);
+ let git_commondir_file = || real_git_dir.join("commondir");
+ let file = match File::open(git_commondir_file()) {
+ Ok(file) => io::BufReader::new(file),
+ Err(_) => return Err(None),
+ };
+ let commondir_line = match file.lines().next() {
+ Some(Ok(line)) => line,
+ Some(Err(err)) => {
+ return Err(Some(Error::Io(err).with_path(git_commondir_file())));
+ }
+ None => return Err(None),
+ };
+ let commondir_abs = if commondir_line.starts_with(".") {
+ real_git_dir.join(commondir_line) // relative commondir
+ } else {
+ PathBuf::from(commondir_line)
+ };
+ Ok(commondir_abs)
+}
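+
+// Worked example of the resolution above for a linked worktree (all paths
+// are hypothetical). Given:
+//
+//     /repo/worktree/.git                 contains "gitdir: /repo/.git/worktrees/wt"
+//     /repo/.git/worktrees/wt/commondir   contains "../.."
+//
+// starting from `dir = /repo/worktree`, the `.git` *file* points at
+// `/repo/.git/worktrees/wt`, whose relative `commondir` entry resolves to
+// `/repo/.git`, which is where `info/exclude` is then read from.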
+
+#[cfg(test)]
+mod tests {
+ use std::fs::{self, File};
+ use std::io::Write;
+ use std::path::Path;
+
+ use crate::dir::IgnoreBuilder;
+ use crate::gitignore::Gitignore;
+ use crate::tests::TempDir;
+ use crate::Error;
+
+ fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
+ let mut file = File::create(path).unwrap();
+ file.write_all(contents.as_bytes()).unwrap();
+ }
+
+ fn mkdirp<P: AsRef<Path>>(path: P) {
+ fs::create_dir_all(path).unwrap();
+ }
+
+ fn partial(err: Error) -> Vec<Error> {
+ match err {
+ Error::Partial(errs) => errs,
+ _ => panic!("expected partial error but got {:?}", err),
+ }
+ }
+
+ fn tmpdir() -> TempDir {
+ TempDir::new().unwrap()
+ }
+
+ #[test]
+ fn explicit_ignore() {
+ let td = tmpdir();
+ wfile(td.path().join("not-an-ignore"), "foo\n!bar");
+
+ let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
+ assert!(err.is_none());
+ let (ig, err) =
+ IgnoreBuilder::new().add_ignore(gi).build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn git_exclude() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git/info"));
+ wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn gitignore() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ wfile(td.path().join(".gitignore"), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn gitignore_no_git() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_none());
+ assert!(ig.matched("bar", false).is_none());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn gitignore_allowed_no_git() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new()
+ .require_git(false)
+ .build()
+ .add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn ignore() {
+ let td = tmpdir();
+ wfile(td.path().join(".ignore"), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ #[test]
+ fn custom_ignore() {
+ let td = tmpdir();
+ let custom_ignore = ".customignore";
+ wfile(td.path().join(custom_ignore), "foo\n!bar");
+
+ let (ig, err) = IgnoreBuilder::new()
+ .add_custom_ignore_filename(custom_ignore)
+ .build()
+ .add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_ignore());
+ assert!(ig.matched("bar", false).is_whitelist());
+ assert!(ig.matched("baz", false).is_none());
+ }
+
+ // Tests that a custom ignore file will override an .ignore.
+ #[test]
+ fn custom_ignore_over_ignore() {
+ let td = tmpdir();
+ let custom_ignore = ".customignore";
+ wfile(td.path().join(".ignore"), "foo");
+ wfile(td.path().join(custom_ignore), "!foo");
+
+ let (ig, err) = IgnoreBuilder::new()
+ .add_custom_ignore_filename(custom_ignore)
+ .build()
+ .add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_whitelist());
+ }
+
+ // Tests that earlier custom ignore files have lower precedence than later.
+ #[test]
+ fn custom_ignore_precedence() {
+ let td = tmpdir();
+ let custom_ignore1 = ".customignore1";
+ let custom_ignore2 = ".customignore2";
+ wfile(td.path().join(custom_ignore1), "foo");
+ wfile(td.path().join(custom_ignore2), "!foo");
+
+ let (ig, err) = IgnoreBuilder::new()
+ .add_custom_ignore_filename(custom_ignore1)
+ .add_custom_ignore_filename(custom_ignore2)
+ .build()
+ .add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_whitelist());
+ }
+
+ // Tests that an .ignore will override a .gitignore.
+ #[test]
+ fn ignore_over_gitignore() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "foo");
+ wfile(td.path().join(".ignore"), "!foo");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("foo", false).is_whitelist());
+ }
+
+ // Tests that exclude has lower precedence than both .ignore and .gitignore.
+ #[test]
+ fn exclude_lowest() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "!foo");
+ wfile(td.path().join(".ignore"), "!bar");
+ mkdirp(td.path().join(".git/info"));
+ wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ assert!(ig.matched("baz", false).is_ignore());
+ assert!(ig.matched("foo", false).is_whitelist());
+ assert!(ig.matched("bar", false).is_whitelist());
+ }
+
+ #[test]
+ fn errored() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "{foo");
+
+ let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_some());
+ }
+
+ #[test]
+ fn errored_both() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "{foo");
+ wfile(td.path().join(".ignore"), "{bar");
+
+ let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert_eq!(2, partial(err.expect("an error")).len());
+ }
+
+ #[test]
+ fn errored_partial() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ wfile(td.path().join(".gitignore"), "{foo\nbar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_some());
+ assert!(ig.matched("bar", false).is_ignore());
+ }
+
+ #[test]
+ fn errored_partial_and_ignore() {
+ let td = tmpdir();
+ wfile(td.path().join(".gitignore"), "{foo\nbar");
+ wfile(td.path().join(".ignore"), "!bar");
+
+ let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_some());
+ assert!(ig.matched("bar", false).is_whitelist());
+ }
+
+ #[test]
+ fn not_present_empty() {
+ let td = tmpdir();
+
+ let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
+ assert!(err.is_none());
+ }
+
+ #[test]
+ fn stops_at_git_dir() {
+ // This tests that .gitignore files beyond a .git barrier aren't
+ // matched, but .ignore files are.
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ mkdirp(td.path().join("foo/.git"));
+ wfile(td.path().join(".gitignore"), "foo");
+ wfile(td.path().join(".ignore"), "bar");
+
+ let ig0 = IgnoreBuilder::new().build();
+ let (ig1, err) = ig0.add_child(td.path());
+ assert!(err.is_none());
+ let (ig2, err) = ig1.add_child(ig1.path().join("foo"));
+ assert!(err.is_none());
+
+ assert!(ig1.matched("foo", false).is_ignore());
+ assert!(ig2.matched("foo", false).is_none());
+
+ assert!(ig1.matched("bar", false).is_ignore());
+ assert!(ig2.matched("bar", false).is_ignore());
+ }
+
+ #[test]
+ fn absolute_parent() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ mkdirp(td.path().join("foo"));
+ wfile(td.path().join(".gitignore"), "bar");
+
+ // First, check that the parent gitignore file isn't detected if the
+ // parent isn't added. This establishes a baseline.
+ let ig0 = IgnoreBuilder::new().build();
+ let (ig1, err) = ig0.add_child(td.path().join("foo"));
+ assert!(err.is_none());
+ assert!(ig1.matched("bar", false).is_none());
+
+ // Second, check that adding a parent directory actually works.
+ let ig0 = IgnoreBuilder::new().build();
+ let (ig1, err) = ig0.add_parents(td.path().join("foo"));
+ assert!(err.is_none());
+ let (ig2, err) = ig1.add_child(td.path().join("foo"));
+ assert!(err.is_none());
+ assert!(ig2.matched("bar", false).is_ignore());
+ }
+
+ #[test]
+ fn absolute_parent_anchored() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ mkdirp(td.path().join("src/llvm"));
+ wfile(td.path().join(".gitignore"), "/llvm/\nfoo");
+
+ let ig0 = IgnoreBuilder::new().build();
+ let (ig1, err) = ig0.add_parents(td.path().join("src"));
+ assert!(err.is_none());
+ let (ig2, err) = ig1.add_child("src");
+ assert!(err.is_none());
+
+ assert!(ig1.matched("llvm", true).is_none());
+ assert!(ig2.matched("llvm", true).is_none());
+ assert!(ig2.matched("src/llvm", true).is_none());
+ assert!(ig2.matched("foo", false).is_ignore());
+ assert!(ig2.matched("src/foo", false).is_ignore());
+ }
+
+ #[test]
+ fn git_info_exclude_in_linked_worktree() {
+ let td = tmpdir();
+ let git_dir = td.path().join(".git");
+ mkdirp(git_dir.join("info"));
+ wfile(git_dir.join("info/exclude"), "ignore_me");
+ mkdirp(git_dir.join("worktrees/linked-worktree"));
+ let commondir_path =
+ || git_dir.join("worktrees/linked-worktree/commondir");
+ mkdirp(td.path().join("linked-worktree"));
+ let worktree_git_dir_abs = format!(
+ "gitdir: {}",
+ git_dir.join("worktrees/linked-worktree").to_str().unwrap(),
+ );
+ wfile(td.path().join("linked-worktree/.git"), &worktree_git_dir_abs);
+
+ // relative commondir
+ wfile(commondir_path(), "../..");
+ let ib = IgnoreBuilder::new().build();
+ let (ignore, err) = ib.add_child(td.path().join("linked-worktree"));
+ assert!(err.is_none());
+ assert!(ignore.matched("ignore_me", false).is_ignore());
+
+ // absolute commondir
+ wfile(commondir_path(), git_dir.to_str().unwrap());
+ let (ignore, err) = ib.add_child(td.path().join("linked-worktree"));
+ assert!(err.is_none());
+ assert!(ignore.matched("ignore_me", false).is_ignore());
+
+ // missing commondir file
+ assert!(fs::remove_file(commondir_path()).is_ok());
+ let (_, err) = ib.add_child(td.path().join("linked-worktree"));
+ // We squash the error in this case, because it occurs in repositories
+ // that are not linked worktrees but have submodules.
+ assert!(err.is_none());
+
+ wfile(td.path().join("linked-worktree/.git"), "garbage");
+ let (_, err) = ib.add_child(td.path().join("linked-worktree"));
+ assert!(err.is_none());
+
+ wfile(td.path().join("linked-worktree/.git"), "gitdir: garbage");
+ let (_, err) = ib.add_child(td.path().join("linked-worktree"));
+ assert!(err.is_none());
+ }
+}
diff --git a/vendor/ignore/src/gitignore.rs b/vendor/ignore/src/gitignore.rs
new file mode 100644
index 0000000..3c7ba5e
--- /dev/null
+++ b/vendor/ignore/src/gitignore.rs
@@ -0,0 +1,789 @@
+/*!
+The gitignore module provides a way to match globs from a gitignore file
+against file paths.
+
+Note that this module implements the specification as described in the
+`gitignore` man page from scratch. That is, this module does *not* shell out to
+the `git` command line tool.
+*/
+
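+// Quick usage sketch (the ignore file path is hypothetical):
+//
+//     use ignore::gitignore::Gitignore;
+//
+//     let (gi, err) = Gitignore::new("/repo/.gitignore");
+//     assert!(err.is_none());
+//     // Paths are matched relative to the directory containing the file.
+//     let is_ignored = gi.matched("build/output.o", false).is_ignore();
+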
+use std::cell::RefCell;
+use std::env;
+use std::fs::File;
+use std::io::{self, BufRead, Read};
+use std::path::{Path, PathBuf};
+use std::str;
+use std::sync::Arc;
+
+use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
+use regex::bytes::Regex;
+use thread_local::ThreadLocal;
+
+use crate::pathutil::{is_file_name, strip_prefix};
+use crate::{Error, Match, PartialErrorBuilder};
+
+/// Glob represents a single glob in a gitignore file.
+///
+/// This is used to report information about the highest-precedence glob that
+/// matched in one or more gitignore files.
+#[derive(Clone, Debug)]
+pub struct Glob {
+ /// The file path that this glob was extracted from.
+ from: Option<PathBuf>,
+ /// The original glob string.
+ original: String,
+ /// The actual glob string used to convert to a regex.
+ actual: String,
+ /// Whether this is a whitelisted glob or not.
+ is_whitelist: bool,
+ /// Whether this glob should only match directories or not.
+ is_only_dir: bool,
+}
+
+impl Glob {
+ /// Returns the file path that defined this glob.
+ pub fn from(&self) -> Option<&Path> {
+ self.from.as_ref().map(|p| &**p)
+ }
+
+ /// The original glob as it was defined in a gitignore file.
+ pub fn original(&self) -> &str {
+ &self.original
+ }
+
+ /// The actual glob that was compiled to respect gitignore
+ /// semantics.
+ pub fn actual(&self) -> &str {
+ &self.actual
+ }
+
+ /// Whether this was a whitelisted glob or not.
+ pub fn is_whitelist(&self) -> bool {
+ self.is_whitelist
+ }
+
+ /// Whether this glob must match a directory or not.
+ pub fn is_only_dir(&self) -> bool {
+ self.is_only_dir
+ }
+
+ /// Returns true if and only if this glob has a `**/` prefix.
+ fn has_doublestar_prefix(&self) -> bool {
+ self.actual.starts_with("**/") || self.actual == "**"
+ }
+}
+
+/// Gitignore is a matcher for the globs in one or more gitignore files
+/// in the same directory.
+#[derive(Clone, Debug)]
+pub struct Gitignore {
+ set: GlobSet,
+ root: PathBuf,
+ globs: Vec<Glob>,
+ num_ignores: u64,
+ num_whitelists: u64,
+ matches: Option<Arc<ThreadLocal<RefCell<Vec<usize>>>>>,
+}
+
+impl Gitignore {
+ /// Creates a new gitignore matcher from the gitignore file path given.
+ ///
+ /// If it's desirable to include multiple gitignore files in a single
+ /// matcher, or read gitignore globs from a different source, then
+ /// use `GitignoreBuilder`.
+ ///
+ /// This always returns a valid matcher, even if it's empty. In particular,
+ /// a Gitignore file can be partially valid, e.g., when one glob is invalid
+ /// but the rest aren't.
+ ///
+ /// Note that I/O errors are ignored. For more granular control over
+ /// errors, use `GitignoreBuilder`.
+ pub fn new<P: AsRef<Path>>(
+ gitignore_path: P,
+ ) -> (Gitignore, Option<Error>) {
+ let path = gitignore_path.as_ref();
+ let parent = path.parent().unwrap_or(Path::new("/"));
+ let mut builder = GitignoreBuilder::new(parent);
+ let mut errs = PartialErrorBuilder::default();
+ errs.maybe_push_ignore_io(builder.add(path));
+ match builder.build() {
+ Ok(gi) => (gi, errs.into_error_option()),
+ Err(err) => {
+ errs.push(err);
+ (Gitignore::empty(), errs.into_error_option())
+ }
+ }
+ }
+
+ /// Creates a new gitignore matcher from the global ignore file, if one
+ /// exists.
+ ///
+ /// The global config file path is specified by git's `core.excludesFile`
+ /// config option.
+ ///
+ /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
+ /// does not exist or does not specify `core.excludesFile`, then
+ /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
+ /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
+ pub fn global() -> (Gitignore, Option<Error>) {
+ GitignoreBuilder::new("").build_global()
+ }
+
+ /// Creates a new empty gitignore matcher that never matches anything.
+ ///
+ /// Its path is empty.
+ pub fn empty() -> Gitignore {
+ Gitignore {
+ set: GlobSet::empty(),
+ root: PathBuf::from(""),
+ globs: vec![],
+ num_ignores: 0,
+ num_whitelists: 0,
+ matches: None,
+ }
+ }
+
+ /// Returns the directory containing this gitignore matcher.
+ ///
+ /// All matches are done relative to this path.
+ pub fn path(&self) -> &Path {
+ &*self.root
+ }
+
+ /// Returns true if and only if this gitignore has zero globs, and
+ /// therefore never matches any file path.
+ pub fn is_empty(&self) -> bool {
+ self.set.is_empty()
+ }
+
+ /// Returns the total number of globs, which should be equivalent to
+ /// `num_ignores + num_whitelists`.
+ pub fn len(&self) -> usize {
+ self.set.len()
+ }
+
+ /// Returns the total number of ignore globs.
+ pub fn num_ignores(&self) -> u64 {
+ self.num_ignores
+ }
+
+ /// Returns the total number of whitelisted globs.
+ pub fn num_whitelists(&self) -> u64 {
+ self.num_whitelists
+ }
+
+ /// Returns whether the given path (file or directory) matched a pattern in
+ /// this gitignore matcher.
+ ///
+ /// `is_dir` should be true if the path refers to a directory and false
+ /// otherwise.
+ ///
+ /// The given path is matched relative to the path given when building
+ /// the matcher. Specifically, before matching `path`, its prefix (as
+ /// determined by a common suffix of the directory containing this
+ /// gitignore) is stripped. If there is no common suffix/prefix overlap,
+ /// then `path` is assumed to be relative to this matcher.
+ pub fn matched<P: AsRef<Path>>(
+ &self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<&Glob> {
+ if self.is_empty() {
+ return Match::None;
+ }
+ self.matched_stripped(self.strip(path.as_ref()), is_dir)
+ }
+
+ /// Returns whether the given path (file or directory, and expected to be
+ /// under the root) or any of its parent directories (up to the root)
+ /// matched a pattern in this gitignore matcher.
+ ///
+ /// NOTE: This method is more expensive than walking the directory hierarchy
+ /// top-to-bottom and matching the entries. But it is easier to use in cases
+ /// when a list of paths is available without a hierarchy.
+ ///
+ /// `is_dir` should be true if the path refers to a directory and false
+ /// otherwise.
+ ///
+ /// The given path is matched relative to the path given when building
+ /// the matcher. Specifically, before matching `path`, its prefix (as
+ /// determined by a common suffix of the directory containing this
+ /// gitignore) is stripped. If there is no common suffix/prefix overlap,
+ /// then `path` is assumed to be relative to this matcher.
+ ///
+ /// # Panics
+ ///
+ /// This method panics if the given file path is not under the root path
+ /// of this matcher.
+ pub fn matched_path_or_any_parents<P: AsRef<Path>>(
+ &self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<&Glob> {
+ if self.is_empty() {
+ return Match::None;
+ }
+ let mut path = self.strip(path.as_ref());
+ assert!(!path.has_root(), "path is expected to be under the root");
+
+ match self.matched_stripped(path, is_dir) {
+ Match::None => (), // walk up
+ a_match => return a_match,
+ }
+ while let Some(parent) = path.parent() {
+ match self.matched_stripped(parent, /* is_dir */ true) {
+ Match::None => path = parent, // walk up
+ a_match => return a_match,
+ }
+ }
+ Match::None
+ }
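+
+ // Example of the walk-up above (the ignore file contents and paths are
+ // hypothetical). A directory-only glob such as `target/` cannot match the
+ // file itself, but it does match an ancestor directory:
+ //
+ //     let (gi, _) = Gitignore::new("/repo/.gitignore"); // contains "target/"
+ //     assert!(gi.matched("target/debug/app", false).is_none());
+ //     assert!(gi
+ //         .matched_path_or_any_parents("target/debug/app", false)
+ //         .is_ignore());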
+
+ /// Like matched, but takes a path that has already been stripped.
+ fn matched_stripped<P: AsRef<Path>>(
+ &self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<&Glob> {
+ if self.is_empty() {
+ return Match::None;
+ }
+ let path = path.as_ref();
+ let _matches = self.matches.as_ref().unwrap().get_or_default();
+ let mut matches = _matches.borrow_mut();
+ let candidate = Candidate::new(path);
+ self.set.matches_candidate_into(&candidate, &mut *matches);
+ for &i in matches.iter().rev() {
+ let glob = &self.globs[i];
+ if !glob.is_only_dir() || is_dir {
+ return if glob.is_whitelist() {
+ Match::Whitelist(glob)
+ } else {
+ Match::Ignore(glob)
+ };
+ }
+ }
+ Match::None
+ }
+
+ /// Strips the given path such that it's suitable for matching with this
+ /// gitignore matcher.
+ fn strip<'a, P: 'a + AsRef<Path> + ?Sized>(
+ &'a self,
+ path: &'a P,
+ ) -> &'a Path {
+ let mut path = path.as_ref();
+ // A leading ./ is completely superfluous. We also strip it from
+ // our gitignore root path, so we need to strip it from our candidate
+ // path too.
+ if let Some(p) = strip_prefix("./", path) {
+ path = p;
+ }
+ // Strip any common prefix between the candidate path and the root
+ // of the gitignore, to make sure we get relative matching right.
+ // BUT, a file name might not have any directory components to it,
+ // in which case, we don't want to accidentally strip any part of the
+ // file name.
+ //
+ // As an additional special case, if the root is just `.`, then we
+ // shouldn't try to strip anything, e.g., when path begins with a `.`.
+ if self.root != Path::new(".") && !is_file_name(path) {
+ if let Some(p) = strip_prefix(&self.root, path) {
+ path = p;
+ // If we're left with a leading slash, get rid of it.
+ if let Some(p) = strip_prefix("/", path) {
+ path = p;
+ }
+ }
+ }
+ path
+ }
+}
+
+/// Builds a matcher for a single set of globs from a .gitignore file.
+#[derive(Clone, Debug)]
+pub struct GitignoreBuilder {
+ builder: GlobSetBuilder,
+ root: PathBuf,
+ globs: Vec<Glob>,
+ case_insensitive: bool,
+}
+
+impl GitignoreBuilder {
+ /// Create a new builder for a gitignore file.
+ ///
+ /// The path given should be the path at which the globs for this gitignore
+ /// file should be matched. Note that paths are always matched relative
+ /// to the root path given here. Generally, the root path should correspond
+ /// to the *directory* containing a `.gitignore` file.
+ pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
+ let root = root.as_ref();
+ GitignoreBuilder {
+ builder: GlobSetBuilder::new(),
+ root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
+ globs: vec![],
+ case_insensitive: false,
+ }
+ }
+
+ /// Builds a new matcher from the globs added so far.
+ ///
+ /// Once a matcher is built, no new globs can be added to it.
+ pub fn build(&self) -> Result<Gitignore, Error> {
+ let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
+ let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
+ let set = self
+ .builder
+ .build()
+ .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
+ Ok(Gitignore {
+ set: set,
+ root: self.root.clone(),
+ globs: self.globs.clone(),
+ num_ignores: nignore as u64,
+ num_whitelists: nwhite as u64,
+ matches: Some(Arc::new(ThreadLocal::default())),
+ })
+ }
+
+ /// Build a global gitignore matcher using the configuration in this
+ /// builder.
+ ///
+    /// Unlike `build`, this consumes ownership of the builder because it
+    /// must mutate the builder to add the global gitignore globs.
+ ///
+ /// Note that this ignores the path given to this builder's constructor
+ /// and instead derives the path automatically from git's global
+ /// configuration.
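+    ///
+    /// A sketch of how this might be used; since the root path is ignored
+    /// here, an empty path suffices for the constructor:
+    ///
+    /// ```rust,no_run
+    /// use ignore::gitignore::GitignoreBuilder;
+    ///
+    /// let (gi, err) = GitignoreBuilder::new("").build_global();
+    /// if let Some(err) = err {
+    ///     eprintln!("problem reading the global gitignore: {}", err);
+    /// }
+    /// if gi.is_empty() {
+    ///     eprintln!("no global gitignore rules found");
+    /// }
+    /// ```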
+ pub fn build_global(mut self) -> (Gitignore, Option<Error>) {
+ match gitconfig_excludes_path() {
+ None => (Gitignore::empty(), None),
+ Some(path) => {
+ if !path.is_file() {
+ (Gitignore::empty(), None)
+ } else {
+ let mut errs = PartialErrorBuilder::default();
+ errs.maybe_push_ignore_io(self.add(path));
+ match self.build() {
+ Ok(gi) => (gi, errs.into_error_option()),
+ Err(err) => {
+ errs.push(err);
+ (Gitignore::empty(), errs.into_error_option())
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /// Add each glob from the file path given.
+ ///
+ /// The file given should be formatted as a `gitignore` file.
+ ///
+ /// Note that partial errors can be returned. For example, if there was
+ /// a problem adding one glob, an error for that will be returned, but
+ /// all other valid globs will still be added.
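+    ///
+    /// A small sketch (the `.gitignore` path here is hypothetical):
+    ///
+    /// ```rust,no_run
+    /// use ignore::gitignore::GitignoreBuilder;
+    ///
+    /// let mut builder = GitignoreBuilder::new("/project");
+    /// // `None` means success; `Some(err)` may describe a partial failure.
+    /// if let Some(err) = builder.add("/project/.gitignore") {
+    ///     eprintln!("{}", err);
+    /// }
+    /// ```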
+ pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
+ let path = path.as_ref();
+ let file = match File::open(path) {
+ Err(err) => return Some(Error::Io(err).with_path(path)),
+ Ok(file) => file,
+ };
+ let rdr = io::BufReader::new(file);
+ let mut errs = PartialErrorBuilder::default();
+ for (i, line) in rdr.lines().enumerate() {
+ let lineno = (i + 1) as u64;
+ let line = match line {
+ Ok(line) => line,
+ Err(err) => {
+ errs.push(Error::Io(err).tagged(path, lineno));
+ break;
+ }
+ };
+ if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
+ errs.push(err.tagged(path, lineno));
+ }
+ }
+ errs.into_error_option()
+ }
+
+ /// Add each glob line from the string given.
+ ///
+ /// If this string came from a particular `gitignore` file, then its path
+ /// should be provided here.
+ ///
+ /// The string given should be formatted as a `gitignore` file.
+ #[cfg(test)]
+ fn add_str(
+ &mut self,
+ from: Option<PathBuf>,
+ gitignore: &str,
+ ) -> Result<&mut GitignoreBuilder, Error> {
+ for line in gitignore.lines() {
+ self.add_line(from.clone(), line)?;
+ }
+ Ok(self)
+ }
+
+ /// Add a line from a gitignore file to this builder.
+ ///
+ /// If this line came from a particular `gitignore` file, then its path
+ /// should be provided here.
+ ///
+ /// If the line could not be parsed as a glob, then an error is returned.
+ pub fn add_line(
+ &mut self,
+ from: Option<PathBuf>,
+ mut line: &str,
+ ) -> Result<&mut GitignoreBuilder, Error> {
+ #![allow(deprecated)]
+
+ if line.starts_with("#") {
+ return Ok(self);
+ }
+ if !line.ends_with("\\ ") {
+ line = line.trim_right();
+ }
+ if line.is_empty() {
+ return Ok(self);
+ }
+ let mut glob = Glob {
+ from: from,
+ original: line.to_string(),
+ actual: String::new(),
+ is_whitelist: false,
+ is_only_dir: false,
+ };
+ let mut is_absolute = false;
+ if line.starts_with("\\!") || line.starts_with("\\#") {
+ line = &line[1..];
+ is_absolute = line.chars().nth(0) == Some('/');
+ } else {
+ if line.starts_with("!") {
+ glob.is_whitelist = true;
+ line = &line[1..];
+ }
+ if line.starts_with("/") {
+ // `man gitignore` says that if a glob starts with a slash,
+ // then the glob can only match the beginning of a path
+ // (relative to the location of gitignore). We achieve this by
+ // simply banning wildcards from matching /.
+ line = &line[1..];
+ is_absolute = true;
+ }
+ }
+ // If it ends with a slash, then this should only match directories,
+ // but the slash should otherwise not be used while globbing.
+ if line.as_bytes().last() == Some(&b'/') {
+ glob.is_only_dir = true;
+ line = &line[..line.len() - 1];
+ // If the slash was escaped, then remove the escape.
+ // See: https://github.com/BurntSushi/ripgrep/issues/2236
+ if line.as_bytes().last() == Some(&b'\\') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ glob.actual = line.to_string();
+ // If there is a literal slash, then this is a glob that must match the
+ // entire path name. Otherwise, we should let it match anywhere, so use
+ // a **/ prefix.
+ if !is_absolute && !line.chars().any(|c| c == '/') {
+ // ... but only if we don't already have a **/ prefix.
+ if !glob.has_doublestar_prefix() {
+ glob.actual = format!("**/{}", glob.actual);
+ }
+ }
+ // If the glob ends with `/**`, then we should only match everything
+ // inside a directory, but not the directory itself. Standard globs
+ // will match the directory. So we add `/*` to force the issue.
+ if glob.actual.ends_with("/**") {
+ glob.actual = format!("{}/*", glob.actual);
+ }
+ let parsed = GlobBuilder::new(&glob.actual)
+ .literal_separator(true)
+ .case_insensitive(self.case_insensitive)
+ .backslash_escape(true)
+ .build()
+ .map_err(|err| Error::Glob {
+ glob: Some(glob.original.clone()),
+ err: err.kind().to_string(),
+ })?;
+ self.builder.add(parsed);
+ self.globs.push(glob);
+ Ok(self)
+ }
+
+ /// Toggle whether the globs should be matched case insensitively or not.
+ ///
+ /// When this option is changed, only globs added after the change will be
+ /// affected.
+ ///
+ /// This is disabled by default.
+ pub fn case_insensitive(
+ &mut self,
+ yes: bool,
+ ) -> Result<&mut GitignoreBuilder, Error> {
+ // TODO: This should not return a `Result`. Fix this in the next semver
+ // release.
+ self.case_insensitive = yes;
+ Ok(self)
+ }
+}
+
+/// Return the file path of the current environment's global gitignore file.
+///
+/// Note that the file path returned may not exist.
+fn gitconfig_excludes_path() -> Option<PathBuf> {
+ // git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
+ // both can be active at the same time, where $HOME/.gitconfig takes
+ // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
+ // we're done.
+ match gitconfig_home_contents().and_then(|x| parse_excludes_file(&x)) {
+ Some(path) => return Some(path),
+ None => {}
+ }
+ match gitconfig_xdg_contents().and_then(|x| parse_excludes_file(&x)) {
+ Some(path) => return Some(path),
+ None => {}
+ }
+ excludes_file_default()
+}
+
+/// Returns the file contents of git's global config file, if one exists, in
+/// the user's home directory.
+fn gitconfig_home_contents() -> Option<Vec<u8>> {
+ let home = match home_dir() {
+ None => return None,
+ Some(home) => home,
+ };
+ let mut file = match File::open(home.join(".gitconfig")) {
+ Err(_) => return None,
+ Ok(file) => io::BufReader::new(file),
+ };
+ let mut contents = vec![];
+ file.read_to_end(&mut contents).ok().map(|_| contents)
+}
+
+/// Returns the file contents of git's global config file, if one exists, in
+/// the user's XDG_CONFIG_HOME directory.
+fn gitconfig_xdg_contents() -> Option<Vec<u8>> {
+ let path = env::var_os("XDG_CONFIG_HOME")
+ .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
+ .or_else(|| home_dir().map(|p| p.join(".config")))
+ .map(|x| x.join("git/config"));
+ let mut file = match path.and_then(|p| File::open(p).ok()) {
+ None => return None,
+ Some(file) => io::BufReader::new(file),
+ };
+ let mut contents = vec![];
+ file.read_to_end(&mut contents).ok().map(|_| contents)
+}
+
+/// Returns the default file path for a global .gitignore file.
+///
+/// Specifically, this respects XDG_CONFIG_HOME.
+fn excludes_file_default() -> Option<PathBuf> {
+ env::var_os("XDG_CONFIG_HOME")
+ .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
+ .or_else(|| home_dir().map(|p| p.join(".config")))
+ .map(|x| x.join("git/ignore"))
+}
+
+/// Extract git's `core.excludesfile` config setting from the raw file contents
+/// given.
+fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
+ // N.B. This is the lazy approach, and isn't technically correct, but
+ // probably works in more circumstances. I guess we would ideally have
+ // a full INI parser. Yuck.
+ lazy_static::lazy_static! {
+ static ref RE: Regex =
+ Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
+ };
+ let caps = match RE.captures(data) {
+ None => return None,
+ Some(caps) => caps,
+ };
+ str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s)))
+}
+
+/// Expands ~ in file paths to the value of $HOME.
+fn expand_tilde(path: &str) -> String {
+ let home = match home_dir() {
+ None => return path.to_string(),
+ Some(home) => home.to_string_lossy().into_owned(),
+ };
+ path.replace("~", &home)
+}
+
+/// Returns the location of the user's home directory.
+fn home_dir() -> Option<PathBuf> {
+ // We're fine with using env::home_dir for now. Its bugs are, IMO, pretty
+ // minor corner cases. We should still probably eventually migrate to
+ // the `dirs` crate to get a proper implementation.
+ #![allow(deprecated)]
+ env::home_dir()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{Gitignore, GitignoreBuilder};
+ use std::path::Path;
+
+ fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore {
+ let mut builder = GitignoreBuilder::new(root);
+ builder.add_str(None, s).unwrap();
+ builder.build().unwrap()
+ }
+
+ macro_rules! ignored {
+ ($name:ident, $root:expr, $gi:expr, $path:expr) => {
+ ignored!($name, $root, $gi, $path, false);
+ };
+ ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
+ #[test]
+ fn $name() {
+ let gi = gi_from_str($root, $gi);
+ assert!(gi.matched($path, $is_dir).is_ignore());
+ }
+ };
+ }
+
+ macro_rules! not_ignored {
+ ($name:ident, $root:expr, $gi:expr, $path:expr) => {
+ not_ignored!($name, $root, $gi, $path, false);
+ };
+ ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
+ #[test]
+ fn $name() {
+ let gi = gi_from_str($root, $gi);
+ assert!(!gi.matched($path, $is_dir).is_ignore());
+ }
+ };
+ }
+
+ const ROOT: &'static str = "/home/foobar/rust/rg";
+
+ ignored!(ig1, ROOT, "months", "months");
+ ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
+ ignored!(ig3, ROOT, "*.rs", "src/main.rs");
+ ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
+ ignored!(ig5, ROOT, "/*.c", "cat-file.c");
+ ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
+ ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
+ ignored!(ig8, ROOT, "foo/", "foo", true);
+ ignored!(ig9, ROOT, "**/foo", "foo");
+ ignored!(ig10, ROOT, "**/foo", "src/foo");
+ ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
+ ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
+ ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
+ ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
+ ignored!(ig15, ROOT, "abc/**", "abc/x");
+ ignored!(ig16, ROOT, "abc/**", "abc/x/y");
+ ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
+ ignored!(ig18, ROOT, "a/**/b", "a/b");
+ ignored!(ig19, ROOT, "a/**/b", "a/x/b");
+ ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
+ ignored!(ig21, ROOT, r"\!xy", "!xy");
+ ignored!(ig22, ROOT, r"\#foo", "#foo");
+ ignored!(ig23, ROOT, "foo", "./foo");
+ ignored!(ig24, ROOT, "target", "grep/target");
+ ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
+ ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
+ ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
+ ignored!(ig28, "./src", "/llvm/", "./src/llvm", true);
+ ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true);
+ ignored!(ig30, ROOT, "**/", "foo/bar", true);
+ ignored!(ig31, ROOT, "path1/*", "path1/foo");
+ ignored!(ig32, ROOT, ".a/b", ".a/b");
+ ignored!(ig33, "./", ".a/b", ".a/b");
+ ignored!(ig34, ".", ".a/b", ".a/b");
+ ignored!(ig35, "./.", ".a/b", ".a/b");
+ ignored!(ig36, "././", ".a/b", ".a/b");
+ ignored!(ig37, "././.", ".a/b", ".a/b");
+ ignored!(ig38, ROOT, "\\[", "[");
+ ignored!(ig39, ROOT, "\\?", "?");
+ ignored!(ig40, ROOT, "\\*", "*");
+ ignored!(ig41, ROOT, "\\a", "a");
+ ignored!(ig42, ROOT, "s*.rs", "sfoo.rs");
+ ignored!(ig43, ROOT, "**", "foo.rs");
+ ignored!(ig44, ROOT, "**/**/*", "a/foo.rs");
+
+ not_ignored!(ignot1, ROOT, "amonths", "months");
+ not_ignored!(ignot2, ROOT, "monthsa", "months");
+ not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
+ not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
+ not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
+ not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
+ not_ignored!(ignot7, ROOT, "foo/", "foo", false);
+ not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
+ not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
+ not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
+ not_ignored!(ignot11, ROOT, "#foo", "#foo");
+ not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
+ not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
+ not_ignored!(
+ ignot14,
+ "./third_party/protobuf",
+ "m4/ltoptions.m4",
+ "./third_party/protobuf/csharp/src/packages/repositories.config"
+ );
+ not_ignored!(ignot15, ROOT, "!/bar", "foo/bar");
+ not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true);
+ not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs");
+ not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo");
+ not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs");
+
+ fn bytes(s: &str) -> Vec<u8> {
+ s.to_string().into_bytes()
+ }
+
+ fn path_string<P: AsRef<Path>>(path: P) -> String {
+ path.as_ref().to_str().unwrap().to_string()
+ }
+
+ #[test]
+ fn parse_excludes_file1() {
+ let data = bytes("[core]\nexcludesFile = /foo/bar");
+ let got = super::parse_excludes_file(&data).unwrap();
+ assert_eq!(path_string(got), "/foo/bar");
+ }
+
+ #[test]
+ fn parse_excludes_file2() {
+ let data = bytes("[core]\nexcludesFile = ~/foo/bar");
+ let got = super::parse_excludes_file(&data).unwrap();
+ assert_eq!(path_string(got), super::expand_tilde("~/foo/bar"));
+ }
+
+ #[test]
+ fn parse_excludes_file3() {
+ let data = bytes("[core]\nexcludeFile = /foo/bar");
+ assert!(super::parse_excludes_file(&data).is_none());
+ }
+
+ // See: https://github.com/BurntSushi/ripgrep/issues/106
+ #[test]
+ fn regression_106() {
+ gi_from_str("/", " ");
+ }
+
+ #[test]
+ fn case_insensitive() {
+ let gi = GitignoreBuilder::new(ROOT)
+ .case_insensitive(true)
+ .unwrap()
+ .add_str(None, "*.html")
+ .unwrap()
+ .build()
+ .unwrap();
+ assert!(gi.matched("foo.html", false).is_ignore());
+ assert!(gi.matched("foo.HTML", false).is_ignore());
+ assert!(!gi.matched("foo.htm", false).is_ignore());
+ assert!(!gi.matched("foo.HTM", false).is_ignore());
+ }
+
+ ignored!(cs1, ROOT, "*.html", "foo.html");
+ not_ignored!(cs2, ROOT, "*.html", "foo.HTML");
+ not_ignored!(cs3, ROOT, "*.html", "foo.htm");
+ not_ignored!(cs4, ROOT, "*.html", "foo.HTM");
+}
diff --git a/vendor/ignore/src/lib.rs b/vendor/ignore/src/lib.rs
new file mode 100644
index 0000000..824f7c4
--- /dev/null
+++ b/vendor/ignore/src/lib.rs
@@ -0,0 +1,550 @@
+/*!
+The ignore crate provides a fast recursive directory iterator that respects
+various filters such as globs, file types and `.gitignore` files. The precise
+matching rules and precedence are explained in the documentation for
+`WalkBuilder`.
+
+Secondarily, this crate exposes gitignore and file type matchers for use cases
+that demand more fine-grained control.
+
+# Example
+
+This example shows the most basic usage of this crate. This code will
+recursively traverse the current directory while automatically filtering out
+files and directories according to ignore globs found in files like
+`.ignore` and `.gitignore`:
+
+```rust,no_run
+use ignore::Walk;
+
+for result in Walk::new("./") {
+ // Each item yielded by the iterator is either a directory entry or an
+ // error, so either print the path or the error.
+ match result {
+ Ok(entry) => println!("{}", entry.path().display()),
+ Err(err) => println!("ERROR: {}", err),
+ }
+}
+```
+
+# Example: advanced
+
+By default, the recursive directory iterator will ignore hidden files and
+directories. This can be disabled by building the iterator with `WalkBuilder`:
+
+```rust,no_run
+use ignore::WalkBuilder;
+
+for result in WalkBuilder::new("./").hidden(false).build() {
+ println!("{:?}", result);
+}
+```
+
+See the documentation for `WalkBuilder` for many other options.
+*/
+
+#![deny(missing_docs)]
+
+use std::error;
+use std::fmt;
+use std::io;
+use std::path::{Path, PathBuf};
+
+pub use crate::walk::{
+ DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
+ WalkParallel, WalkState,
+};
+
+mod default_types;
+mod dir;
+pub mod gitignore;
+pub mod overrides;
+mod pathutil;
+pub mod types;
+mod walk;
+
+/// Represents an error that can occur when parsing an ignore file or when
+/// walking a directory tree.
+#[derive(Debug)]
+pub enum Error {
+ /// A collection of "soft" errors. These occur when adding an ignore
+ /// file partially succeeded.
+ Partial(Vec<Error>),
+ /// An error associated with a specific line number.
+ WithLineNumber {
+ /// The line number.
+ line: u64,
+ /// The underlying error.
+ err: Box<Error>,
+ },
+ /// An error associated with a particular file path.
+ WithPath {
+ /// The file path.
+ path: PathBuf,
+ /// The underlying error.
+ err: Box<Error>,
+ },
+ /// An error associated with a particular directory depth when recursively
+ /// walking a directory.
+ WithDepth {
+ /// The directory depth.
+ depth: usize,
+ /// The underlying error.
+ err: Box<Error>,
+ },
+ /// An error that occurs when a file loop is detected when traversing
+ /// symbolic links.
+ Loop {
+ /// The ancestor file path in the loop.
+ ancestor: PathBuf,
+ /// The child file path in the loop.
+ child: PathBuf,
+ },
+ /// An error that occurs when doing I/O, such as reading an ignore file.
+ Io(io::Error),
+ /// An error that occurs when trying to parse a glob.
+ Glob {
+ /// The original glob that caused this error. This glob, when
+ /// available, always corresponds to the glob provided by an end user.
+        /// For example, it is the glob as written in a `.gitignore` file.
+ ///
+ /// (This glob may be distinct from the glob that is actually
+ /// compiled, after accounting for `gitignore` semantics.)
+ glob: Option<String>,
+ /// The underlying glob error as a string.
+ err: String,
+ },
+ /// A type selection for a file type that is not defined.
+ UnrecognizedFileType(String),
+ /// A user specified file type definition could not be parsed.
+ InvalidDefinition,
+}
+
+impl Clone for Error {
+ fn clone(&self) -> Error {
+ match *self {
+ Error::Partial(ref errs) => Error::Partial(errs.clone()),
+ Error::WithLineNumber { line, ref err } => {
+ Error::WithLineNumber { line: line, err: err.clone() }
+ }
+ Error::WithPath { ref path, ref err } => {
+ Error::WithPath { path: path.clone(), err: err.clone() }
+ }
+ Error::WithDepth { depth, ref err } => {
+ Error::WithDepth { depth: depth, err: err.clone() }
+ }
+ Error::Loop { ref ancestor, ref child } => Error::Loop {
+ ancestor: ancestor.clone(),
+ child: child.clone(),
+ },
+ Error::Io(ref err) => match err.raw_os_error() {
+ Some(e) => Error::Io(io::Error::from_raw_os_error(e)),
+ None => Error::Io(io::Error::new(err.kind(), err.to_string())),
+ },
+ Error::Glob { ref glob, ref err } => {
+ Error::Glob { glob: glob.clone(), err: err.clone() }
+ }
+ Error::UnrecognizedFileType(ref err) => {
+ Error::UnrecognizedFileType(err.clone())
+ }
+ Error::InvalidDefinition => Error::InvalidDefinition,
+ }
+ }
+}
+
+impl Error {
+ /// Returns true if this is a partial error.
+ ///
+ /// A partial error occurs when only some operations failed while others
+ /// may have succeeded. For example, an ignore file may contain an invalid
+ /// glob among otherwise valid globs.
+ pub fn is_partial(&self) -> bool {
+ match *self {
+ Error::Partial(_) => true,
+ Error::WithLineNumber { ref err, .. } => err.is_partial(),
+ Error::WithPath { ref err, .. } => err.is_partial(),
+ Error::WithDepth { ref err, .. } => err.is_partial(),
+ _ => false,
+ }
+ }
+
+ /// Returns true if this error is exclusively an I/O error.
+ pub fn is_io(&self) -> bool {
+ match *self {
+ Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
+ Error::WithLineNumber { ref err, .. } => err.is_io(),
+ Error::WithPath { ref err, .. } => err.is_io(),
+ Error::WithDepth { ref err, .. } => err.is_io(),
+ Error::Loop { .. } => false,
+ Error::Io(_) => true,
+ Error::Glob { .. } => false,
+ Error::UnrecognizedFileType(_) => false,
+ Error::InvalidDefinition => false,
+ }
+ }
+
+ /// Inspect the original [`io::Error`] if there is one.
+ ///
+ /// [`None`] is returned if the [`Error`] doesn't correspond to an
+ /// [`io::Error`]. This might happen, for example, when the error was
+ /// produced because a cycle was found in the directory tree while
+ /// following symbolic links.
+ ///
+    /// This method returns a borrowed value that is bound to the lifetime
+    /// of the [`Error`]. To obtain an owned value, use [`into_io_error`]
+    /// instead.
+    ///
+    /// > This is the original [`io::Error`] and is _not_ the same as
+    /// > [`impl From<Error> for std::io::Error`][impl], which contains
+    /// > additional context about the error.
+ ///
+ /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
+ /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+ /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
+ /// [`Error`]: struct.Error.html
+ /// [`into_io_error`]: struct.Error.html#method.into_io_error
+ /// [impl]: struct.Error.html#impl-From%3CError%3E
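+    ///
+    /// A brief sketch with a synthetic I/O error:
+    ///
+    /// ```
+    /// use std::io;
+    ///
+    /// use ignore::Error;
+    ///
+    /// let err = Error::from(io::Error::new(io::ErrorKind::Other, "example"));
+    /// assert!(err.is_io());
+    /// assert!(err.io_error().is_some());
+    /// ```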
+ pub fn io_error(&self) -> Option<&std::io::Error> {
+ match *self {
+ Error::Partial(ref errs) => {
+ if errs.len() == 1 {
+ errs[0].io_error()
+ } else {
+ None
+ }
+ }
+ Error::WithLineNumber { ref err, .. } => err.io_error(),
+ Error::WithPath { ref err, .. } => err.io_error(),
+ Error::WithDepth { ref err, .. } => err.io_error(),
+ Error::Loop { .. } => None,
+ Error::Io(ref err) => Some(err),
+ Error::Glob { .. } => None,
+ Error::UnrecognizedFileType(_) => None,
+ Error::InvalidDefinition => None,
+ }
+ }
+
+ /// Similar to [`io_error`] except consumes self to convert to the original
+ /// [`io::Error`] if one exists.
+ ///
+ /// [`io_error`]: struct.Error.html#method.io_error
+ /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+ pub fn into_io_error(self) -> Option<std::io::Error> {
+ match self {
+ Error::Partial(mut errs) => {
+ if errs.len() == 1 {
+ errs.remove(0).into_io_error()
+ } else {
+ None
+ }
+ }
+ Error::WithLineNumber { err, .. } => err.into_io_error(),
+ Error::WithPath { err, .. } => err.into_io_error(),
+ Error::WithDepth { err, .. } => err.into_io_error(),
+ Error::Loop { .. } => None,
+ Error::Io(err) => Some(err),
+ Error::Glob { .. } => None,
+ Error::UnrecognizedFileType(_) => None,
+ Error::InvalidDefinition => None,
+ }
+ }
+
+ /// Returns a depth associated with recursively walking a directory (if
+ /// this error was generated from a recursive directory iterator).
+ pub fn depth(&self) -> Option<usize> {
+ match *self {
+ Error::WithPath { ref err, .. } => err.depth(),
+ Error::WithDepth { depth, .. } => Some(depth),
+ _ => None,
+ }
+ }
+
+ /// Turn an error into a tagged error with the given file path.
+ fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
+ Error::WithPath {
+ path: path.as_ref().to_path_buf(),
+ err: Box::new(self),
+ }
+ }
+
+ /// Turn an error into a tagged error with the given depth.
+ fn with_depth(self, depth: usize) -> Error {
+ Error::WithDepth { depth: depth, err: Box::new(self) }
+ }
+
+ /// Turn an error into a tagged error with the given file path and line
+ /// number. If path is empty, then it is omitted from the error.
+ fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
+ let errline =
+ Error::WithLineNumber { line: lineno, err: Box::new(self) };
+ if path.as_ref().as_os_str().is_empty() {
+ return errline;
+ }
+ errline.with_path(path)
+ }
+
+ /// Build an error from a walkdir error.
+ fn from_walkdir(err: walkdir::Error) -> Error {
+ let depth = err.depth();
+ if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
+ return Error::WithDepth {
+ depth: depth,
+ err: Box::new(Error::Loop {
+ ancestor: anc.to_path_buf(),
+ child: child.to_path_buf(),
+ }),
+ };
+ }
+ let path = err.path().map(|p| p.to_path_buf());
+ let mut ig_err = Error::Io(io::Error::from(err));
+ if let Some(path) = path {
+ ig_err = Error::WithPath { path: path, err: Box::new(ig_err) };
+ }
+ ig_err
+ }
+}
+
+impl error::Error for Error {
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ match *self {
+ Error::Partial(_) => "partial error",
+ Error::WithLineNumber { ref err, .. } => err.description(),
+ Error::WithPath { ref err, .. } => err.description(),
+ Error::WithDepth { ref err, .. } => err.description(),
+ Error::Loop { .. } => "file system loop found",
+ Error::Io(ref err) => err.description(),
+ Error::Glob { ref err, .. } => err,
+ Error::UnrecognizedFileType(_) => "unrecognized file type",
+ Error::InvalidDefinition => "invalid definition",
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match *self {
+ Error::Partial(ref errs) => {
+ let msgs: Vec<String> =
+ errs.iter().map(|err| err.to_string()).collect();
+ write!(f, "{}", msgs.join("\n"))
+ }
+ Error::WithLineNumber { line, ref err } => {
+ write!(f, "line {}: {}", line, err)
+ }
+ Error::WithPath { ref path, ref err } => {
+ write!(f, "{}: {}", path.display(), err)
+ }
+ Error::WithDepth { ref err, .. } => err.fmt(f),
+ Error::Loop { ref ancestor, ref child } => write!(
+ f,
+ "File system loop found: \
+ {} points to an ancestor {}",
+ child.display(),
+ ancestor.display()
+ ),
+ Error::Io(ref err) => err.fmt(f),
+ Error::Glob { glob: None, ref err } => write!(f, "{}", err),
+ Error::Glob { glob: Some(ref glob), ref err } => {
+ write!(f, "error parsing glob '{}': {}", glob, err)
+ }
+ Error::UnrecognizedFileType(ref ty) => {
+ write!(f, "unrecognized file type: {}", ty)
+ }
+ Error::InvalidDefinition => write!(
+ f,
+ "invalid definition (format is type:glob, e.g., \
+ html:*.html)"
+ ),
+ }
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(err: io::Error) -> Error {
+ Error::Io(err)
+ }
+}
+
+#[derive(Debug, Default)]
+struct PartialErrorBuilder(Vec<Error>);
+
+impl PartialErrorBuilder {
+ fn push(&mut self, err: Error) {
+ self.0.push(err);
+ }
+
+ fn push_ignore_io(&mut self, err: Error) {
+ if !err.is_io() {
+ self.push(err);
+ }
+ }
+
+ fn maybe_push(&mut self, err: Option<Error>) {
+ if let Some(err) = err {
+ self.push(err);
+ }
+ }
+
+ fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
+ if let Some(err) = err {
+ self.push_ignore_io(err);
+ }
+ }
+
+ fn into_error_option(mut self) -> Option<Error> {
+ if self.0.is_empty() {
+ None
+ } else if self.0.len() == 1 {
+ Some(self.0.pop().unwrap())
+ } else {
+ Some(Error::Partial(self.0))
+ }
+ }
+}
+
+/// The result of a glob match.
+///
+/// The type parameter `T` typically refers to a type that provides more
+/// information about a particular match. For example, it might identify
+/// the specific gitignore file and the specific glob pattern that caused
+/// the match.
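+///
+/// A small sketch of inspecting and inverting a match (the glob value here
+/// is just an illustrative string):
+///
+/// ```
+/// use ignore::Match;
+///
+/// let m: Match<&str> = Match::Ignore("*.log");
+/// assert!(m.is_ignore());
+/// assert!(m.invert().is_whitelist());
+/// ```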
+#[derive(Clone, Debug)]
+pub enum Match<T> {
+ /// The path didn't match any glob.
+ None,
+    /// The highest-precedence glob that matched indicates that the path
+    /// should be ignored.
+ Ignore(T),
+    /// The highest-precedence glob that matched indicates that the path
+    /// should be whitelisted.
+ Whitelist(T),
+}
+
+impl<T> Match<T> {
+ /// Returns true if the match result didn't match any globs.
+ pub fn is_none(&self) -> bool {
+ match *self {
+ Match::None => true,
+ Match::Ignore(_) | Match::Whitelist(_) => false,
+ }
+ }
+
+ /// Returns true if the match result implies the path should be ignored.
+ pub fn is_ignore(&self) -> bool {
+ match *self {
+ Match::Ignore(_) => true,
+ Match::None | Match::Whitelist(_) => false,
+ }
+ }
+
+ /// Returns true if the match result implies the path should be
+ /// whitelisted.
+ pub fn is_whitelist(&self) -> bool {
+ match *self {
+ Match::Whitelist(_) => true,
+ Match::None | Match::Ignore(_) => false,
+ }
+ }
+
+ /// Inverts the match so that `Ignore` becomes `Whitelist` and
+ /// `Whitelist` becomes `Ignore`. A non-match remains the same.
+ pub fn invert(self) -> Match<T> {
+ match self {
+ Match::None => Match::None,
+ Match::Ignore(t) => Match::Whitelist(t),
+ Match::Whitelist(t) => Match::Ignore(t),
+ }
+ }
+
+ /// Return the value inside this match if it exists.
+ pub fn inner(&self) -> Option<&T> {
+ match *self {
+ Match::None => None,
+ Match::Ignore(ref t) => Some(t),
+ Match::Whitelist(ref t) => Some(t),
+ }
+ }
+
+ /// Apply the given function to the value inside this match.
+ ///
+ /// If the match has no value, then return the match unchanged.
+ pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
+ match self {
+ Match::None => Match::None,
+ Match::Ignore(t) => Match::Ignore(f(t)),
+ Match::Whitelist(t) => Match::Whitelist(f(t)),
+ }
+ }
+
+ /// Return the match if it is not none. Otherwise, return other.
+ pub fn or(self, other: Self) -> Self {
+ if self.is_none() {
+ other
+ } else {
+ self
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::env;
+ use std::error;
+ use std::fs;
+ use std::path::{Path, PathBuf};
+ use std::result;
+
+ /// A convenient result type alias.
+ pub type Result<T> =
+ result::Result<T, Box<dyn error::Error + Send + Sync>>;
+
+ macro_rules! err {
+ ($($tt:tt)*) => {
+ Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
+ }
+ }
+
+ /// A simple wrapper for creating a temporary directory that is
+ /// automatically deleted when it's dropped.
+ ///
+ /// We use this in lieu of tempfile because tempfile brings in too many
+ /// dependencies.
+ #[derive(Debug)]
+ pub struct TempDir(PathBuf);
+
+ impl Drop for TempDir {
+ fn drop(&mut self) {
+ fs::remove_dir_all(&self.0).unwrap();
+ }
+ }
+
+ impl TempDir {
+ /// Create a new empty temporary directory under the system's configured
+ /// temporary directory.
+ pub fn new() -> Result<TempDir> {
+ use std::sync::atomic::{AtomicUsize, Ordering};
+
+ static TRIES: usize = 100;
+ static COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+ let tmpdir = env::temp_dir();
+ for _ in 0..TRIES {
+ let count = COUNTER.fetch_add(1, Ordering::SeqCst);
+ let path = tmpdir.join("rust-ignore").join(count.to_string());
+ if path.is_dir() {
+ continue;
+ }
+ fs::create_dir_all(&path).map_err(|e| {
+ err!("failed to create {}: {}", path.display(), e)
+ })?;
+ return Ok(TempDir(path));
+ }
+ Err(err!("failed to create temp dir after {} tries", TRIES))
+ }
+
+ /// Return the underlying path to this temporary directory.
+ pub fn path(&self) -> &Path {
+ &self.0
+ }
+ }
+}
diff --git a/vendor/ignore/src/overrides.rs b/vendor/ignore/src/overrides.rs
new file mode 100644
index 0000000..e96b8e0
--- /dev/null
+++ b/vendor/ignore/src/overrides.rs
@@ -0,0 +1,263 @@
+/*!
+The overrides module provides a way to specify a set of override globs.
+This provides functionality similar to `--include` or `--exclude` in command
+line tools.
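+
+# Example
+
+A minimal sketch (the root path here is hypothetical):
+
+```
+use ignore::overrides::OverrideBuilder;
+
+let mut builder = OverrideBuilder::new("/project");
+builder.add("*.rs").unwrap();
+let ov = builder.build().unwrap();
+// With at least one whitelist glob, files that match nothing are ignored.
+assert!(ov.matched("main.rs", false).is_whitelist());
+assert!(ov.matched("main.c", false).is_ignore());
+```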
+*/
+
+use std::path::Path;
+
+use crate::gitignore::{self, Gitignore, GitignoreBuilder};
+use crate::{Error, Match};
+
+/// Glob represents a single glob in an override matcher.
+///
+/// This is used to report information about the highest-precedence glob
+/// that matched.
+///
+/// Note that not all matches necessarily correspond to a specific glob. For
+/// example, if there are one or more whitelist globs and a file path doesn't
+/// match any glob in the set, then the file path is considered to be ignored.
+///
+/// The lifetime `'a` refers to the lifetime of the matcher that produced
+/// this glob.
+#[derive(Clone, Debug)]
+pub struct Glob<'a>(GlobInner<'a>);
+
+#[derive(Clone, Debug)]
+enum GlobInner<'a> {
+ /// No glob matched, but the file path should still be ignored.
+ UnmatchedIgnore,
+ /// A glob matched.
+ Matched(&'a gitignore::Glob),
+}
+
+impl<'a> Glob<'a> {
+ fn unmatched() -> Glob<'a> {
+ Glob(GlobInner::UnmatchedIgnore)
+ }
+}
+
+/// Manages a set of overrides provided explicitly by the end user.
+#[derive(Clone, Debug)]
+pub struct Override(Gitignore);
+
+impl Override {
+ /// Returns an empty matcher that never matches any file path.
+ pub fn empty() -> Override {
+ Override(Gitignore::empty())
+ }
+
+ /// Returns the directory of this override set.
+ ///
+ /// All matches are done relative to this path.
+ pub fn path(&self) -> &Path {
+ self.0.path()
+ }
+
+ /// Returns true if and only if this matcher is empty.
+ ///
+ /// When a matcher is empty, it will never match any file path.
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns the total number of ignore globs.
+ pub fn num_ignores(&self) -> u64 {
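+        // Override semantics invert the underlying gitignore matcher (see
+        // `matched`): a plain glob whitelists and a `!` glob ignores. So the
+        // gitignore matcher's whitelist count is this matcher's ignore count.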
+ self.0.num_whitelists()
+ }
+
+ /// Returns the total number of whitelisted globs.
+ pub fn num_whitelists(&self) -> u64 {
+ self.0.num_ignores()
+ }
+
+ /// Returns whether the given file path matched a pattern in this override
+ /// matcher.
+ ///
+ /// `is_dir` should be true if the path refers to a directory and false
+ /// otherwise.
+ ///
+ /// If there are no overrides, then this always returns `Match::None`.
+ ///
+ /// If there is at least one whitelist override and `is_dir` is false, then
+ /// this never returns `Match::None`, since non-matches are interpreted as
+ /// ignored.
+ ///
+ /// The given path is matched to the globs relative to the path given
+ /// when building the override matcher. Specifically, before matching
+ /// `path`, its prefix (as determined by a common suffix of the directory
+ /// given) is stripped. If there is no common suffix/prefix overlap, then
+ /// `path` is assumed to reside in the same directory as the root path for
+ /// this set of overrides.
+ pub fn matched<'a, P: AsRef<Path>>(
+ &'a self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<Glob<'a>> {
+ if self.is_empty() {
+ return Match::None;
+ }
+ let mat = self.0.matched(path, is_dir).invert();
+ if mat.is_none() && self.num_whitelists() > 0 && !is_dir {
+ return Match::Ignore(Glob::unmatched());
+ }
+ mat.map(move |giglob| Glob(GlobInner::Matched(giglob)))
+ }
+}
+
+/// Builds a matcher for a set of glob overrides.
+#[derive(Clone, Debug)]
+pub struct OverrideBuilder {
+ builder: GitignoreBuilder,
+}
+
+impl OverrideBuilder {
+ /// Create a new override builder.
+ ///
+ /// Matching is done relative to the directory path provided.
+ pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
+ OverrideBuilder { builder: GitignoreBuilder::new(path) }
+ }
+
+ /// Builds a new override matcher from the globs added so far.
+ ///
+ /// Once a matcher is built, no new globs can be added to it.
+ pub fn build(&self) -> Result<Override, Error> {
+ Ok(Override(self.builder.build()?))
+ }
+
+ /// Add a glob to the set of overrides.
+ ///
+ /// Globs provided here have precisely the same semantics as a single
+ /// line in a `gitignore` file, where the meaning of `!` is inverted:
+ /// namely, `!` at the beginning of a glob will ignore a file. Without `!`,
+ /// all matches of the glob provided are treated as whitelist matches.
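+    ///
+    /// A small sketch (the root path here is hypothetical):
+    ///
+    /// ```
+    /// use ignore::overrides::OverrideBuilder;
+    ///
+    /// let mut builder = OverrideBuilder::new("/project");
+    /// builder.add("*.rs").unwrap(); // whitelist Rust files...
+    /// builder.add("!*_test.rs").unwrap(); // ...but ignore test files.
+    /// let ov = builder.build().unwrap();
+    /// assert!(ov.matched("lib.rs", false).is_whitelist());
+    /// assert!(ov.matched("lib_test.rs", false).is_ignore());
+    /// ```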
+ pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> {
+ self.builder.add_line(None, glob)?;
+ Ok(self)
+ }
+
+ /// Toggle whether the globs should be matched case insensitively or not.
+ ///
+ /// When this option is changed, only globs added after the change will be affected.
+ ///
+ /// This is disabled by default.
+ pub fn case_insensitive(
+ &mut self,
+ yes: bool,
+ ) -> Result<&mut OverrideBuilder, Error> {
+ // TODO: This should not return a `Result`. Fix this in the next semver
+ // release.
+ self.builder.case_insensitive(yes)?;
+ Ok(self)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{Override, OverrideBuilder};
+
+ const ROOT: &'static str = "/home/andrew/foo";
+
+ fn ov(globs: &[&str]) -> Override {
+ let mut builder = OverrideBuilder::new(ROOT);
+ for glob in globs {
+ builder.add(glob).unwrap();
+ }
+ builder.build().unwrap()
+ }
+
+ #[test]
+ fn empty() {
+ let ov = ov(&[]);
+ assert!(ov.matched("a.foo", false).is_none());
+ assert!(ov.matched("a", false).is_none());
+ assert!(ov.matched("", false).is_none());
+ }
+
+ #[test]
+ fn simple() {
+ let ov = ov(&["*.foo", "!*.bar"]);
+ assert!(ov.matched("a.foo", false).is_whitelist());
+ assert!(ov.matched("a.foo", true).is_whitelist());
+ assert!(ov.matched("a.rs", false).is_ignore());
+ assert!(ov.matched("a.rs", true).is_none());
+ assert!(ov.matched("a.bar", false).is_ignore());
+ assert!(ov.matched("a.bar", true).is_ignore());
+ }
+
+ #[test]
+ fn only_ignores() {
+ let ov = ov(&["!*.bar"]);
+ assert!(ov.matched("a.rs", false).is_none());
+ assert!(ov.matched("a.rs", true).is_none());
+ assert!(ov.matched("a.bar", false).is_ignore());
+ assert!(ov.matched("a.bar", true).is_ignore());
+ }
+
+ #[test]
+ fn precedence() {
+ let ov = ov(&["*.foo", "!*.bar.foo"]);
+ assert!(ov.matched("a.foo", false).is_whitelist());
+ assert!(ov.matched("a.baz", false).is_ignore());
+ assert!(ov.matched("a.bar.foo", false).is_ignore());
+ }
+
+ #[test]
+ fn gitignore() {
+ let ov = ov(&["/foo", "bar/*.rs", "baz/**"]);
+ assert!(ov.matched("bar/lib.rs", false).is_whitelist());
+ assert!(ov.matched("bar/wat/lib.rs", false).is_ignore());
+ assert!(ov.matched("wat/bar/lib.rs", false).is_ignore());
+ assert!(ov.matched("foo", false).is_whitelist());
+ assert!(ov.matched("wat/foo", false).is_ignore());
+ assert!(ov.matched("baz", false).is_ignore());
+ assert!(ov.matched("baz/a", false).is_whitelist());
+ assert!(ov.matched("baz/a/b", false).is_whitelist());
+ }
+
+ #[test]
+ fn allow_directories() {
+ // This tests that directories are NOT ignored when they are unmatched.
+ let ov = ov(&["*.rs"]);
+ assert!(ov.matched("foo.rs", false).is_whitelist());
+ assert!(ov.matched("foo.c", false).is_ignore());
+ assert!(ov.matched("foo", false).is_ignore());
+ assert!(ov.matched("foo", true).is_none());
+ assert!(ov.matched("src/foo.rs", false).is_whitelist());
+ assert!(ov.matched("src/foo.c", false).is_ignore());
+ assert!(ov.matched("src/foo", false).is_ignore());
+ assert!(ov.matched("src/foo", true).is_none());
+ }
+
+ #[test]
+ fn absolute_path() {
+ let ov = ov(&["!/bar"]);
+ assert!(ov.matched("./foo/bar", false).is_none());
+ }
+
+ #[test]
+ fn case_insensitive() {
+ let ov = OverrideBuilder::new(ROOT)
+ .case_insensitive(true)
+ .unwrap()
+ .add("*.html")
+ .unwrap()
+ .build()
+ .unwrap();
+ assert!(ov.matched("foo.html", false).is_whitelist());
+ assert!(ov.matched("foo.HTML", false).is_whitelist());
+ assert!(ov.matched("foo.htm", false).is_ignore());
+ assert!(ov.matched("foo.HTM", false).is_ignore());
+ }
+
+ #[test]
+ fn default_case_sensitive() {
+ let ov =
+ OverrideBuilder::new(ROOT).add("*.html").unwrap().build().unwrap();
+ assert!(ov.matched("foo.html", false).is_whitelist());
+ assert!(ov.matched("foo.HTML", false).is_ignore());
+ assert!(ov.matched("foo.htm", false).is_ignore());
+ assert!(ov.matched("foo.HTM", false).is_ignore());
+ }
+}
diff --git a/vendor/ignore/src/pathutil.rs b/vendor/ignore/src/pathutil.rs
new file mode 100644
index 0000000..f21b4f5
--- /dev/null
+++ b/vendor/ignore/src/pathutil.rs
@@ -0,0 +1,142 @@
+use std::ffi::OsStr;
+use std::path::Path;
+
+use crate::walk::DirEntry;
+
+/// Returns true if and only if this entry is considered to be hidden.
+///
+/// This only returns true if the base name of the path starts with a `.`.
+///
+/// On Unix, this implements a more optimized check.
+#[cfg(unix)]
+pub fn is_hidden(dent: &DirEntry) -> bool {
+ use std::os::unix::ffi::OsStrExt;
+
+ if let Some(name) = file_name(dent.path()) {
+ name.as_bytes().get(0) == Some(&b'.')
+ } else {
+ false
+ }
+}
+
+/// Returns true if and only if this entry is considered to be hidden.
+///
+/// On Windows, this returns true if one of the following is true:
+///
+/// * The base name of the path starts with a `.`.
+/// * The file attributes have the `HIDDEN` property set.
+#[cfg(windows)]
+pub fn is_hidden(dent: &DirEntry) -> bool {
+ use std::os::windows::fs::MetadataExt;
+ use winapi_util::file;
+
+ // This looks like we're doing an extra stat call, but on Windows, the
+ // directory traverser reuses the metadata retrieved from each directory
+ // entry and stores it on the DirEntry itself. So this is "free."
+ if let Ok(md) = dent.metadata() {
+ if file::is_hidden(md.file_attributes() as u64) {
+ return true;
+ }
+ }
+ if let Some(name) = file_name(dent.path()) {
+ name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+ } else {
+ false
+ }
+}
+
+/// Returns true if and only if this entry is considered to be hidden.
+///
+/// This only returns true if the base name of the path starts with a `.`.
+#[cfg(not(any(unix, windows)))]
+pub fn is_hidden(dent: &DirEntry) -> bool {
+ if let Some(name) = file_name(dent.path()) {
+ name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+ } else {
+ false
+ }
+}
+
+/// Strip `prefix` from the `path` and return the remainder.
+///
+/// If `path` doesn't have a prefix `prefix`, then return `None`.
+#[cfg(unix)]
+pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
+ prefix: &'a P,
+ path: &'a Path,
+) -> Option<&'a Path> {
+ use std::os::unix::ffi::OsStrExt;
+
+ let prefix = prefix.as_ref().as_os_str().as_bytes();
+ let path = path.as_os_str().as_bytes();
+ if prefix.len() > path.len() || prefix != &path[0..prefix.len()] {
+ None
+ } else {
+ Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..])))
+ }
+}
+
+/// Strip `prefix` from the `path` and return the remainder.
+///
+/// If `path` doesn't have a prefix `prefix`, then return `None`.
+#[cfg(not(unix))]
+pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
+ prefix: &'a P,
+ path: &'a Path,
+) -> Option<&'a Path> {
+ path.strip_prefix(prefix).ok()
+}
+
+/// Returns true if this file path is just a file name, i.e., its parent is
+/// the empty string.
+#[cfg(unix)]
+pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
+ use memchr::memchr;
+ use std::os::unix::ffi::OsStrExt;
+
+ let path = path.as_ref().as_os_str().as_bytes();
+ memchr(b'/', path).is_none()
+}
+
+/// Returns true if this file path is just a file name, i.e., its parent is
+/// the empty string.
+#[cfg(not(unix))]
+pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
+ path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false)
+}
+
+/// The final component of the path, if it is a normal file.
+///
+/// If the path terminates in `.` or `..`, or consists solely of a root or
+/// prefix, then `file_name` returns `None`.
+#[cfg(unix)]
+pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
+ path: &'a P,
+) -> Option<&'a OsStr> {
+ use memchr::memrchr;
+ use std::os::unix::ffi::OsStrExt;
+
+ let path = path.as_ref().as_os_str().as_bytes();
+ if path.is_empty() {
+ return None;
+ } else if path.len() == 1 && path[0] == b'.' {
+ return None;
+ } else if path.last() == Some(&b'.') {
+ return None;
+ } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
+ return None;
+ }
+ let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
+ Some(OsStr::from_bytes(&path[last_slash..]))
+}
+
+/// The final component of the path, if it is a normal file.
+///
+/// If the path terminates in `.` or `..`, or consists solely of a root or
+/// prefix, then `file_name` returns `None`.
+#[cfg(not(unix))]
+pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
+ path: &'a P,
+) -> Option<&'a OsStr> {
+ path.as_ref().file_name()
+}
diff --git a/vendor/ignore/src/types.rs b/vendor/ignore/src/types.rs
new file mode 100644
index 0000000..616a8d2
--- /dev/null
+++ b/vendor/ignore/src/types.rs
@@ -0,0 +1,583 @@
+/*!
+The types module provides a way of associating globs on file names to file
+types.
+
+This can be used to match specific types of files. For example, among
+the default file types provided, the Rust file type is defined to be `*.rs`
+with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
+name `c`.
+
+Note that the set of default types may change over time.
+
+# Example
+
+This shows how to create and use a simple file type matcher using the default
+file types defined in this crate.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.select("rust");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("foo.rs", false).is_whitelist());
+assert!(matcher.matched("foo.c", false).is_ignore());
+```
+
+# Example: negation
+
+This is like the previous example, but shows how negating a file type works.
+That is, this will let us match file paths that *don't* correspond to a
+particular file type.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.negate("c");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("foo.rs", false).is_none());
+assert!(matcher.matched("foo.c", false).is_ignore());
+```
+
+# Example: custom file type definitions
+
+This shows how to extend this library's default file type definitions with
+your own.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.add("foo", "*.foo");
+// Another way of adding a file type definition.
+// This is useful when accepting input from an end user.
+builder.add_def("bar:*.bar");
+// Note: we only select `foo`, not `bar`.
+builder.select("foo");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("x.foo", false).is_whitelist());
+// This is ignored because we only selected the `foo` file type.
+assert!(matcher.matched("x.bar", false).is_ignore());
+```
+
+We can also add file type definitions based on other definitions.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.add("foo", "*.foo");
+builder.add_def("bar:include:foo,cpp");
+builder.select("bar");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("x.foo", false).is_whitelist());
+assert!(matcher.matched("y.cpp", false).is_whitelist());
+```
+*/
+
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+
+use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
+use regex::Regex;
+use thread_local::ThreadLocal;
+
+use crate::default_types::DEFAULT_TYPES;
+use crate::pathutil::file_name;
+use crate::{Error, Match};
+
+/// Glob represents a single glob in a set of file type definitions.
+///
+/// There may be more than one glob for a particular file type.
+///
+/// This is used to report information about the highest-precedence glob
+/// that matched.
+///
+/// Note that not all matches necessarily correspond to a specific glob.
+/// For example, if there are one or more selections and a file path doesn't
+/// match any of those selections, then the file path is considered to be
+/// ignored.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying file type
+/// definition, which corresponds to the lifetime of the file type matcher.
+#[derive(Clone, Debug)]
+pub struct Glob<'a>(GlobInner<'a>);
+
+#[derive(Clone, Debug)]
+enum GlobInner<'a> {
+ /// No glob matched, but the file path should still be ignored.
+ UnmatchedIgnore,
+ /// A glob matched.
+ Matched {
+ /// The file type definition which provided the glob.
+ def: &'a FileTypeDef,
+ },
+}
+
+impl<'a> Glob<'a> {
+ fn unmatched() -> Glob<'a> {
+ Glob(GlobInner::UnmatchedIgnore)
+ }
+
+ /// Return the file type definition that matched, if one exists. A file type
+ /// definition always exists when a specific definition matches a file
+ /// path.
+ pub fn file_type_def(&self) -> Option<&FileTypeDef> {
+ match self {
+ Glob(GlobInner::UnmatchedIgnore) => None,
+ Glob(GlobInner::Matched { def, .. }) => Some(def),
+ }
+ }
+}
+
+/// A single file type definition.
+///
+/// File type definitions can be retrieved in aggregate from a file type
+/// matcher. A file type definition is also reported when it is responsible
+/// for a match.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FileTypeDef {
+ name: String,
+ globs: Vec<String>,
+}
+
+impl FileTypeDef {
+ /// Return the name of this file type.
+ pub fn name(&self) -> &str {
+ &self.name
+ }
+
+ /// Return the globs used to recognize this file type.
+ pub fn globs(&self) -> &[String] {
+ &self.globs
+ }
+}
+
+/// Types is a file type matcher.
+#[derive(Clone, Debug)]
+pub struct Types {
+ /// All of the file type definitions, sorted lexicographically by name.
+ defs: Vec<FileTypeDef>,
+ /// All of the selections made by the user.
+ selections: Vec<Selection<FileTypeDef>>,
+ /// Whether there is at least one Selection::Select in our selections.
+ /// When this is true, a Match::None is converted to Match::Ignore.
+ has_selected: bool,
+ /// A mapping from glob index in the set to two indices. The first is an
+ /// index into `selections` and the second is an index into the
+ /// corresponding file type definition's list of globs.
+ glob_to_selection: Vec<(usize, usize)>,
+ /// The set of all glob selections, used for actual matching.
+ set: GlobSet,
+ /// Temporary storage for globs that match.
+ matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
+}
+
+/// Indicates the type of a selection for a particular file type.
+#[derive(Clone, Debug)]
+enum Selection<T> {
+ Select(String, T),
+ Negate(String, T),
+}
+
+impl<T> Selection<T> {
+ fn is_negated(&self) -> bool {
+ match *self {
+ Selection::Select(..) => false,
+ Selection::Negate(..) => true,
+ }
+ }
+
+ fn name(&self) -> &str {
+ match *self {
+ Selection::Select(ref name, _) => name,
+ Selection::Negate(ref name, _) => name,
+ }
+ }
+
+ fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
+ match self {
+ Selection::Select(name, inner) => {
+ Selection::Select(name, f(inner))
+ }
+ Selection::Negate(name, inner) => {
+ Selection::Negate(name, f(inner))
+ }
+ }
+ }
+
+ fn inner(&self) -> &T {
+ match *self {
+ Selection::Select(_, ref inner) => inner,
+ Selection::Negate(_, ref inner) => inner,
+ }
+ }
+}
+
+impl Types {
+ /// Creates a new file type matcher that never matches any path and
+ /// contains no file type definitions.
+ pub fn empty() -> Types {
+ Types {
+ defs: vec![],
+ selections: vec![],
+ has_selected: false,
+ glob_to_selection: vec![],
+ set: GlobSetBuilder::new().build().unwrap(),
+ matches: Arc::new(ThreadLocal::default()),
+ }
+ }
+
+ /// Returns true if and only if this matcher has zero selections.
+ pub fn is_empty(&self) -> bool {
+ self.selections.is_empty()
+ }
+
+ /// Returns the number of selections used in this matcher.
+ pub fn len(&self) -> usize {
+ self.selections.len()
+ }
+
+ /// Return the set of current file type definitions.
+ ///
+ /// Definitions and globs are sorted.
+ pub fn definitions(&self) -> &[FileTypeDef] {
+ &self.defs
+ }
+
+ /// Returns a match for the given path against this file type matcher.
+ ///
+ /// The path is considered whitelisted if it matches a selected file type.
+ /// The path is considered ignored if it matches a negated file type.
+ /// If at least one file type is selected and `path` doesn't match, then
+ /// the path is also considered ignored.
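+    ///
+    /// A small sketch using the default type definitions:
+    ///
+    /// ```
+    /// use ignore::types::TypesBuilder;
+    ///
+    /// let mut builder = TypesBuilder::new();
+    /// builder.add_defaults();
+    /// builder.select("rust");
+    /// let matcher = builder.build().unwrap();
+    /// assert!(matcher.matched("lib.rs", false).is_whitelist());
+    /// // File types never match directories.
+    /// assert!(matcher.matched("src", true).is_none());
+    /// ```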
+ pub fn matched<'a, P: AsRef<Path>>(
+ &'a self,
+ path: P,
+ is_dir: bool,
+ ) -> Match<Glob<'a>> {
+ // File types don't apply to directories, and we can't do anything
+ // if our glob set is empty.
+ if is_dir || self.set.is_empty() {
+ return Match::None;
+ }
+ // We only want to match against the file name, so extract it.
+ // If one doesn't exist, then we can't match it.
+ let name = match file_name(path.as_ref()) {
+ Some(name) => name,
+ None if self.has_selected => {
+ return Match::Ignore(Glob::unmatched());
+ }
+ None => {
+ return Match::None;
+ }
+ };
+ let mut matches = self.matches.get_or_default().borrow_mut();
+ self.set.matches_into(name, &mut *matches);
+        // The match with the highest precedence is the last one.
+ if let Some(&i) = matches.last() {
+ let (isel, _) = self.glob_to_selection[i];
+ let sel = &self.selections[isel];
+ let glob = Glob(GlobInner::Matched { def: sel.inner() });
+ return if sel.is_negated() {
+ Match::Ignore(glob)
+ } else {
+ Match::Whitelist(glob)
+ };
+ }
+ if self.has_selected {
+ Match::Ignore(Glob::unmatched())
+ } else {
+ Match::None
+ }
+ }
+}
+
+/// TypesBuilder builds a type matcher from a set of file type definitions and
+/// a set of file type selections.
+pub struct TypesBuilder {
+ types: HashMap<String, FileTypeDef>,
+ selections: Vec<Selection<()>>,
+}
+
+impl TypesBuilder {
+ /// Create a new builder for a file type matcher.
+ ///
+    /// The builder contains *no* type definitions to start with. A set
+    /// of default type definitions can be added with `add_defaults`, and
+    /// additional type definitions can be added with `add` and `add_def`.
+    /// File types are then selected or negated with `select` and `negate`.
+ pub fn new() -> TypesBuilder {
+ TypesBuilder { types: HashMap::new(), selections: vec![] }
+ }
+
+ /// Build the current set of file type definitions *and* selections into
+ /// a file type matcher.
+ pub fn build(&self) -> Result<Types, Error> {
+ let defs = self.definitions();
+ let has_selected = self.selections.iter().any(|s| !s.is_negated());
+
+ let mut selections = vec![];
+ let mut glob_to_selection = vec![];
+ let mut build_set = GlobSetBuilder::new();
+ for (isel, selection) in self.selections.iter().enumerate() {
+ let def = match self.types.get(selection.name()) {
+ Some(def) => def.clone(),
+ None => {
+ let name = selection.name().to_string();
+ return Err(Error::UnrecognizedFileType(name));
+ }
+ };
+ for (iglob, glob) in def.globs.iter().enumerate() {
+ build_set.add(
+ GlobBuilder::new(glob)
+ .literal_separator(true)
+ .build()
+ .map_err(|err| Error::Glob {
+ glob: Some(glob.to_string()),
+ err: err.kind().to_string(),
+ })?,
+ );
+ glob_to_selection.push((isel, iglob));
+ }
+ selections.push(selection.clone().map(move |_| def));
+ }
+ let set = build_set
+ .build()
+ .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
+ Ok(Types {
+ defs: defs,
+ selections: selections,
+ has_selected: has_selected,
+ glob_to_selection: glob_to_selection,
+ set: set,
+ matches: Arc::new(ThreadLocal::default()),
+ })
+ }
+
+ /// Return the set of current file type definitions.
+ ///
+ /// Definitions and globs are sorted.
+ pub fn definitions(&self) -> Vec<FileTypeDef> {
+ let mut defs = vec![];
+ for def in self.types.values() {
+ let mut def = def.clone();
+ def.globs.sort();
+ defs.push(def);
+ }
+ defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
+ defs
+ }
+
+ /// Select the file type given by `name`.
+ ///
+ /// If `name` is `all`, then all file types currently defined are selected.
+ pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
+ if name == "all" {
+ for name in self.types.keys() {
+ self.selections.push(Selection::Select(name.to_string(), ()));
+ }
+ } else {
+ self.selections.push(Selection::Select(name.to_string(), ()));
+ }
+ self
+ }
+
+ /// Ignore the file type given by `name`.
+ ///
+ /// If `name` is `all`, then all file types currently defined are negated.
+ pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
+ if name == "all" {
+ for name in self.types.keys() {
+ self.selections.push(Selection::Negate(name.to_string(), ()));
+ }
+ } else {
+ self.selections.push(Selection::Negate(name.to_string(), ()));
+ }
+ self
+ }
+
+ /// Clear any file type definitions for the type name given.
+ pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
+ self.types.remove(name);
+ self
+ }
+
+    /// Add a new file type definition. `name` can be arbitrary and `glob`
+ /// should be a glob recognizing file paths belonging to the `name` type.
+ ///
+ /// If `name` is `all` or otherwise contains any character that is not a
+ /// Unicode letter or number, then an error is returned.
+ pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
+ lazy_static::lazy_static! {
+ static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
+ };
+ if name == "all" || !RE.is_match(name) {
+ return Err(Error::InvalidDefinition);
+ }
+ let (key, glob) = (name.to_string(), glob.to_string());
+ self.types
+ .entry(key)
+ .or_insert_with(|| FileTypeDef {
+ name: name.to_string(),
+ globs: vec![],
+ })
+ .globs
+ .push(glob);
+ Ok(())
+ }
+
+ /// Add a new file type definition specified in string form. There are two
+ /// valid formats:
+    /// 1. `{name}:{glob}`. This defines a 'root' definition that associates
+    ///    the given name with the given glob.
+    /// 2. `{name}:include:{comma-separated list of already defined names}`.
+    ///    This defines an 'include' definition that associates the given
+    ///    name with the definitions of the given existing types.
+    ///
+    /// Names may not include any characters that are not Unicode letters or
+    /// numbers.
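+    ///
+    /// A short sketch of both forms (the type names here are arbitrary):
+    ///
+    /// ```
+    /// use ignore::types::TypesBuilder;
+    ///
+    /// let mut builder = TypesBuilder::new();
+    /// // A 'root' definition.
+    /// builder.add_def("proto:*.proto").unwrap();
+    /// // An 'include' definition built from the previous one.
+    /// builder.add_def("gen:include:proto").unwrap();
+    /// // Anything else is rejected.
+    /// assert!(builder.add_def("not a definition").is_err());
+    /// ```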
+ pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
+ let parts: Vec<&str> = def.split(':').collect();
+ match parts.len() {
+ 2 => {
+ let name = parts[0];
+ let glob = parts[1];
+ if name.is_empty() || glob.is_empty() {
+ return Err(Error::InvalidDefinition);
+ }
+ self.add(name, glob)
+ }
+ 3 => {
+ let name = parts[0];
+ let types_string = parts[2];
+ if name.is_empty()
+ || parts[1] != "include"
+ || types_string.is_empty()
+ {
+ return Err(Error::InvalidDefinition);
+ }
+ let types = types_string.split(',');
+ // Check ahead of time to ensure that all types specified are
+ // present and fail fast if not.
+ if types.clone().any(|t| !self.types.contains_key(t)) {
+ return Err(Error::InvalidDefinition);
+ }
+ for type_name in types {
+ let globs =
+ self.types.get(type_name).unwrap().globs.clone();
+ for glob in globs {
+ self.add(name, &glob)?;
+ }
+ }
+ Ok(())
+ }
+ _ => Err(Error::InvalidDefinition),
+ }
+ }
+
+ /// Add a set of default file type definitions.
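+ ///
+ /// # Example
+ ///
+ /// A minimal sketch of a typical flow (the `rust` selection and the
+ /// file name are illustrative): add the defaults, select a type and
+ /// build a matcher.
+ ///
+ /// ```
+ /// use ignore::types::TypesBuilder;
+ ///
+ /// let mut builder = TypesBuilder::new();
+ /// builder.add_defaults();
+ /// builder.select("rust");
+ /// let types = builder.build().unwrap();
+ /// assert!(!types.matched("lib.rs", false).is_ignore());
+ /// ```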
+ pub fn add_defaults(&mut self) -> &mut TypesBuilder {
+ static MSG: &'static str = "adding a default type should never fail";
+ for &(name, exts) in DEFAULT_TYPES {
+ for ext in exts {
+ self.add(name, ext).expect(MSG);
+ }
+ }
+ self
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::TypesBuilder;
+
+ macro_rules! matched {
+ ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr) => {
+ matched!($name, $types, $sel, $selnot, $path, true);
+ };
+ (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr) => {
+ matched!($name, $types, $sel, $selnot, $path, false);
+ };
+ ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr, $matched:expr) => {
+ #[test]
+ fn $name() {
+ let mut btypes = TypesBuilder::new();
+ for tydef in $types {
+ btypes.add_def(tydef).unwrap();
+ }
+ for sel in $sel {
+ btypes.select(sel);
+ }
+ for selnot in $selnot {
+ btypes.negate(selnot);
+ }
+ let types = btypes.build().unwrap();
+ let mat = types.matched($path, false);
+ assert_eq!($matched, !mat.is_ignore());
+ }
+ };
+ }
+
+ fn types() -> Vec<&'static str> {
+ vec![
+ "html:*.html",
+ "html:*.htm",
+ "rust:*.rs",
+ "js:*.js",
+ "foo:*.{rs,foo}",
+ "combo:include:html,rust",
+ ]
+ }
+
+ matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
+ matched!(match2, types(), vec!["html"], vec![], "index.html");
+ matched!(match3, types(), vec!["html"], vec![], "index.htm");
+ matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
+ matched!(match5, types(), vec![], vec![], "index.html");
+ matched!(match6, types(), vec![], vec!["rust"], "index.html");
+ matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
+ matched!(match8, types(), vec!["combo"], vec![], "index.html");
+ matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
+
+ matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
+ matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
+ matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
+ matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
+ matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
+ matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
+
+ #[test]
+ fn test_invalid_defs() {
+ let mut btypes = TypesBuilder::new();
+ for tydef in types() {
+ btypes.add_def(tydef).unwrap();
+ }
+ // Preserve the original definitions for later comparison.
+ let original_defs = btypes.definitions();
+ let bad_defs = vec![
+ // Reference to type that does not exist
+ "combo:include:html,python",
+ // Bad format
+ "combo:foobar:html,rust",
+ "",
+ ];
+ for def in bad_defs {
+ assert!(btypes.add_def(def).is_err());
+ // Ensure that nothing changed, even if some of the includes were valid.
+ assert_eq!(btypes.definitions(), original_defs);
+ }
+ }
+}
diff --git a/vendor/ignore/src/walk.rs b/vendor/ignore/src/walk.rs
new file mode 100644
index 0000000..602faae
--- /dev/null
+++ b/vendor/ignore/src/walk.rs
@@ -0,0 +1,2251 @@
+use std::cmp;
+use std::ffi::OsStr;
+use std::fmt;
+use std::fs::{self, FileType, Metadata};
+use std::io;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use std::time::Duration;
+use std::vec;
+
+use same_file::Handle;
+use walkdir::{self, WalkDir};
+
+use crate::dir::{Ignore, IgnoreBuilder};
+use crate::gitignore::GitignoreBuilder;
+use crate::overrides::Override;
+use crate::types::Types;
+use crate::{Error, PartialErrorBuilder};
+
+/// A directory entry with a possible error attached.
+///
+/// The error typically refers to a problem parsing ignore files in a
+/// particular directory.
+#[derive(Clone, Debug)]
+pub struct DirEntry {
+ dent: DirEntryInner,
+ err: Option<Error>,
+}
+
+impl DirEntry {
+ /// The full path that this entry represents.
+ pub fn path(&self) -> &Path {
+ self.dent.path()
+ }
+
+ /// The full path that this entry represents.
+ /// Analogous to [`path`], but moves ownership of the path.
+ ///
+ /// [`path`]: struct.DirEntry.html#method.path
+ pub fn into_path(self) -> PathBuf {
+ self.dent.into_path()
+ }
+
+ /// Whether this entry corresponds to a symbolic link or not.
+ pub fn path_is_symlink(&self) -> bool {
+ self.dent.path_is_symlink()
+ }
+
+ /// Returns true if and only if this entry corresponds to stdin.
+ ///
+ /// That is, the entry has depth 0 and its file name is `-`.
+ pub fn is_stdin(&self) -> bool {
+ self.dent.is_stdin()
+ }
+
+ /// Return the metadata for the file that this entry points to.
+ pub fn metadata(&self) -> Result<Metadata, Error> {
+ self.dent.metadata()
+ }
+
+ /// Return the file type for the file that this entry points to.
+ ///
+ /// This entry doesn't have a file type if it corresponds to stdin.
+ pub fn file_type(&self) -> Option<FileType> {
+ self.dent.file_type()
+ }
+
+ /// Return the file name of this entry.
+ ///
+ /// If this entry has no file name (e.g., `/`), then the full path is
+ /// returned.
+ pub fn file_name(&self) -> &OsStr {
+ self.dent.file_name()
+ }
+
+ /// Returns the depth at which this entry was created relative to the root.
+ pub fn depth(&self) -> usize {
+ self.dent.depth()
+ }
+
+ /// Returns the underlying inode number if one exists.
+ ///
+ /// If this entry doesn't have an inode number, then `None` is returned.
+ #[cfg(unix)]
+ pub fn ino(&self) -> Option<u64> {
+ self.dent.ino()
+ }
+
+ /// Returns an error, if one exists, associated with processing this entry.
+ ///
+ /// An example of an error is one that occurred while parsing an ignore
+ /// file. Errors related to traversing a directory tree itself are reported
+ /// as part of yielding the directory entry, and not with this method.
+ pub fn error(&self) -> Option<&Error> {
+ self.err.as_ref()
+ }
+
+ /// Returns true if and only if this entry points to a directory.
+ pub(crate) fn is_dir(&self) -> bool {
+ self.dent.is_dir()
+ }
+
+ fn new_stdin() -> DirEntry {
+ DirEntry { dent: DirEntryInner::Stdin, err: None }
+ }
+
+ fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
+ DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
+ }
+
+ fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
+ DirEntry { dent: DirEntryInner::Raw(dent), err: err }
+ }
+}
+
+/// DirEntryInner is the implementation of DirEntry.
+///
+/// It specifically represents three distinct sources of directory entries:
+///
+/// 1. From the walkdir crate.
+/// 2. Special entries that represent things like stdin.
+/// 3. From a path.
+///
+/// Specifically, (3) has to essentially re-create the DirEntry implementation
+/// from WalkDir.
+#[derive(Clone, Debug)]
+enum DirEntryInner {
+ Stdin,
+ Walkdir(walkdir::DirEntry),
+ Raw(DirEntryRaw),
+}
+
+impl DirEntryInner {
+ fn path(&self) -> &Path {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => Path::new("<stdin>"),
+ Walkdir(ref x) => x.path(),
+ Raw(ref x) => x.path(),
+ }
+ }
+
+ fn into_path(self) -> PathBuf {
+ use self::DirEntryInner::*;
+ match self {
+ Stdin => PathBuf::from("<stdin>"),
+ Walkdir(x) => x.into_path(),
+ Raw(x) => x.into_path(),
+ }
+ }
+
+ fn path_is_symlink(&self) -> bool {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => false,
+ Walkdir(ref x) => x.path_is_symlink(),
+ Raw(ref x) => x.path_is_symlink(),
+ }
+ }
+
+ fn is_stdin(&self) -> bool {
+ match *self {
+ DirEntryInner::Stdin => true,
+ _ => false,
+ }
+ }
+
+ fn metadata(&self) -> Result<Metadata, Error> {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => {
+ let err = Error::Io(io::Error::new(
+ io::ErrorKind::Other,
+ "<stdin> has no metadata",
+ ));
+ Err(err.with_path("<stdin>"))
+ }
+ Walkdir(ref x) => x.metadata().map_err(|err| {
+ Error::Io(io::Error::from(err)).with_path(x.path())
+ }),
+ Raw(ref x) => x.metadata(),
+ }
+ }
+
+ fn file_type(&self) -> Option<FileType> {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => None,
+ Walkdir(ref x) => Some(x.file_type()),
+ Raw(ref x) => Some(x.file_type()),
+ }
+ }
+
+ fn file_name(&self) -> &OsStr {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => OsStr::new("<stdin>"),
+ Walkdir(ref x) => x.file_name(),
+ Raw(ref x) => x.file_name(),
+ }
+ }
+
+ fn depth(&self) -> usize {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => 0,
+ Walkdir(ref x) => x.depth(),
+ Raw(ref x) => x.depth(),
+ }
+ }
+
+ #[cfg(unix)]
+ fn ino(&self) -> Option<u64> {
+ use self::DirEntryInner::*;
+ use walkdir::DirEntryExt;
+ match *self {
+ Stdin => None,
+ Walkdir(ref x) => Some(x.ino()),
+ Raw(ref x) => Some(x.ino()),
+ }
+ }
+
+ /// Returns true if and only if this entry points to a directory.
+ fn is_dir(&self) -> bool {
+ self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
+ }
+}
+
+/// DirEntryRaw is essentially copied from the walkdir crate so that we can
+/// build `DirEntry`s from whole cloth in the parallel iterator.
+#[derive(Clone)]
+struct DirEntryRaw {
+ /// The path as reported by the `fs::ReadDir` iterator (even if it's a
+ /// symbolic link).
+ path: PathBuf,
+ /// The file type. Necessary for recursive iteration, so store it.
+ ty: FileType,
+ /// Is set when this entry was created from a symbolic link and the user
+ /// expects the iterator to follow symbolic links.
+ follow_link: bool,
+ /// The depth at which this entry was generated relative to the root.
+ depth: usize,
+ /// The underlying inode number (Unix only).
+ #[cfg(unix)]
+ ino: u64,
+ /// The underlying metadata (Windows only). We store this on Windows
+ /// because this comes for free while reading a directory.
+ #[cfg(windows)]
+ metadata: fs::Metadata,
+}
+
+impl fmt::Debug for DirEntryRaw {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Leaving out FileType because it doesn't have a debug impl
+ // in Rust 1.9. We could add it if we really wanted to by manually
+ // querying each possible file type. Meh. ---AG
+ f.debug_struct("DirEntryRaw")
+ .field("path", &self.path)
+ .field("follow_link", &self.follow_link)
+ .field("depth", &self.depth)
+ .finish()
+ }
+}
+
+impl DirEntryRaw {
+ fn path(&self) -> &Path {
+ &self.path
+ }
+
+ fn into_path(self) -> PathBuf {
+ self.path
+ }
+
+ fn path_is_symlink(&self) -> bool {
+ self.ty.is_symlink() || self.follow_link
+ }
+
+ fn metadata(&self) -> Result<Metadata, Error> {
+ self.metadata_internal()
+ }
+
+ #[cfg(windows)]
+ fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
+ if self.follow_link {
+ fs::metadata(&self.path)
+ } else {
+ Ok(self.metadata.clone())
+ }
+ .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
+ }
+
+ #[cfg(not(windows))]
+ fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
+ if self.follow_link {
+ fs::metadata(&self.path)
+ } else {
+ fs::symlink_metadata(&self.path)
+ }
+ .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
+ }
+
+ fn file_type(&self) -> FileType {
+ self.ty
+ }
+
+ fn file_name(&self) -> &OsStr {
+ self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
+ }
+
+ fn depth(&self) -> usize {
+ self.depth
+ }
+
+ #[cfg(unix)]
+ fn ino(&self) -> u64 {
+ self.ino
+ }
+
+ fn from_entry(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ) -> Result<DirEntryRaw, Error> {
+ let ty = ent.file_type().map_err(|err| {
+ let err = Error::Io(io::Error::from(err)).with_path(ent.path());
+ Error::WithDepth { depth: depth, err: Box::new(err) }
+ })?;
+ DirEntryRaw::from_entry_os(depth, ent, ty)
+ }
+
+ #[cfg(windows)]
+ fn from_entry_os(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ty: fs::FileType,
+ ) -> Result<DirEntryRaw, Error> {
+ let md = ent.metadata().map_err(|err| {
+ let err = Error::Io(io::Error::from(err)).with_path(ent.path());
+ Error::WithDepth { depth: depth, err: Box::new(err) }
+ })?;
+ Ok(DirEntryRaw {
+ path: ent.path(),
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ metadata: md,
+ })
+ }
+
+ #[cfg(unix)]
+ fn from_entry_os(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ty: fs::FileType,
+ ) -> Result<DirEntryRaw, Error> {
+ use std::os::unix::fs::DirEntryExt;
+
+ Ok(DirEntryRaw {
+ path: ent.path(),
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ ino: ent.ino(),
+ })
+ }
+
+ // Placeholder implementation to allow compiling on non-standard platforms
+ // (e.g. wasm32).
+ #[cfg(not(any(windows, unix)))]
+ fn from_entry_os(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ty: fs::FileType,
+ ) -> Result<DirEntryRaw, Error> {
+ Err(Error::Io(io::Error::new(
+ io::ErrorKind::Other,
+ "unsupported platform",
+ )))
+ }
+
+ #[cfg(windows)]
+ fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ link: bool,
+ ) -> Result<DirEntryRaw, Error> {
+ let md =
+ fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
+ Ok(DirEntryRaw {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: link,
+ depth: depth,
+ metadata: md,
+ })
+ }
+
+ #[cfg(unix)]
+ fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ link: bool,
+ ) -> Result<DirEntryRaw, Error> {
+ use std::os::unix::fs::MetadataExt;
+
+ let md =
+ fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
+ Ok(DirEntryRaw {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: link,
+ depth: depth,
+ ino: md.ino(),
+ })
+ }
+
+ // Placeholder implementation to allow compiling on non-standard platforms
+ // (e.g. wasm32).
+ #[cfg(not(any(windows, unix)))]
+ fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ link: bool,
+ ) -> Result<DirEntryRaw, Error> {
+ Err(Error::Io(io::Error::new(
+ io::ErrorKind::Other,
+ "unsupported platform",
+ )))
+ }
+}
+
+/// WalkBuilder builds a recursive directory iterator.
+///
+/// The builder supports a large number of configurable options. This includes
+/// specific glob overrides, file type matching, toggling whether hidden
+/// files are ignored or not, and of course, support for respecting gitignore
+/// files.
+///
+/// By default, all ignore files found are respected. This includes `.ignore`,
+/// `.gitignore`, `.git/info/exclude` and even your global gitignore
+/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
+///
+/// Some standard recursive directory options are also supported, such as
+/// limiting the recursive depth or whether to follow symbolic links (disabled
+/// by default).
+///
+/// # Ignore rules
+///
+/// There are many rules that influence whether a particular file or directory
+/// is skipped by this iterator. Those rules are documented here. Note that
+/// the rules assume a default configuration.
+///
+/// * First, glob overrides are checked. If a path matches a glob override,
+/// then matching stops. The path is then only skipped if the glob that matched
+/// the path is an ignore glob. (An override glob is a whitelist glob unless it
+/// starts with a `!`, in which case it is an ignore glob.)
+/// * Second, ignore files are checked. Ignore files currently only come from
+/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
+/// global gitignore file), plain `.ignore` files, which have the same format
+/// as gitignore files, or explicitly added ignore files. The precedence order
+/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
+/// finally explicitly added ignore files. Note that precedence between
+/// different types of ignore files is not impacted by the directory hierarchy;
+/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
+/// level, more nested ignore files have a higher precedence than less nested
+/// ignore files.
+/// * Third, if the previous step yields an ignore match, then all matching
+/// is stopped and the path is skipped. If it yields a whitelist match, then
+/// matching continues. A whitelist match can be overridden by a later matcher.
+/// * Fourth, unless the path is a directory, the file type matcher is run on
+/// the path. As above, if it yields an ignore match, then all matching is
+/// stopped and the path is skipped. If it yields a whitelist match, then
+/// matching continues.
+/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
+/// path is skipped.
+/// * Sixth, unless the path is a directory, the size of the file is compared
+/// against the max filesize limit. If it exceeds the limit, it is skipped.
+/// * Seventh, if the path has made it this far then it is yielded in the
+/// iterator.
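+///
+/// # Example
+///
+/// A minimal sketch of iterating a tree while respecting the rules
+/// above; the `./` root path is illustrative.
+///
+/// ```no_run
+/// use ignore::WalkBuilder;
+///
+/// for result in WalkBuilder::new("./").build() {
+///     match result {
+///         Ok(entry) => println!("{}", entry.path().display()),
+///         Err(err) => eprintln!("ERROR: {}", err),
+///     }
+/// }
+/// ```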
+#[derive(Clone)]
+pub struct WalkBuilder {
+ paths: Vec<PathBuf>,
+ ig_builder: IgnoreBuilder,
+ max_depth: Option<usize>,
+ max_filesize: Option<u64>,
+ follow_links: bool,
+ same_file_system: bool,
+ sorter: Option<Sorter>,
+ threads: usize,
+ skip: Option<Arc<Handle>>,
+ filter: Option<Filter>,
+}
+
+#[derive(Clone)]
+enum Sorter {
+ ByName(
+ Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
+ ),
+ ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
+}
+
+#[derive(Clone)]
+struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
+
+impl fmt::Debug for WalkBuilder {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("WalkBuilder")
+ .field("paths", &self.paths)
+ .field("ig_builder", &self.ig_builder)
+ .field("max_depth", &self.max_depth)
+ .field("max_filesize", &self.max_filesize)
+ .field("follow_links", &self.follow_links)
+ .field("threads", &self.threads)
+ .field("skip", &self.skip)
+ .finish()
+ }
+}
+
+impl WalkBuilder {
+ /// Create a new builder for a recursive directory iterator for the
+ /// directory given.
+ ///
+ /// Note that if you want to traverse multiple different directories, it
+ /// is better to call `add` on this builder than to create multiple
+ /// `Walk` values.
+ pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
+ WalkBuilder {
+ paths: vec![path.as_ref().to_path_buf()],
+ ig_builder: IgnoreBuilder::new(),
+ max_depth: None,
+ max_filesize: None,
+ follow_links: false,
+ same_file_system: false,
+ sorter: None,
+ threads: 0,
+ skip: None,
+ filter: None,
+ }
+ }
+
+ /// Build a new `Walk` iterator.
+ pub fn build(&self) -> Walk {
+ let follow_links = self.follow_links;
+ let max_depth = self.max_depth;
+ let sorter = self.sorter.clone();
+ let its = self
+ .paths
+ .iter()
+ .map(move |p| {
+ if p == Path::new("-") {
+ (p.to_path_buf(), None)
+ } else {
+ let mut wd = WalkDir::new(p);
+ wd = wd.follow_links(follow_links || p.is_file());
+ wd = wd.same_file_system(self.same_file_system);
+ if let Some(max_depth) = max_depth {
+ wd = wd.max_depth(max_depth);
+ }
+ if let Some(ref sorter) = sorter {
+ match sorter.clone() {
+ Sorter::ByName(cmp) => {
+ wd = wd.sort_by(move |a, b| {
+ cmp(a.file_name(), b.file_name())
+ });
+ }
+ Sorter::ByPath(cmp) => {
+ wd = wd.sort_by(move |a, b| {
+ cmp(a.path(), b.path())
+ });
+ }
+ }
+ }
+ (p.to_path_buf(), Some(WalkEventIter::from(wd)))
+ }
+ })
+ .collect::<Vec<_>>()
+ .into_iter();
+ let ig_root = self.ig_builder.build();
+ Walk {
+ its: its,
+ it: None,
+ ig_root: ig_root.clone(),
+ ig: ig_root.clone(),
+ max_filesize: self.max_filesize,
+ skip: self.skip.clone(),
+ filter: self.filter.clone(),
+ }
+ }
+
+ /// Build a new `WalkParallel` iterator.
+ ///
+ /// Note that this *doesn't* return something that implements `Iterator`.
+ /// Instead, the returned value must be run with a closure that builds
+ /// a per-thread visitor callback; see the sketch below.
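+ ///
+ /// A minimal sketch (the `./` path and the printing are illustrative);
+ /// each call to the outer closure builds one boxed callback for a
+ /// worker thread:
+ ///
+ /// ```no_run
+ /// use ignore::{WalkBuilder, WalkState};
+ ///
+ /// WalkBuilder::new("./").build_parallel().run(|| {
+ ///     Box::new(|result| {
+ ///         if let Ok(entry) = result {
+ ///             println!("{}", entry.path().display());
+ ///         }
+ ///         WalkState::Continue
+ ///     })
+ /// });
+ /// ```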
+ pub fn build_parallel(&self) -> WalkParallel {
+ WalkParallel {
+ paths: self.paths.clone().into_iter(),
+ ig_root: self.ig_builder.build(),
+ max_depth: self.max_depth,
+ max_filesize: self.max_filesize,
+ follow_links: self.follow_links,
+ same_file_system: self.same_file_system,
+ threads: self.threads,
+ skip: self.skip.clone(),
+ filter: self.filter.clone(),
+ }
+ }
+
+ /// Add a file path to the iterator.
+ ///
+ /// Each additional file path added is traversed recursively. This should
+ /// be preferred over building multiple `Walk` iterators since this
+ /// enables reusing resources across iteration.
+ pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
+ self.paths.push(path.as_ref().to_path_buf());
+ self
+ }
+
+ /// The maximum depth to recurse.
+ ///
+ /// The default, `None`, imposes no depth restriction.
+ pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
+ self.max_depth = depth;
+ self
+ }
+
+ /// Whether to follow symbolic links or not.
+ pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.follow_links = yes;
+ self
+ }
+
+ /// Whether to ignore files above the specified limit.
+ pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
+ self.max_filesize = filesize;
+ self
+ }
+
+ /// The number of threads to use for traversal.
+ ///
+ /// Note that this only has an effect when using `build_parallel`.
+ ///
+ /// The default setting is `0`, which chooses the number of threads
+ /// automatically using heuristics.
+ pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
+ self.threads = n;
+ self
+ }
+
+ /// Add a global ignore file to the matcher.
+ ///
+ /// This has lower precedence than all other sources of ignore rules.
+ ///
+ /// If there was a problem adding the ignore file, then an error is
+ /// returned. Note that the error may indicate *partial* failure. For
+ /// example, if an ignore file contains an invalid glob, all other globs
+ /// are still applied.
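+ ///
+ /// For example, a minimal sketch (the file name is illustrative):
+ ///
+ /// ```no_run
+ /// use ignore::WalkBuilder;
+ ///
+ /// let mut builder = WalkBuilder::new("./");
+ /// // `add_ignore` returns `Some(err)` rather than `Err(..)` when the
+ /// // ignore file could not be added cleanly.
+ /// if let Some(err) = builder.add_ignore(".myglobalignore") {
+ ///     eprintln!("problem adding ignore file: {}", err);
+ /// }
+ /// ```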
+ pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
+ let mut builder = GitignoreBuilder::new("");
+ let mut errs = PartialErrorBuilder::default();
+ errs.maybe_push(builder.add(path));
+ match builder.build() {
+ Ok(gi) => {
+ self.ig_builder.add_ignore(gi);
+ }
+ Err(err) => {
+ errs.push(err);
+ }
+ }
+ errs.into_error_option()
+ }
+
+ /// Add a custom ignore file name.
+ ///
+ /// These ignore files have higher precedence than all other ignore files.
+ ///
+ /// When specifying multiple names, earlier names have lower precedence than
+ /// later names.
+ pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
+ &mut self,
+ file_name: S,
+ ) -> &mut WalkBuilder {
+ self.ig_builder.add_custom_ignore_filename(file_name);
+ self
+ }
+
+ /// Add an override matcher.
+ ///
+ /// By default, no override matcher is used.
+ ///
+ /// This overrides any previous setting.
+ pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
+ self.ig_builder.overrides(overrides);
+ self
+ }
+
+ /// Add a file type matcher.
+ ///
+ /// By default, no file type matcher is used.
+ ///
+ /// This overrides any previous setting.
+ pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
+ self.ig_builder.types(types);
+ self
+ }
+
+ /// Enables all the standard ignore filters.
+ ///
+ /// This toggles, as a group, all the filters that are enabled by default:
+ ///
+ /// - [hidden()](#method.hidden)
+ /// - [parents()](#method.parents)
+ /// - [ignore()](#method.ignore)
+ /// - [git_ignore()](#method.git_ignore)
+ /// - [git_global()](#method.git_global)
+ /// - [git_exclude()](#method.git_exclude)
+ ///
+ /// They may still be toggled individually after calling this function.
+ ///
+ /// This is (by definition) enabled by default.
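+ ///
+ /// For example, a minimal sketch that disables every standard filter
+ /// at once (the `./` root path is illustrative):
+ ///
+ /// ```no_run
+ /// use ignore::WalkBuilder;
+ ///
+ /// let mut builder = WalkBuilder::new("./");
+ /// builder.standard_filters(false);
+ /// for result in builder.build() {
+ ///     // Hidden files and ignored files are now yielded too.
+ ///     let _ = result;
+ /// }
+ /// ```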
+ pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.hidden(yes)
+ .parents(yes)
+ .ignore(yes)
+ .git_ignore(yes)
+ .git_global(yes)
+ .git_exclude(yes)
+ }
+
+ /// Enables ignoring hidden files.
+ ///
+ /// This is enabled by default.
+ pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.hidden(yes);
+ self
+ }
+
+ /// Enables reading ignore files from parent directories.
+ ///
+ /// If this is enabled, then .gitignore files in parent directories of each
+ /// file path given are respected. Otherwise, they are ignored.
+ ///
+ /// This is enabled by default.
+ pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.parents(yes);
+ self
+ }
+
+ /// Enables reading `.ignore` files.
+ ///
+ /// `.ignore` files have the same semantics as `gitignore` files and are
+ /// supported by search tools such as ripgrep and The Silver Searcher.
+ ///
+ /// This is enabled by default.
+ pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.ignore(yes);
+ self
+ }
+
+ /// Enables reading a global gitignore file, whose path is specified in
+ /// git's `core.excludesFile` config option.
+ ///
+ /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
+ /// does not exist or does not specify `core.excludesFile`, then
+ /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
+ /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
+ ///
+ /// This is enabled by default.
+ pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.git_global(yes);
+ self
+ }
+
+ /// Enables reading `.gitignore` files.
+ ///
+ /// `.gitignore` files have match semantics as described in the `gitignore`
+ /// man page.
+ ///
+ /// This is enabled by default.
+ pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.git_ignore(yes);
+ self
+ }
+
+ /// Enables reading `.git/info/exclude` files.
+ ///
+ /// `.git/info/exclude` files have match semantics as described in the
+ /// `gitignore` man page.
+ ///
+ /// This is enabled by default.
+ pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.git_exclude(yes);
+ self
+ }
+
+ /// Whether a git repository is required to apply git-related ignore
+ /// rules (global rules, .gitignore and local exclude rules).
+ ///
+ /// When disabled, git-related ignore rules are applied even when searching
+ /// outside a git repository.
+ pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.require_git(yes);
+ self
+ }
+
+ /// Process ignore files case insensitively.
+ ///
+ /// This is disabled by default.
+ pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.ig_builder.ignore_case_insensitive(yes);
+ self
+ }
+
+ /// Set a function for sorting directory entries by their path.
+ ///
+ /// If a compare function is set, the resulting iterator will return all
+ /// paths in sorted order. The compare function will be called to compare
+ /// entries from the same directory.
+ ///
+ /// This is like `sort_by_file_name`, except the comparator accepts
+ /// a `&Path` instead of the base file name, which permits it to sort by
+ /// more criteria.
+ ///
+ /// This method will override any previous sorter set by this method or
+ /// by `sort_by_file_name`.
+ ///
+ /// Note that this is not used in the parallel iterator.
+ pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
+ where
+ F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
+ {
+ self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
+ self
+ }
+
+ /// Set a function for sorting directory entries by file name.
+ ///
+ /// If a compare function is set, the resulting iterator will return all
+ /// paths in sorted order. The compare function will be called to compare
+ /// entries from the same directory using only the file name of each
+ /// entry.
+ ///
+ /// This method will override any previous sorter set by this method or
+ /// by `sort_by_file_path`.
+ ///
+ /// Note that this is not used in the parallel iterator.
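+ ///
+ /// For example, a minimal sketch of a lexicographic sort by file name
+ /// (the `./` root path is illustrative):
+ ///
+ /// ```no_run
+ /// use ignore::WalkBuilder;
+ ///
+ /// let mut builder = WalkBuilder::new("./");
+ /// builder.sort_by_file_name(|a, b| a.cmp(b));
+ /// for result in builder.build() {
+ ///     let _ = result;
+ /// }
+ /// ```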
+ pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
+ where
+ F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
+ {
+ self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
+ self
+ }
+
+ /// Do not cross file system boundaries.
+ ///
+ /// When this option is enabled, directory traversal will not descend into
+ /// directories that are on a different file system from the root path.
+ ///
+ /// Currently, this option is only supported on Unix and Windows. If this
+ /// option is used on an unsupported platform, then directory traversal
+ /// will immediately return an error and will not yield any entries.
+ pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
+ self.same_file_system = yes;
+ self
+ }
+
+ /// Do not yield directory entries that are believed to correspond to
+ /// stdout.
+ ///
+ /// This is useful when a command is invoked via shell redirection to a
+ /// file that is also being read. For example, `grep -r foo ./ > results`
+ /// might end up trying to search `results` even though it is also writing
+ /// to it, which could cause an unbounded feedback loop. Setting this
+ /// option prevents this from happening by skipping over the `results`
+ /// file.
+ ///
+ /// This is disabled by default.
+ pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
+ if yes {
+ self.skip = stdout_handle().map(Arc::new);
+ } else {
+ self.skip = None;
+ }
+ self
+ }
+
+ /// Yields only entries which satisfy the given predicate and skips
+ /// descending into directories that do not satisfy the given predicate.
+ ///
+ /// The predicate is applied to all entries. If the predicate is
+ /// true, iteration carries on as normal. If the predicate is false, the
+ /// entry is ignored and, if it is a directory, it is not descended into.
+ ///
+ /// Note that errors encountered while reading entries are still
+ /// yielded, even for entries that may not satisfy the predicate.
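+ ///
+ /// For example, a minimal sketch that skips any entry named `target`
+ /// (the directory name and `./` root path are illustrative):
+ ///
+ /// ```no_run
+ /// use ignore::WalkBuilder;
+ ///
+ /// let mut builder = WalkBuilder::new("./");
+ /// builder.filter_entry(|entry| entry.file_name() != "target");
+ /// for result in builder.build() {
+ ///     let _ = result;
+ /// }
+ /// ```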
+ pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
+ where
+ P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
+ {
+ self.filter = Some(Filter(Arc::new(filter)));
+ self
+ }
+}
+
+/// Walk is a recursive directory iterator over file paths in one or more
+/// directories.
+///
+/// Only file and directory paths matching the rules are returned. By default,
+/// ignore files like `.gitignore` are respected. The precise matching rules
+/// and precedence is explained in the documentation for `WalkBuilder`.
+pub struct Walk {
+ its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
+ it: Option<WalkEventIter>,
+ ig_root: Ignore,
+ ig: Ignore,
+ max_filesize: Option<u64>,
+ skip: Option<Arc<Handle>>,
+ filter: Option<Filter>,
+}
+
+impl Walk {
+ /// Creates a new recursive directory iterator for the file path given.
+ ///
+ /// Note that this uses default settings, which include respecting
+ /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
+ /// instead.
+ pub fn new<P: AsRef<Path>>(path: P) -> Walk {
+ WalkBuilder::new(path).build()
+ }
+
+ fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
+ if ent.depth() == 0 {
+ return Ok(false);
+ }
+ // We ensure that trivial skipping is done before any other potentially
+ // expensive operations (stat or other filesystem calls) are done. This seems
+ // like an obvious optimization but becomes critical when filesystem
+ // operations even as simple as stat can result in significant
+ // overheads; an example of this was a bespoke filesystem layer in
+ // Windows that hosted files remotely and would download them on-demand
+ // when particular filesystem operations occurred. Users of this system
+ // who ensured correct file-type filters were being used could still
+ // get unnecessary file access resulting in large downloads.
+ if should_skip_entry(&self.ig, ent) {
+ return Ok(true);
+ }
+ if let Some(ref stdout) = self.skip {
+ if path_equals(ent, stdout)? {
+ return Ok(true);
+ }
+ }
+ if self.max_filesize.is_some() && !ent.is_dir() {
+ return Ok(skip_filesize(
+ self.max_filesize.unwrap(),
+ ent.path(),
+ &ent.metadata().ok(),
+ ));
+ }
+ if let Some(Filter(filter)) = &self.filter {
+ if !filter(ent) {
+ return Ok(true);
+ }
+ }
+ Ok(false)
+ }
+}
+
+impl Iterator for Walk {
+ type Item = Result<DirEntry, Error>;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<Result<DirEntry, Error>> {
+ loop {
+ let ev = match self.it.as_mut().and_then(|it| it.next()) {
+ Some(ev) => ev,
+ None => {
+ match self.its.next() {
+ None => return None,
+ Some((_, None)) => {
+ return Some(Ok(DirEntry::new_stdin()));
+ }
+ Some((path, Some(it))) => {
+ self.it = Some(it);
+ if path.is_dir() {
+ let (ig, err) = self.ig_root.add_parents(path);
+ self.ig = ig;
+ if let Some(err) = err {
+ return Some(Err(err));
+ }
+ } else {
+ self.ig = self.ig_root.clone();
+ }
+ }
+ }
+ continue;
+ }
+ };
+ match ev {
+ Err(err) => {
+ return Some(Err(Error::from_walkdir(err)));
+ }
+ Ok(WalkEvent::Exit) => {
+ self.ig = self.ig.parent().unwrap();
+ }
+ Ok(WalkEvent::Dir(ent)) => {
+ let mut ent = DirEntry::new_walkdir(ent, None);
+ let should_skip = match self.skip_entry(&ent) {
+ Err(err) => return Some(Err(err)),
+ Ok(should_skip) => should_skip,
+ };
+ if should_skip {
+ self.it.as_mut().unwrap().it.skip_current_dir();
+ // Still need to push this on the stack because
+ // we'll get a WalkEvent::Exit event for this dir.
+ // We don't care if it errors though.
+ let (igtmp, _) = self.ig.add_child(ent.path());
+ self.ig = igtmp;
+ continue;
+ }
+ let (igtmp, err) = self.ig.add_child(ent.path());
+ self.ig = igtmp;
+ ent.err = err;
+ return Some(Ok(ent));
+ }
+ Ok(WalkEvent::File(ent)) => {
+ let ent = DirEntry::new_walkdir(ent, None);
+ let should_skip = match self.skip_entry(&ent) {
+ Err(err) => return Some(Err(err)),
+ Ok(should_skip) => should_skip,
+ };
+ if should_skip {
+ continue;
+ }
+ return Some(Ok(ent));
+ }
+ }
+ }
+ }
+}
+
+/// WalkEventIter transforms a WalkDir iterator into an iterator that more
+/// accurately describes the directory tree. Namely, it emits events that are
+/// one of three types: directory, file or "exit." An "exit" event means that
+/// the entire contents of a directory have been enumerated.
+struct WalkEventIter {
+ depth: usize,
+ it: walkdir::IntoIter,
+ next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
+}
+
+#[derive(Debug)]
+enum WalkEvent {
+ Dir(walkdir::DirEntry),
+ File(walkdir::DirEntry),
+ Exit,
+}
+
+impl From<WalkDir> for WalkEventIter {
+ fn from(it: WalkDir) -> WalkEventIter {
+ WalkEventIter { depth: 0, it: it.into_iter(), next: None }
+ }
+}
+
+impl Iterator for WalkEventIter {
+ type Item = walkdir::Result<WalkEvent>;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
+ let dent = self.next.take().or_else(|| self.it.next());
+ let depth = match dent {
+ None => 0,
+ Some(Ok(ref dent)) => dent.depth(),
+ Some(Err(ref err)) => err.depth(),
+ };
+ if depth < self.depth {
+ self.depth -= 1;
+ self.next = dent;
+ return Some(Ok(WalkEvent::Exit));
+ }
+ self.depth = depth;
+ match dent {
+ None => None,
+ Some(Err(err)) => Some(Err(err)),
+ Some(Ok(dent)) => {
+ if walkdir_is_dir(&dent) {
+ self.depth += 1;
+ Some(Ok(WalkEvent::Dir(dent)))
+ } else {
+ Some(Ok(WalkEvent::File(dent)))
+ }
+ }
+ }
+ }
+}
+
+/// WalkState is used in the parallel recursive directory iterator to indicate
+/// whether walking should continue as normal, skip descending into a
+/// particular directory or quit the walk entirely.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum WalkState {
+ /// Continue walking as normal.
+ Continue,
+ /// If the directory entry given is a directory, don't descend into it.
+ /// In all other cases, this has no effect.
+ Skip,
+ /// Quit the entire iterator as soon as possible.
+ ///
+ /// Note that this is an inherently asynchronous action. It is possible
+ /// for more entries to be yielded even after instructing the iterator
+ /// to quit.
+ Quit,
+}
+
+impl WalkState {
+ fn is_continue(&self) -> bool {
+ *self == WalkState::Continue
+ }
+
+ fn is_quit(&self) -> bool {
+ *self == WalkState::Quit
+ }
+}
+
+/// A builder for constructing a visitor when using
+/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
+/// will be called for each thread started by `WalkParallel`. The visitor
+/// returned from each builder is then called for every directory entry.
+pub trait ParallelVisitorBuilder<'s> {
+ /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
+ fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
+}
+
+impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
+ for &'a mut P
+{
+ fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
+ (**self).build()
+ }
+}
+
+/// Receives files and directories for the current thread.
+///
+/// Setup for the traversal can be implemented as part of
+/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
+/// Teardown when traversal finishes can be implemented by implementing the
+/// `Drop` trait on your traversal type.
+pub trait ParallelVisitor: Send {
+ /// Receives files and directories for the current thread. This is called
+ /// once for every directory entry visited by traversal.
+ fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
+}
+
+struct FnBuilder<F> {
+ builder: F,
+}
+
+impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
+ for FnBuilder<F>
+{
+ fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
+ let visitor = (self.builder)();
+ Box::new(FnVisitorImp { visitor })
+ }
+}
+
+type FnVisitor<'s> =
+ Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
+
+struct FnVisitorImp<'s> {
+ visitor: FnVisitor<'s>,
+}
+
+impl<'s> ParallelVisitor for FnVisitorImp<'s> {
+ fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
+ (self.visitor)(entry)
+ }
+}
+
+/// WalkParallel is a parallel recursive directory iterator over file paths
+/// in one or more directories.
+///
+/// Only file and directory paths matching the rules are returned. By default,
+/// ignore files like `.gitignore` are respected. The precise matching rules
+/// and precedence is explained in the documentation for `WalkBuilder`.
+///
+/// Unlike `Walk`, this uses multiple threads for traversing a directory.
+pub struct WalkParallel {
+ paths: vec::IntoIter<PathBuf>,
+ ig_root: Ignore,
+ max_filesize: Option<u64>,
+ max_depth: Option<usize>,
+ follow_links: bool,
+ same_file_system: bool,
+ threads: usize,
+ skip: Option<Arc<Handle>>,
+ filter: Option<Filter>,
+}
+
+impl WalkParallel {
+ /// Execute the parallel recursive directory iterator. `mkf` is called
+ /// for each thread used for iteration. The function produced by `mkf`
+ /// is then in turn called for each visited file path.
+ pub fn run<'s, F>(self, mkf: F)
+ where
+ F: FnMut() -> FnVisitor<'s>,
+ {
+ self.visit(&mut FnBuilder { builder: mkf })
+ }
+
+ /// Execute the parallel recursive directory iterator using a custom
+ /// visitor.
+ ///
+ /// The builder given is used to construct a visitor for every thread
+ /// used by this traversal. The visitor returned from each builder is then
+ /// called for every directory entry seen by that thread.
+ ///
+ /// Typically, creating a custom visitor is useful if you need to perform
+ /// some kind of cleanup once traversal is finished. This can be achieved
+ /// by implementing `Drop` for your builder (or for your visitor, if you
+ /// want to execute cleanup for every thread that is launched).
+ ///
+ /// For example, each visitor might build up a data structure of results
+ /// corresponding to the directory entries seen for each thread. Since each
+ /// visitor runs on only one thread, this build-up can be done without
+ /// synchronization. Then, once traversal is complete, all of the results
+ /// can be merged together into a single data structure.
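+ ///
+ /// A minimal sketch of such a visitor, collecting paths per thread and
+ /// merging them when each visitor is dropped. Names such as
+ /// `PathCollectorBuilder` are illustrative, not part of this crate.
+ ///
+ /// ```no_run
+ /// use std::path::PathBuf;
+ /// use std::sync::{Arc, Mutex};
+ ///
+ /// use ignore::{
+ ///     DirEntry, Error, ParallelVisitor, ParallelVisitorBuilder,
+ ///     WalkBuilder, WalkState,
+ /// };
+ ///
+ /// struct PathCollectorBuilder {
+ ///     all: Arc<Mutex<Vec<PathBuf>>>,
+ /// }
+ ///
+ /// struct PathCollector {
+ ///     all: Arc<Mutex<Vec<PathBuf>>>,
+ ///     local: Vec<PathBuf>,
+ /// }
+ ///
+ /// impl<'s> ParallelVisitorBuilder<'s> for PathCollectorBuilder {
+ ///     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
+ ///         // One visitor per worker thread.
+ ///         Box::new(PathCollector { all: self.all.clone(), local: vec![] })
+ ///     }
+ /// }
+ ///
+ /// impl ParallelVisitor for PathCollector {
+ ///     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
+ ///         if let Ok(entry) = entry {
+ ///             // Build up per-thread results without synchronization.
+ ///             self.local.push(entry.into_path());
+ ///         }
+ ///         WalkState::Continue
+ ///     }
+ /// }
+ ///
+ /// impl Drop for PathCollector {
+ ///     fn drop(&mut self) {
+ ///         // Merge this thread's results once traversal has finished.
+ ///         self.all.lock().unwrap().extend(self.local.drain(..));
+ ///     }
+ /// }
+ ///
+ /// let all = Arc::new(Mutex::new(vec![]));
+ /// let mut builder = PathCollectorBuilder { all: all.clone() };
+ /// WalkBuilder::new("./").build_parallel().visit(&mut builder);
+ /// println!("{} entries", all.lock().unwrap().len());
+ /// ```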
+ pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
+ let threads = self.threads();
+ let stack = Arc::new(Mutex::new(vec![]));
+ {
+ let mut stack = stack.lock().unwrap();
+ let mut visitor = builder.build();
+ let mut paths = Vec::new().into_iter();
+ std::mem::swap(&mut paths, &mut self.paths);
+ // Send the initial set of root paths to the pool of workers. Note
+ // that we only send directories. For files, we invoke the visitor's
+ // callback directly.
+ for path in paths {
+ let (dent, root_device) = if path == Path::new("-") {
+ (DirEntry::new_stdin(), None)
+ } else {
+ let root_device = if !self.same_file_system {
+ None
+ } else {
+ match device_num(&path) {
+ Ok(root_device) => Some(root_device),
+ Err(err) => {
+ let err = Error::Io(err).with_path(path);
+ if visitor.visit(Err(err)).is_quit() {
+ return;
+ }
+ continue;
+ }
+ }
+ };
+ match DirEntryRaw::from_path(0, path, false) {
+ Ok(dent) => {
+ (DirEntry::new_raw(dent, None), root_device)
+ }
+ Err(err) => {
+ if visitor.visit(Err(err)).is_quit() {
+ return;
+ }
+ continue;
+ }
+ }
+ };
+ stack.push(Message::Work(Work {
+ dent: dent,
+ ignore: self.ig_root.clone(),
+ root_device: root_device,
+ }));
+ }
+ // ... but there's no need to start workers if we don't need them.
+ if stack.is_empty() {
+ return;
+ }
+ }
+ // Create the workers and then wait for them to finish.
+ let quit_now = Arc::new(AtomicBool::new(false));
+ let num_pending =
+ Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
+ std::thread::scope(|s| {
+ let mut handles = vec![];
+ for _ in 0..threads {
+ let worker = Worker {
+ visitor: builder.build(),
+ stack: stack.clone(),
+ quit_now: quit_now.clone(),
+ num_pending: num_pending.clone(),
+ max_depth: self.max_depth,
+ max_filesize: self.max_filesize,
+ follow_links: self.follow_links,
+ skip: self.skip.clone(),
+ filter: self.filter.clone(),
+ };
+ handles.push(s.spawn(|| worker.run()));
+ }
+ for handle in handles {
+ handle.join().unwrap();
+ }
+ });
+ }
+
+ fn threads(&self) -> usize {
+ if self.threads == 0 {
+ 2
+ } else {
+ self.threads
+ }
+ }
+}
+
+/// Message is the set of instructions that a worker knows how to process.
+enum Message {
+ /// A work item corresponds to a directory that should be descended into.
+ /// Work items for entries that should be skipped or ignored should not
+ /// be produced.
+ Work(Work),
+ /// This instruction indicates that the worker should quit.
+ Quit,
+}
+
+/// A unit of work for each worker to process.
+///
+/// Each unit of work corresponds to a directory that should be descended
+/// into.
+struct Work {
+ /// The directory entry.
+ dent: DirEntry,
+ /// Any ignore matchers that have been built for this directory's parents.
+ ignore: Ignore,
+ /// The root device number. When present, only files with the same device
+ /// number should be considered.
+ root_device: Option<u64>,
+}
+
+impl Work {
+ /// Returns true if and only if this work item is a directory.
+ fn is_dir(&self) -> bool {
+ self.dent.is_dir()
+ }
+
+ /// Returns true if and only if this work item is a symlink.
+ fn is_symlink(&self) -> bool {
+ self.dent.file_type().map_or(false, |ft| ft.is_symlink())
+ }
+
+ /// Adds ignore rules for parent directories.
+ ///
+ /// Note that this only applies to entries at depth 0. On all other
+ /// entries, this is a no-op.
+ fn add_parents(&mut self) -> Option<Error> {
+ if self.dent.depth() > 0 {
+ return None;
+ }
+ // At depth 0, the path of this entry is a root path, so we can
+ // use it directly to add parent ignore rules.
+ let (ig, err) = self.ignore.add_parents(self.dent.path());
+ self.ignore = ig;
+ err
+ }
+
+ /// Reads the directory contents of this work item and adds ignore
+ /// rules for this directory.
+ ///
+ /// If there was a problem with reading the directory contents, then
+ /// an error is returned. If there was a problem reading the ignore
+ /// rules for this directory, then the error is attached to this
+ /// work item's directory entry.
+ fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
+ let readdir = match fs::read_dir(self.dent.path()) {
+ Ok(readdir) => readdir,
+ Err(err) => {
+ let err = Error::from(err)
+ .with_path(self.dent.path())
+ .with_depth(self.dent.depth());
+ return Err(err);
+ }
+ };
+ let (ig, err) = self.ignore.add_child(self.dent.path());
+ self.ignore = ig;
+ self.dent.err = err;
+ Ok(readdir)
+ }
+}
+
+/// A worker is responsible for descending into directories, updating the
+/// ignore matchers, producing new work and invoking the caller's callback.
+///
+/// Note that a worker is *both* a producer and a consumer.
+struct Worker<'s> {
+ /// The caller's callback.
+ visitor: Box<dyn ParallelVisitor + 's>,
+ /// A stack of work to do.
+ ///
+ /// We use a stack instead of a channel because a stack lets us visit
+ /// directories in depth first order. This can substantially reduce peak
+ /// memory usage by keeping both the number of file paths and gitignore
+ /// matchers held in memory lower.
+ stack: Arc<Mutex<Vec<Message>>>,
+ /// Whether all workers should terminate at the next opportunity. Note
+ /// that we need this because we don't want other `Work` to be done after
+ /// we quit. We wouldn't need this if we had a priority channel.
+ quit_now: Arc<AtomicBool>,
+ /// The number of outstanding work items.
+ num_pending: Arc<AtomicUsize>,
+ /// The maximum depth of directories to descend. A value of `0` means no
+ /// descent at all.
+ max_depth: Option<usize>,
+ /// The maximum size a searched file can be (in bytes). If a file exceeds
+ /// this size it will be skipped.
+ max_filesize: Option<u64>,
+ /// Whether to follow symbolic links or not. When this is enabled, loop
+ /// detection is performed.
+ follow_links: bool,
+ /// A file handle to skip. This is currently either `None` or a handle
+ /// to stdout, if stdout is a file and it has been requested to skip
+ /// files identical to stdout.
+ skip: Option<Arc<Handle>>,
+ /// A predicate applied to dir entries. If true, the entry and all
+ /// children will be skipped.
+ filter: Option<Filter>,
+}
+
+impl<'s> Worker<'s> {
+ /// Runs this worker until there is no more work left to do.
+ ///
+ /// The worker will call the caller's callback for all entries that aren't
+ /// skipped by the ignore matcher.
+ fn run(mut self) {
+ while let Some(work) = self.get_work() {
+ if let WalkState::Quit = self.run_one(work) {
+ self.quit_now();
+ }
+ self.work_done();
+ }
+ }
+
+ fn run_one(&mut self, mut work: Work) -> WalkState {
+ // If the work is not a directory, then we can just execute the
+ // caller's callback immediately and move on.
+ if work.is_symlink() || !work.is_dir() {
+ return self.visitor.visit(Ok(work.dent));
+ }
+ if let Some(err) = work.add_parents() {
+ let state = self.visitor.visit(Err(err));
+ if state.is_quit() {
+ return state;
+ }
+ }
+
+ let descend = if let Some(root_device) = work.root_device {
+ match is_same_file_system(root_device, work.dent.path()) {
+ Ok(true) => true,
+ Ok(false) => false,
+ Err(err) => {
+ let state = self.visitor.visit(Err(err));
+ if state.is_quit() {
+ return state;
+ }
+ false
+ }
+ }
+ } else {
+ true
+ };
+
+ // Try to read the directory first before we transfer ownership
+ // to the provided closure. Do not unwrap it immediately, though,
+ // as we may receive an `Err` value e.g. in the case when we do not
+ // have sufficient read permissions to list the directory.
+ // In that case we still want to provide the closure with a valid
+ // entry before passing the error value.
+ let readdir = work.read_dir();
+ let depth = work.dent.depth();
+ let state = self.visitor.visit(Ok(work.dent));
+ if !state.is_continue() {
+ return state;
+ }
+ if !descend {
+ return WalkState::Skip;
+ }
+
+ let readdir = match readdir {
+ Ok(readdir) => readdir,
+ Err(err) => {
+ return self.visitor.visit(Err(err));
+ }
+ };
+
+ if self.max_depth.map_or(false, |max| depth >= max) {
+ return WalkState::Skip;
+ }
+ for result in readdir {
+ let state = self.generate_work(
+ &work.ignore,
+ depth + 1,
+ work.root_device,
+ result,
+ );
+ if state.is_quit() {
+ return state;
+ }
+ }
+ WalkState::Continue
+ }
+
+ /// Decides whether to submit the given directory entry as a file to
+ /// search.
+ ///
+ /// If the entry is a path that should be ignored, then this is a no-op.
+ /// Otherwise, the entry is pushed on to the queue. (The actual execution
+ /// of the callback happens in `run_one`.)
+ ///
+ /// If an error occurs while reading the entry, then it is sent to the
+ /// caller's callback.
+ ///
+ /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
+ /// be the depth of this entry. `result` should be the item yielded by
+ /// a directory iterator.
+ fn generate_work(
+ &mut self,
+ ig: &Ignore,
+ depth: usize,
+ root_device: Option<u64>,
+ result: Result<fs::DirEntry, io::Error>,
+ ) -> WalkState {
+ let fs_dent = match result {
+ Ok(fs_dent) => fs_dent,
+ Err(err) => {
+ return self
+ .visitor
+ .visit(Err(Error::from(err).with_depth(depth)));
+ }
+ };
+ let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
+ Ok(dent) => DirEntry::new_raw(dent, None),
+ Err(err) => {
+ return self.visitor.visit(Err(err));
+ }
+ };
+ let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
+ if self.follow_links && is_symlink {
+ let path = dent.path().to_path_buf();
+ dent = match DirEntryRaw::from_path(depth, path, true) {
+ Ok(dent) => DirEntry::new_raw(dent, None),
+ Err(err) => {
+ return self.visitor.visit(Err(err));
+ }
+ };
+ if dent.is_dir() {
+ if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
+ return self.visitor.visit(Err(err));
+ }
+ }
+ }
+ // N.B. See the analogous call in the single-threaded implementation for
+ // why it's important for this to come before the checks below.
+ if should_skip_entry(ig, &dent) {
+ return WalkState::Continue;
+ }
+ if let Some(ref stdout) = self.skip {
+ let is_stdout = match path_equals(&dent, stdout) {
+ Ok(is_stdout) => is_stdout,
+ Err(err) => return self.visitor.visit(Err(err)),
+ };
+ if is_stdout {
+ return WalkState::Continue;
+ }
+ }
+ let should_skip_filesize =
+ if self.max_filesize.is_some() && !dent.is_dir() {
+ skip_filesize(
+ self.max_filesize.unwrap(),
+ dent.path(),
+ &dent.metadata().ok(),
+ )
+ } else {
+ false
+ };
+ let should_skip_filtered =
+ if let Some(Filter(predicate)) = &self.filter {
+ !predicate(&dent)
+ } else {
+ false
+ };
+ if !should_skip_filesize && !should_skip_filtered {
+ self.send(Work { dent, ignore: ig.clone(), root_device });
+ }
+ WalkState::Continue
+ }
+
+ /// Returns the next directory to descend into.
+ ///
+ /// If all work has been exhausted, then this returns None. The worker
+ /// should then quit.
+ fn get_work(&mut self) -> Option<Work> {
+ let mut value = self.recv();
+ loop {
+ // Simulate a priority channel: If quit_now flag is set, we can
+ // receive only quit messages.
+ if self.is_quit_now() {
+ value = Some(Message::Quit)
+ }
+ match value {
+ Some(Message::Work(work)) => {
+ return Some(work);
+ }
+ Some(Message::Quit) => {
+ // Repeat quit message to wake up sleeping threads, if
+ // any. The domino effect will ensure that every thread
+ // will quit.
+ self.send_quit();
+ return None;
+ }
+ None => {
+ // Once num_pending reaches 0, it is impossible for it to
+ // ever increase again. Namely, it only reaches 0 once
+ // all jobs have run such that no jobs have produced more
+ // work. We have this guarantee because num_pending is
+ // always incremented before each job is submitted and only
+ // decremented once each job is completely finished.
+ // Therefore, if this reaches zero, then there can be no
+ // other job running.
+ if self.num_pending() == 0 {
+ // Every other thread is blocked at the next recv().
+ // Send the initial quit message and quit.
+ self.send_quit();
+ return None;
+ }
+ // Wait for next `Work` or `Quit` message.
+ loop {
+ if let Some(v) = self.recv() {
+ value = Some(v);
+ break;
+ }
+ // Our stack isn't blocking. Instead of burning the
+ // CPU waiting, we let the thread sleep for a bit. In
+ // general, this tends to only occur once the search is
+ // approaching termination.
+ thread::sleep(Duration::from_millis(1));
+ }
+ }
+ }
+ }
+ }
+
+ /// Indicates that all workers should quit immediately.
+ fn quit_now(&self) {
+ self.quit_now.store(true, Ordering::SeqCst);
+ }
+
+ /// Returns true if this worker should quit immediately.
+ fn is_quit_now(&self) -> bool {
+ self.quit_now.load(Ordering::SeqCst)
+ }
+
+ /// Returns the number of pending jobs.
+ fn num_pending(&self) -> usize {
+ self.num_pending.load(Ordering::SeqCst)
+ }
+
+ /// Send work.
+ fn send(&self, work: Work) {
+ self.num_pending.fetch_add(1, Ordering::SeqCst);
+ let mut stack = self.stack.lock().unwrap();
+ stack.push(Message::Work(work));
+ }
+
+ /// Send a quit message.
+ fn send_quit(&self) {
+ let mut stack = self.stack.lock().unwrap();
+ stack.push(Message::Quit);
+ }
+
+ /// Receive work.
+ fn recv(&self) -> Option<Message> {
+ let mut stack = self.stack.lock().unwrap();
+ stack.pop()
+ }
+
+ /// Signal that work has been received.
+ fn work_done(&self) {
+ self.num_pending.fetch_sub(1, Ordering::SeqCst);
+ }
+}
+
+fn check_symlink_loop(
+ ig_parent: &Ignore,
+ child_path: &Path,
+ child_depth: usize,
+) -> Result<(), Error> {
+ let hchild = Handle::from_path(child_path).map_err(|err| {
+ Error::from(err).with_path(child_path).with_depth(child_depth)
+ })?;
+ for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
+ let h = Handle::from_path(ig.path()).map_err(|err| {
+ Error::from(err).with_path(child_path).with_depth(child_depth)
+ })?;
+ if hchild == h {
+ return Err(Error::Loop {
+ ancestor: ig.path().to_path_buf(),
+ child: child_path.to_path_buf(),
+ }
+ .with_depth(child_depth));
+ }
+ }
+ Ok(())
+}
+
+// Before calling this function, make sure it is really necessary, as
+// computing the arguments implies a file stat.
+fn skip_filesize(
+ max_filesize: u64,
+ path: &Path,
+ ent: &Option<Metadata>,
+) -> bool {
+ let filesize = match *ent {
+ Some(ref md) => Some(md.len()),
+ None => None,
+ };
+
+ if let Some(fs) = filesize {
+ if fs > max_filesize {
+ log::debug!("ignoring {}: {} bytes", path.display(), fs);
+ true
+ } else {
+ false
+ }
+ } else {
+ false
+ }
+}
+
+fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
+ let m = ig.matched_dir_entry(dent);
+ if m.is_ignore() {
+ log::debug!("ignoring {}: {:?}", dent.path().display(), m);
+ true
+ } else if m.is_whitelist() {
+ log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
+ false
+ } else {
+ false
+ }
+}
+
+/// Returns a handle to stdout for filtering search.
+///
+/// A handle is returned if and only if stdout is being redirected to a file.
+/// The handle returned corresponds to that file.
+///
+/// This can be used to ensure that we do not attempt to search a file that we
+/// may also be writing to.
+fn stdout_handle() -> Option<Handle> {
+ let h = match Handle::stdout() {
+ Err(_) => return None,
+ Ok(h) => h,
+ };
+ let md = match h.as_file().metadata() {
+ Err(_) => return None,
+ Ok(md) => md,
+ };
+ if !md.is_file() {
+ return None;
+ }
+ Some(h)
+}
+
+/// Returns true if and only if the given directory entry is believed to be
+/// equivalent to the given handle. If there was a problem querying the path
+/// for information to determine equality, then that error is returned.
+fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
+ #[cfg(unix)]
+ fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
+ dent.ino() != Some(handle.ino())
+ }
+
+ #[cfg(not(unix))]
+ fn never_equal(_: &DirEntry, _: &Handle) -> bool {
+ false
+ }
+
+ // If we know for sure that these two things aren't equal, then avoid
+ // the costly extra stat call to determine equality.
+ if dent.is_stdin() || never_equal(dent, handle) {
+ return Ok(false);
+ }
+ Handle::from_path(dent.path())
+ .map(|h| &h == handle)
+ .map_err(|err| Error::Io(err).with_path(dent.path()))
+}
+
+/// Returns true if the given walkdir entry corresponds to a directory.
+///
+/// This is normally just `dent.file_type().is_dir()`, but when we aren't
+/// following symlinks, the root directory entry may be a symlink to a
+/// directory that we *do* follow---by virtue of it being specified by the user
+/// explicitly. In that case, we need to follow the symlink and query whether
+/// it's a directory or not. But we only do this for root entries to avoid an
+/// additional stat check in most cases.
+fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
+ if dent.file_type().is_dir() {
+ return true;
+ }
+ if !dent.file_type().is_symlink() || dent.depth() > 0 {
+ return false;
+ }
+ dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
+}
+
+/// Returns true if and only if the given path is on the same device as the
+/// given root device.
+fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
+ let dent_device =
+ device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
+ Ok(root_device == dent_device)
+}
+
+#[cfg(unix)]
+fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+ use std::os::unix::fs::MetadataExt;
+
+ path.as_ref().metadata().map(|md| md.dev())
+}
+
+#[cfg(windows)]
+fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+ use winapi_util::{file, Handle};
+
+ let h = Handle::from_path_any(path)?;
+ file::information(h).map(|info| info.volume_serial_number())
+}
+
+#[cfg(not(any(unix, windows)))]
+fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
+ Err(io::Error::new(
+ io::ErrorKind::Other,
+ "walkdir: same_file_system option not supported on this platform",
+ ))
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ffi::OsStr;
+ use std::fs::{self, File};
+ use std::io::Write;
+ use std::path::Path;
+ use std::sync::{Arc, Mutex};
+
+ use super::{DirEntry, WalkBuilder, WalkState};
+ use crate::tests::TempDir;
+
+ fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
+ let mut file = File::create(path).unwrap();
+ file.write_all(contents.as_bytes()).unwrap();
+ }
+
+ fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
+ let file = File::create(path).unwrap();
+ file.set_len(size).unwrap();
+ }
+
+ #[cfg(unix)]
+ fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
+ use std::os::unix::fs::symlink;
+ symlink(src, dst).unwrap();
+ }
+
+ fn mkdirp<P: AsRef<Path>>(path: P) {
+ fs::create_dir_all(path).unwrap();
+ }
+
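+ // On Windows, rewrite backslashes to forward slashes so expected paths
+ // can be written in Unix style throughout the tests.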
+ fn normal_path(unix: &str) -> String {
+ if cfg!(windows) {
+ unix.replace("\\", "/")
+ } else {
+ unix.to_string()
+ }
+ }
+
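+ // Runs the single threaded walker and collects the sorted paths relative
+ // to `prefix`, skipping errors and the root entry itself.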
+ fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
+ let mut paths = vec![];
+ for result in builder.build() {
+ let dent = match result {
+ Err(_) => continue,
+ Ok(dent) => dent,
+ };
+ let path = dent.path().strip_prefix(prefix).unwrap();
+ if path.as_os_str().is_empty() {
+ continue;
+ }
+ paths.push(normal_path(path.to_str().unwrap()));
+ }
+ paths.sort();
+ paths
+ }
+
+ fn walk_collect_parallel(
+ prefix: &Path,
+ builder: &WalkBuilder,
+ ) -> Vec<String> {
+ let mut paths = vec![];
+ for dent in walk_collect_entries_parallel(builder) {
+ let path = dent.path().strip_prefix(prefix).unwrap();
+ if path.as_os_str().is_empty() {
+ continue;
+ }
+ paths.push(normal_path(path.to_str().unwrap()));
+ }
+ paths.sort();
+ paths
+ }
+
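+ // Runs the parallel walker, pushing every successful entry into a shared
+ // mutex-guarded vector.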
+ fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
+ let dents = Arc::new(Mutex::new(vec![]));
+ builder.build_parallel().run(|| {
+ let dents = dents.clone();
+ Box::new(move |result| {
+ if let Ok(dent) = result {
+ dents.lock().unwrap().push(dent);
+ }
+ WalkState::Continue
+ })
+ });
+
+ let dents = dents.lock().unwrap();
+ dents.to_vec()
+ }
+
+ fn mkpaths(paths: &[&str]) -> Vec<String> {
+ let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
+ paths.sort();
+ paths
+ }
+
+ fn tmpdir() -> TempDir {
+ TempDir::new().unwrap()
+ }
+
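+ // Asserts that both the single threaded and the parallel walker yield
+ // exactly `expected`, relative to `prefix` and sorted.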
+ fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
+ let got = walk_collect(prefix, builder);
+ assert_eq!(got, mkpaths(expected), "single threaded");
+ let got = walk_collect_parallel(prefix, builder);
+ assert_eq!(got, mkpaths(expected), "parallel");
+ }
+
+ #[test]
+ fn no_ignores() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b/c"));
+ mkdirp(td.path().join("x/y"));
+ wfile(td.path().join("a/b/foo"), "");
+ wfile(td.path().join("x/y/foo"), "");
+
+ assert_paths(
+ td.path(),
+ &WalkBuilder::new(td.path()),
+ &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
+ );
+ }
+
+ #[test]
+ fn custom_ignore() {
+ let td = tmpdir();
+ let custom_ignore = ".customignore";
+ mkdirp(td.path().join("a"));
+ wfile(td.path().join(custom_ignore), "foo");
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("bar"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ builder.add_custom_ignore_filename(&custom_ignore);
+ assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+ }
+
+ #[test]
+ fn custom_ignore_exclusive_use() {
+ let td = tmpdir();
+ let custom_ignore = ".customignore";
+ mkdirp(td.path().join("a"));
+ wfile(td.path().join(custom_ignore), "foo");
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("bar"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ builder.ignore(false);
+ builder.git_ignore(false);
+ builder.git_global(false);
+ builder.git_exclude(false);
+ builder.add_custom_ignore_filename(&custom_ignore);
+ assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+ }
+
+ #[test]
+ fn gitignore() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ mkdirp(td.path().join("a"));
+ wfile(td.path().join(".gitignore"), "foo");
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("bar"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ assert_paths(
+ td.path(),
+ &WalkBuilder::new(td.path()),
+ &["bar", "a", "a/bar"],
+ );
+ }
+
+ #[test]
+ fn explicit_ignore() {
+ let td = tmpdir();
+ let igpath = td.path().join(".not-an-ignore");
+ mkdirp(td.path().join("a"));
+ wfile(&igpath, "foo");
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("bar"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ assert!(builder.add_ignore(&igpath).is_none());
+ assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+ }
+
+ #[test]
+ fn explicit_ignore_exclusive_use() {
+ let td = tmpdir();
+ let igpath = td.path().join(".not-an-ignore");
+ mkdirp(td.path().join("a"));
+ wfile(&igpath, "foo");
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("bar"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ builder.standard_filters(false);
+ assert!(builder.add_ignore(&igpath).is_none());
+ assert_paths(
+ td.path(),
+ &builder,
+ &[".not-an-ignore", "bar", "a", "a/bar"],
+ );
+ }
+
+ #[test]
+ fn gitignore_parent() {
+ let td = tmpdir();
+ mkdirp(td.path().join(".git"));
+ mkdirp(td.path().join("a"));
+ wfile(td.path().join(".gitignore"), "foo");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("a/bar"), "");
+
+ let root = td.path().join("a");
+ assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
+ }
+
+ #[test]
+ fn max_depth() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b/c"));
+ wfile(td.path().join("foo"), "");
+ wfile(td.path().join("a/foo"), "");
+ wfile(td.path().join("a/b/foo"), "");
+ wfile(td.path().join("a/b/c/foo"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ assert_paths(
+ td.path(),
+ &builder,
+ &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
+ );
+ assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
+ assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
+ assert_paths(
+ td.path(),
+ builder.max_depth(Some(2)),
+ &["a", "a/b", "foo", "a/foo"],
+ );
+ }
+
+ #[test]
+ fn max_filesize() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b"));
+ wfile_size(td.path().join("foo"), 0);
+ wfile_size(td.path().join("bar"), 400);
+ wfile_size(td.path().join("baz"), 600);
+ wfile_size(td.path().join("a/foo"), 600);
+ wfile_size(td.path().join("a/bar"), 500);
+ wfile_size(td.path().join("a/baz"), 200);
+
+ let mut builder = WalkBuilder::new(td.path());
+ assert_paths(
+ td.path(),
+ &builder,
+ &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
+ );
+ assert_paths(
+ td.path(),
+ builder.max_filesize(Some(0)),
+ &["a", "a/b", "foo"],
+ );
+ assert_paths(
+ td.path(),
+ builder.max_filesize(Some(500)),
+ &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
+ );
+ assert_paths(
+ td.path(),
+ builder.max_filesize(Some(50000)),
+ &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
+ );
+ }
+
+ #[cfg(unix)] // because symlinks on windows are weird
+ #[test]
+ fn symlinks() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b"));
+ symlink(td.path().join("a/b"), td.path().join("z"));
+ wfile(td.path().join("a/b/foo"), "");
+
+ let mut builder = WalkBuilder::new(td.path());
+ assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
+ assert_paths(
+ td.path(),
+ &builder.follow_links(true),
+ &["a", "a/b", "a/b/foo", "z", "z/foo"],
+ );
+ }
+
+ #[cfg(unix)] // because symlinks on windows are weird
+ #[test]
+ fn first_path_not_symlink() {
+ let td = tmpdir();
+ mkdirp(td.path().join("foo"));
+
+ let dents = WalkBuilder::new(td.path().join("foo"))
+ .build()
+ .into_iter()
+ .collect::<Result<Vec<_>, _>>()
+ .unwrap();
+ assert_eq!(1, dents.len());
+ assert!(!dents[0].path_is_symlink());
+
+ let dents = walk_collect_entries_parallel(&WalkBuilder::new(
+ td.path().join("foo"),
+ ));
+ assert_eq!(1, dents.len());
+ assert!(!dents[0].path_is_symlink());
+ }
+
+ #[cfg(unix)] // because symlinks on windows are weird
+ #[test]
+ fn symlink_loop() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b"));
+ symlink(td.path().join("a"), td.path().join("a/b/c"));
+
+ let mut builder = WalkBuilder::new(td.path());
+ assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
+ assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
+ }
+
+ // It's a little tricky to test the 'same_file_system' option since we
+ // need an environment with more than one file system. We rely on the
+ // heuristic that /sys is typically a distinct file system on Linux and
+ // roll with that.
+ #[test]
+ #[cfg(target_os = "linux")]
+ fn same_file_system() {
+ use super::device_num;
+
+ // If for some reason /sys doesn't exist or isn't a directory, just
+ // skip this test.
+ if !Path::new("/sys").is_dir() {
+ return;
+ }
+
+ // If our test directory actually isn't a different volume from /sys,
+ // then this test is meaningless and we shouldn't run it.
+ let td = tmpdir();
+ if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
+ return;
+ }
+
+ mkdirp(td.path().join("same_file"));
+ symlink("/sys", td.path().join("same_file").join("alink"));
+
+ // Create a symlink to sys and enable following symlinks. If the
+ // same_file_system option doesn't work, then this probably will hit a
+ // permission error. Otherwise, it should just skip over the symlink
+ // completely.
+ let mut builder = WalkBuilder::new(td.path());
+ builder.follow_links(true).same_file_system(true);
+ assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
+ }
+
+ #[cfg(target_os = "linux")]
+ #[test]
+ fn no_read_permissions() {
+ let dir_path = Path::new("/root");
+
+ // There's no /root directory, so skip the test.
+ if !dir_path.is_dir() {
+ return;
+ }
+ // We're running as root, so /root is readable and the test won't check
+ // what we want it to.
+ if fs::read_dir(&dir_path).is_ok() {
+ return;
+ }
+
+ // Check that we can't descend but get an entry for the parent dir.
+ let builder = WalkBuilder::new(&dir_path);
+ assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
+ }
+
+ #[test]
+ fn filter() {
+ let td = tmpdir();
+ mkdirp(td.path().join("a/b/c"));
+ mkdirp(td.path().join("x/y"));
+ wfile(td.path().join("a/b/foo"), "");
+ wfile(td.path().join("x/y/foo"), "");
+
+ assert_paths(
+ td.path(),
+ &WalkBuilder::new(td.path()),
+ &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
+ );
+
+ assert_paths(
+ td.path(),
+ &WalkBuilder::new(td.path())
+ .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
+ &["x", "x/y", "x/y/foo"],
+ );
+ }
+}