author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-04 12:47:55 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-04 12:47:55 +0000
commit     2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch)
tree       033cc839730fda84ff08db877037977be94e5e3a /vendor/ignore/src
parent     Initial commit. (diff)
download   cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.tar.xz
           cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.zip

Adding upstream version 0.70.1+ds1. (upstream/0.70.1+ds1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ignore/src')
-rw-r--r--   vendor/ignore/src/default_types.rs    316
-rw-r--r--   vendor/ignore/src/dir.rs              1188
-rw-r--r--   vendor/ignore/src/gitignore.rs         789
-rw-r--r--   vendor/ignore/src/lib.rs               550
-rw-r--r--   vendor/ignore/src/overrides.rs         263
-rw-r--r--   vendor/ignore/src/pathutil.rs          142
-rw-r--r--   vendor/ignore/src/types.rs             583
-rw-r--r--   vendor/ignore/src/walk.rs             2251
8 files changed, 6082 insertions(+), 0 deletions(-)
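
For context, the files added below are the sources of BurntSushi's `ignore` crate (the gitignore-aware directory walker used by ripgrep), vendored for cargo. The following sketch is illustrative only and is not part of the vendored diff; it assumes the crate is available as the `ignore` dependency and exercises its two main entry points: the recursive walker (walk.rs/dir.rs) and the standalone gitignore matcher (gitignore.rs).

use ignore::gitignore::GitignoreBuilder;
use ignore::WalkBuilder;

fn main() {
    // 1) Recursively walk "." while honoring .gitignore, .ignore and git
    //    exclude files, as implemented by dir.rs and walk.rs in this diff.
    for result in WalkBuilder::new(".").build() {
        match result {
            Ok(entry) => println!("{}", entry.path().display()),
            Err(err) => eprintln!("walk error: {}", err),
        }
    }

    // 2) Standalone gitignore matching, as implemented by gitignore.rs.
    //    Patterns and paths here are made up for illustration.
    let mut builder = GitignoreBuilder::new("./");
    builder.add_line(None, "*.log").unwrap();
    builder.add_line(None, "!keep.log").unwrap();
    let matcher = builder.build().unwrap();
    assert!(matcher.matched("debug.log", false).is_ignore());
    assert!(matcher.matched("keep.log", false).is_whitelist());
}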
diff --git a/vendor/ignore/src/default_types.rs b/vendor/ignore/src/default_types.rs new file mode 100644 index 0000000..e6a3a8f --- /dev/null +++ b/vendor/ignore/src/default_types.rs @@ -0,0 +1,316 @@ +/// This list represents the default file types that ripgrep ships with. In +/// general, any file format is fair game, although it should generally be +/// limited to reasonably popular open formats. For other cases, you can add +/// types to each invocation of ripgrep with the '--type-add' flag. +/// +/// If you would like to add or improve this list, please file a PR: +/// <https://github.com/BurntSushi/ripgrep>. +/// +/// Please try to keep this list sorted lexicographically and wrapped to 79 +/// columns (inclusive). +#[rustfmt::skip] +pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ + ("agda", &["*.agda", "*.lagda"]), + ("aidl", &["*.aidl"]), + ("amake", &["*.mk", "*.bp"]), + ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]), + ("asm", &["*.asm", "*.s", "*.S"]), + ("asp", &[ + "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", + "*.ascx.vb", "*.asp" + ]), + ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]), + ("avro", &["*.avdl", "*.avpr", "*.avsc"]), + ("awk", &["*.awk"]), + ("bazel", &[ + "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel", + "WORKSPACE", "WORKSPACE.bazel", + ]), + ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), + ("brotli", &["*.br"]), + ("buildstream", &["*.bst"]), + ("bzip2", &["*.bz2", "*.tbz2"]), + ("c", &["*.[chH]", "*.[chH].in", "*.cats"]), + ("cabal", &["*.cabal"]), + ("candid", &["*.did"]), + ("carp", &["*.carp"]), + ("cbor", &["*.cbor"]), + ("ceylon", &["*.ceylon"]), + ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), + ("cmake", &["*.cmake", "CMakeLists.txt"]), + ("coffeescript", &["*.coffee"]), + ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]), + ("coq", &["*.v"]), + ("cpp", &[ + "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl", + "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in", + ]), + ("creole", &["*.creole"]), + ("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]), + ("cs", &["*.cs"]), + ("csharp", &["*.cs"]), + ("cshtml", &["*.cshtml"]), + ("css", &["*.css", "*.scss"]), + ("csv", &["*.csv"]), + ("cuda", &["*.cu", "*.cuh"]), + ("cython", &["*.pyx", "*.pxi", "*.pxd"]), + ("d", &["*.d"]), + ("dart", &["*.dart"]), + ("devicetree", &["*.dts", "*.dtsi"]), + ("dhall", &["*.dhall"]), + ("diff", &["*.patch", "*.diff"]), + ("docker", &["*Dockerfile*"]), + ("dts", &["*.dts", "*.dtsi"]), + ("dvc", &["Dvcfile", "*.dvc"]), + ("ebuild", &["*.ebuild"]), + ("edn", &["*.edn"]), + ("elisp", &["*.el"]), + ("elixir", &["*.ex", "*.eex", "*.exs"]), + ("elm", &["*.elm"]), + ("erb", &["*.erb"]), + ("erlang", &["*.erl", "*.hrl"]), + ("fennel", &["*.fnl"]), + ("fidl", &["*.fidl"]), + ("fish", &["*.fish"]), + ("flatbuffers", &["*.fbs"]), + ("fortran", &[ + "*.f", "*.F", "*.f77", "*.F77", "*.pfo", + "*.f90", "*.F90", "*.f95", "*.F95", + ]), + ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), + ("fut", &["*.fut"]), + ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]), + ("gn", &["*.gn", "*.gni"]), + ("go", &["*.go"]), + ("gradle", &["*.gradle"]), + ("groovy", &["*.groovy", "*.gradle"]), + ("gzip", &["*.gz", "*.tgz"]), + ("h", &["*.h", "*.hh", "*.hpp"]), + ("haml", &["*.haml"]), + ("hare", &["*.ha"]), + ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]), + ("hbs", &["*.hbs"]), + ("hs", &["*.hs", "*.lhs"]), + ("html", &["*.htm", "*.html", "*.ejs"]), + ("hy", &["*.hy"]), + ("idris", 
&["*.idr", "*.lidr"]), + ("janet", &["*.janet"]), + ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]), + ("jinja", &["*.j2", "*.jinja", "*.jinja2"]), + ("jl", &["*.jl"]), + ("js", &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]), + ("json", &["*.json", "composer.lock"]), + ("jsonl", &["*.jsonl"]), + ("julia", &["*.jl"]), + ("jupyter", &["*.ipynb", "*.jpynb"]), + ("k", &["*.k"]), + ("kotlin", &["*.kt", "*.kts"]), + ("less", &["*.less"]), + ("license", &[ + // General + "COPYING", "COPYING[.-]*", + "COPYRIGHT", "COPYRIGHT[.-]*", + "EULA", "EULA[.-]*", + "licen[cs]e", "licen[cs]e.*", + "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*", + "NOTICE", "NOTICE[.-]*", + "PATENTS", "PATENTS[.-]*", + "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*", + // GPL (gpl.txt, etc.) + "agpl[.-]*", + "gpl[.-]*", + "lgpl[.-]*", + // Other license-specific (APACHE-2.0.txt, etc.) + "AGPL-*[0-9]*", + "APACHE-*[0-9]*", + "BSD-*[0-9]*", + "CC-BY-*", + "GFDL-*[0-9]*", + "GNU-*[0-9]*", + "GPL-*[0-9]*", + "LGPL-*[0-9]*", + "MIT-*[0-9]*", + "MPL-*[0-9]*", + "OFL-*[0-9]*", + ]), + ("lilypond", &["*.ly", "*.ily"]), + ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + ("lock", &["*.lock", "package-lock.json"]), + ("log", &["*.log"]), + ("lua", &["*.lua"]), + ("lz4", &["*.lz4"]), + ("lzma", &["*.lzma"]), + ("m4", &["*.ac", "*.m4"]), + ("make", &[ + "[Gg][Nn][Uu]makefile", "[Mm]akefile", + "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am", + "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in", + "*.mk", "*.mak" + ]), + ("mako", &["*.mako", "*.mao"]), + ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]), + ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]), + ("matlab", &["*.m"]), + ("md", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]), + ("meson", &["meson.build", "meson_options.txt"]), + ("minified", &["*.min.html", "*.min.css", "*.min.js"]), + ("mint", &["*.mint"]), + ("mk", &["mkfile"]), + ("ml", &["*.ml"]), + ("motoko", &["*.mo"]), + ("msbuild", &[ + "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets", + ]), + ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]), + ("nix", &["*.nix"]), + ("objc", &["*.h", "*.m"]), + ("objcpp", &["*.h", "*.mm"]), + ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), + ("org", &["*.org", "*.org_archive"]), + ("pants", &["BUILD"]), + ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]), + ("pdf", &["*.pdf"]), + ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]), + ("php", &[ + // note that PHP 6 doesn't exist + // See: https://wiki.php.net/rfc/php6 + "*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8", + "*.pht", "*.phtml" + ]), + ("po", &["*.po"]), + ("pod", &["*.pod"]), + ("postscript", &["*.eps", "*.ps"]), + ("protobuf", &["*.proto"]), + ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), + ("puppet", &["*.epp", "*.erb", "*.pp", "*.rb"]), + ("purs", &["*.purs"]), + ("py", &["*.py"]), + ("qmake", &["*.pro", "*.pri", "*.prf"]), + ("qml", &["*.qml"]), + ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + ("racket", &["*.rkt"]), + ("rdoc", &["*.rdoc"]), + ("readme", &["README*", "*README"]), + ("reasonml", &["*.re", "*.rei"]), + ("red", &["*.r", "*.red", "*.reds"]), + ("rescript", &["*.res", "*.resi"]), + ("robot", &["*.robot"]), + ("rst", &["*.rst"]), + ("ruby", &[ + // Idiomatic files + "config.ru", "Gemfile", ".irbrc", "Rakefile", + // Extensions + "*.gemspec", "*.rb", "*.rbw" + ]), + ("rust", &["*.rs"]), + ("sass", &["*.sass", "*.scss"]), + ("scala", &["*.scala", "*.sbt"]), + ("sh", &[ + // Portable/misc. 
init files + ".login", ".logout", ".profile", "profile", + // bash-specific init files + ".bash_login", "bash_login", + ".bash_logout", "bash_logout", + ".bash_profile", "bash_profile", + ".bashrc", "bashrc", "*.bashrc", + // csh-specific init files + ".cshrc", "*.cshrc", + // ksh-specific init files + ".kshrc", "*.kshrc", + // tcsh-specific init files + ".tcshrc", + // zsh-specific init files + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + // Extensions + "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", + ]), + ("slim", &["*.skim", "*.slim", "*.slime"]), + ("smarty", &["*.tpl"]), + ("sml", &["*.sml", "*.sig"]), + ("solidity", &["*.sol"]), + ("soy", &["*.soy"]), + ("spark", &["*.spark"]), + ("spec", &["*.spec"]), + ("sql", &["*.sql", "*.psql"]), + ("stylus", &["*.styl"]), + ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), + ("svg", &["*.svg"]), + ("swift", &["*.swift"]), + ("swig", &["*.def", "*.i"]), + ("systemd", &[ + "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path", + "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target", + "*.timer", + ]), + ("taskpaper", &["*.taskpaper"]), + ("tcl", &["*.tcl"]), + ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]), + ("texinfo", &["*.texi"]), + ("textile", &["*.textile"]), + ("tf", &["*.tf"]), + ("thrift", &["*.thrift"]), + ("toml", &["*.toml", "Cargo.lock"]), + ("ts", &["*.ts", "*.tsx", "*.cts", "*.mts"]), + ("twig", &["*.twig"]), + ("txt", &["*.txt"]), + ("typoscript", &["*.typoscript", "*.ts"]), + ("vala", &["*.vala"]), + ("vb", &["*.vb"]), + ("vcl", &["*.vcl"]), + ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]), + ("vhdl", &["*.vhd", "*.vhdl"]), + ("vim", &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + ("vimscript", &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + ("webidl", &["*.idl", "*.webidl", "*.widl"]), + ("wiki", &["*.mediawiki", "*.wiki"]), + ("xml", &[ + "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb", + "*.rng", "*.sch", "*.xhtml", + ]), + ("xz", &["*.xz", "*.txz"]), + ("yacc", &["*.y"]), + ("yaml", &["*.yaml", "*.yml"]), + ("yang", &["*.yang"]), + ("z", &["*.Z"]), + ("zig", &["*.zig"]), + ("zsh", &[ + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + "*.zsh", + ]), + ("zstd", &["*.zst", "*.zstd"]), +]; + +#[cfg(test)] +mod tests { + use super::DEFAULT_TYPES; + + #[test] + fn default_types_are_sorted() { + let mut names = DEFAULT_TYPES.iter().map(|(name, _exts)| name); + + let Some(mut previous_name) = names.next() else { return; }; + + for name in names { + assert!( + name > previous_name, + r#""{}" should be sorted before "{}" in `DEFAULT_TYPES`"#, + name, + previous_name + ); + + previous_name = name; + } + } +} diff --git a/vendor/ignore/src/dir.rs b/vendor/ignore/src/dir.rs new file mode 100644 index 0000000..2577665 --- /dev/null +++ b/vendor/ignore/src/dir.rs @@ -0,0 +1,1188 @@ +// This module provides a data structure, `Ignore`, that connects "directory +// traversal" with "ignore matchers." Specifically, it knows about gitignore +// semantics and precedence, and is organized based on directory hierarchy. +// Namely, every matcher logically corresponds to ignore rules from a single +// directory, and points to the matcher for its corresponding parent directory. +// In this sense, `Ignore` is a *persistent* data structure. 
+// +// This design was specifically chosen to make it possible to use this data +// structure in a parallel directory iterator. +// +// My initial intention was to expose this module as part of this crate's +// public API, but I think the data structure's public API is too complicated +// with non-obvious failure modes. Alas, such things haven't been documented +// well. + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::fs::{File, FileType}; +use std::io::{self, BufRead}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use crate::gitignore::{self, Gitignore, GitignoreBuilder}; +use crate::overrides::{self, Override}; +use crate::pathutil::{is_hidden, strip_prefix}; +use crate::types::{self, Types}; +use crate::walk::DirEntry; +use crate::{Error, Match, PartialErrorBuilder}; + +/// IgnoreMatch represents information about where a match came from when using +/// the `Ignore` matcher. +#[derive(Clone, Debug)] +pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>); + +/// IgnoreMatchInner describes precisely where the match information came from. +/// This is private to allow expansion to more matchers in the future. +#[derive(Clone, Debug)] +enum IgnoreMatchInner<'a> { + Override(overrides::Glob<'a>), + Gitignore(&'a gitignore::Glob), + Types(types::Glob<'a>), + Hidden, +} + +impl<'a> IgnoreMatch<'a> { + fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Override(x)) + } + + fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Gitignore(x)) + } + + fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Types(x)) + } + + fn hidden() -> IgnoreMatch<'static> { + IgnoreMatch(IgnoreMatchInner::Hidden) + } +} + +/// Options for the ignore matcher, shared between the matcher itself and the +/// builder. +#[derive(Clone, Copy, Debug)] +struct IgnoreOptions { + /// Whether to ignore hidden file paths or not. + hidden: bool, + /// Whether to read .ignore files. + ignore: bool, + /// Whether to respect any ignore files in parent directories. + parents: bool, + /// Whether to read git's global gitignore file. + git_global: bool, + /// Whether to read .gitignore files. + git_ignore: bool, + /// Whether to read .git/info/exclude files. + git_exclude: bool, + /// Whether to ignore files case insensitively + ignore_case_insensitive: bool, + /// Whether a git repository must be present in order to apply any + /// git-related ignore rules. + require_git: bool, +} + +/// Ignore is a matcher useful for recursively walking one or more directories. +#[derive(Clone, Debug)] +pub struct Ignore(Arc<IgnoreInner>); + +#[derive(Clone, Debug)] +struct IgnoreInner { + /// A map of all existing directories that have already been + /// compiled into matchers. + /// + /// Note that this is never used during matching, only when adding new + /// parent directory matchers. This avoids needing to rebuild glob sets for + /// parent directories if many paths are being searched. + compiled: Arc<RwLock<HashMap<OsString, Ignore>>>, + /// The path to the directory that this matcher was built from. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc<Override>, + /// A file type matcher. + types: Arc<Types>, + /// The parent directory to match next. + /// + /// If this is the root directory or there are otherwise no more + /// directories to match, then `parent` is `None`. 
+ parent: Option<Ignore>, + /// Whether this is an absolute parent matcher, as added by add_parent. + is_absolute_parent: bool, + /// The absolute base path of this matcher. Populated only if parent + /// directories are added. + absolute_base: Option<Arc<PathBuf>>, + /// Explicit global ignore matchers specified by the caller. + explicit_ignores: Arc<Vec<Gitignore>>, + /// Ignore files used in addition to `.ignore` + custom_ignore_filenames: Arc<Vec<OsString>>, + /// The matcher for custom ignore files + custom_ignore_matcher: Gitignore, + /// The matcher for .ignore files. + ignore_matcher: Gitignore, + /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore. + git_global_matcher: Arc<Gitignore>, + /// The matcher for .gitignore files. + git_ignore_matcher: Gitignore, + /// Special matcher for `.git/info/exclude` files. + git_exclude_matcher: Gitignore, + /// Whether this directory contains a .git sub-directory. + has_git: bool, + /// Ignore config. + opts: IgnoreOptions, +} + +impl Ignore { + /// Return the directory path of this matcher. + pub fn path(&self) -> &Path { + &self.0.dir + } + + /// Return true if this matcher has no parent. + pub fn is_root(&self) -> bool { + self.0.parent.is_none() + } + + /// Returns true if this matcher was added via the `add_parents` method. + pub fn is_absolute_parent(&self) -> bool { + self.0.is_absolute_parent + } + + /// Return this matcher's parent, if one exists. + pub fn parent(&self) -> Option<Ignore> { + self.0.parent.clone() + } + + /// Create a new `Ignore` matcher with the parent directories of `dir`. + /// + /// Note that this can only be called on an `Ignore` matcher with no + /// parents (i.e., `is_root` returns `true`). This will panic otherwise. + pub fn add_parents<P: AsRef<Path>>( + &self, + path: P, + ) -> (Ignore, Option<Error>) { + if !self.0.opts.parents + && !self.0.opts.git_ignore + && !self.0.opts.git_exclude + && !self.0.opts.git_global + { + // If we never need info from parent directories, then don't do + // anything. + return (self.clone(), None); + } + if !self.is_root() { + panic!("Ignore::add_parents called on non-root matcher"); + } + let absolute_base = match path.as_ref().canonicalize() { + Ok(path) => Arc::new(path), + Err(_) => { + // There's not much we can do here, so just return our + // existing matcher. We drop the error to be consistent + // with our general pattern of ignoring I/O errors when + // processing ignore files. + return (self.clone(), None); + } + }; + // List of parents, from child to root. + let mut parents = vec![]; + let mut path = &**absolute_base; + while let Some(parent) = path.parent() { + parents.push(parent); + path = parent; + } + let mut errs = PartialErrorBuilder::default(); + let mut ig = self.clone(); + for parent in parents.into_iter().rev() { + let mut compiled = self.0.compiled.write().unwrap(); + if let Some(prebuilt) = compiled.get(parent.as_os_str()) { + ig = prebuilt.clone(); + continue; + } + let (mut igtmp, err) = ig.add_child_path(parent); + errs.maybe_push(err); + igtmp.is_absolute_parent = true; + igtmp.absolute_base = Some(absolute_base.clone()); + igtmp.has_git = + if self.0.opts.require_git && self.0.opts.git_ignore { + parent.join(".git").exists() + } else { + false + }; + ig = Ignore(Arc::new(igtmp)); + compiled.insert(parent.as_os_str().to_os_string(), ig.clone()); + } + (ig, errs.into_error_option()) + } + + /// Create a new `Ignore` matcher for the given child directory. 
+ /// + /// Since building the matcher may require reading from multiple + /// files, it's possible that this method partially succeeds. Therefore, + /// a matcher is always returned (which may match nothing) and an error is + /// returned if it exists. + /// + /// Note that all I/O errors are completely ignored. + pub fn add_child<P: AsRef<Path>>( + &self, + dir: P, + ) -> (Ignore, Option<Error>) { + let (ig, err) = self.add_child_path(dir.as_ref()); + (Ignore(Arc::new(ig)), err) + } + + /// Like add_child, but takes a full path and returns an IgnoreInner. + fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) { + let git_type = if self.0.opts.require_git + && (self.0.opts.git_ignore || self.0.opts.git_exclude) + { + dir.join(".git").metadata().ok().map(|md| md.file_type()) + } else { + None + }; + let has_git = git_type.map(|_| true).unwrap_or(false); + + let mut errs = PartialErrorBuilder::default(); + let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() { + Gitignore::empty() + } else { + let (m, err) = create_gitignore( + &dir, + &dir, + &self.0.custom_ignore_filenames, + self.0.opts.ignore_case_insensitive, + ); + errs.maybe_push(err); + m + }; + let ig_matcher = if !self.0.opts.ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore( + &dir, + &dir, + &[".ignore"], + self.0.opts.ignore_case_insensitive, + ); + errs.maybe_push(err); + m + }; + let gi_matcher = if !self.0.opts.git_ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore( + &dir, + &dir, + &[".gitignore"], + self.0.opts.ignore_case_insensitive, + ); + errs.maybe_push(err); + m + }; + let gi_exclude_matcher = if !self.0.opts.git_exclude { + Gitignore::empty() + } else { + match resolve_git_commondir(dir, git_type) { + Ok(git_dir) => { + let (m, err) = create_gitignore( + &dir, + &git_dir, + &["info/exclude"], + self.0.opts.ignore_case_insensitive, + ); + errs.maybe_push(err); + m + } + Err(err) => { + errs.maybe_push(err); + Gitignore::empty() + } + } + }; + let ig = IgnoreInner { + compiled: self.0.compiled.clone(), + dir: dir.to_path_buf(), + overrides: self.0.overrides.clone(), + types: self.0.types.clone(), + parent: Some(self.clone()), + is_absolute_parent: false, + absolute_base: self.0.absolute_base.clone(), + explicit_ignores: self.0.explicit_ignores.clone(), + custom_ignore_filenames: self.0.custom_ignore_filenames.clone(), + custom_ignore_matcher: custom_ig_matcher, + ignore_matcher: ig_matcher, + git_global_matcher: self.0.git_global_matcher.clone(), + git_ignore_matcher: gi_matcher, + git_exclude_matcher: gi_exclude_matcher, + has_git, + opts: self.0.opts, + }; + (ig, errs.into_error_option()) + } + + /// Returns true if at least one type of ignore rule should be matched. + fn has_any_ignore_rules(&self) -> bool { + let opts = self.0.opts; + let has_custom_ignore_files = + !self.0.custom_ignore_filenames.is_empty(); + let has_explicit_ignores = !self.0.explicit_ignores.is_empty(); + + opts.ignore + || opts.git_global + || opts.git_ignore + || opts.git_exclude + || has_custom_ignore_files + || has_explicit_ignores + } + + /// Like `matched`, but works with a directory entry instead. + pub fn matched_dir_entry<'a>( + &'a self, + dent: &DirEntry, + ) -> Match<IgnoreMatch<'a>> { + let m = self.matched(dent.path(), dent.is_dir()); + if m.is_none() && self.0.opts.hidden && is_hidden(dent) { + return Match::Ignore(IgnoreMatch::hidden()); + } + m + } + + /// Returns a match indicating whether the given file path should be + /// ignored or not. 
+ /// + /// The match contains information about its origin. + fn matched<'a, P: AsRef<Path>>( + &'a self, + path: P, + is_dir: bool, + ) -> Match<IgnoreMatch<'a>> { + // We need to be careful with our path. If it has a leading ./, then + // strip it because it causes nothing but trouble. + let mut path = path.as_ref(); + if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Match against the override patterns. If an override matches + // regardless of whether it's whitelist/ignore, then we quit and + // return that result immediately. Overrides have the highest + // precedence. + if !self.0.overrides.is_empty() { + let mat = self + .0 + .overrides + .matched(path, is_dir) + .map(IgnoreMatch::overrides); + if !mat.is_none() { + return mat; + } + } + let mut whitelisted = Match::None; + if self.has_any_ignore_rules() { + let mat = self.matched_ignore(path, is_dir); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + if !self.0.types.is_empty() { + let mat = + self.0.types.matched(path, is_dir).map(IgnoreMatch::types); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + whitelisted + } + + /// Performs matching only on the ignore files for this directory and + /// all parent directories. + fn matched_ignore<'a>( + &'a self, + path: &Path, + is_dir: bool, + ) -> Match<IgnoreMatch<'a>> { + let ( + mut m_custom_ignore, + mut m_ignore, + mut m_gi, + mut m_gi_exclude, + mut m_explicit, + ) = (Match::None, Match::None, Match::None, Match::None, Match::None); + let any_git = + !self.0.opts.require_git || self.parents().any(|ig| ig.0.has_git); + let mut saw_git = false; + for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) { + if m_custom_ignore.is_none() { + m_custom_ignore = + ig.0.custom_ignore_matcher + .matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher + .matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher + .matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher + .matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + if self.0.opts.parents { + if let Some(abs_parent_path) = self.absolute_base() { + let path = abs_parent_path.join(path); + for ig in + self.parents().skip_while(|ig| !ig.0.is_absolute_parent) + { + if m_custom_ignore.is_none() { + m_custom_ignore = + ig.0.custom_ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + } + } + for gi in self.0.explicit_ignores.iter().rev() { + if !m_explicit.is_none() { + break; + } + m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore); + } + let m_global = if any_git { + self.0 + .git_global_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore) + } else { + Match::None + }; + + m_custom_ignore + .or(m_ignore) + .or(m_gi) + 
.or(m_gi_exclude) + .or(m_global) + .or(m_explicit) + } + + /// Returns an iterator over parent ignore matchers, including this one. + pub fn parents(&self) -> Parents<'_> { + Parents(Some(self)) + } + + /// Returns the first absolute path of the first absolute parent, if + /// one exists. + fn absolute_base(&self) -> Option<&Path> { + self.0.absolute_base.as_ref().map(|p| &***p) + } +} + +/// An iterator over all parents of an ignore matcher, including itself. +/// +/// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher. +pub struct Parents<'a>(Option<&'a Ignore>); + +impl<'a> Iterator for Parents<'a> { + type Item = &'a Ignore; + + fn next(&mut self) -> Option<&'a Ignore> { + match self.0.take() { + None => None, + Some(ig) => { + self.0 = ig.0.parent.as_ref(); + Some(ig) + } + } + } +} + +/// A builder for creating an Ignore matcher. +#[derive(Clone, Debug)] +pub struct IgnoreBuilder { + /// The root directory path for this ignore matcher. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc<Override>, + /// A type matcher (default is empty). + types: Arc<Types>, + /// Explicit global ignore matchers. + explicit_ignores: Vec<Gitignore>, + /// Ignore files in addition to .ignore. + custom_ignore_filenames: Vec<OsString>, + /// Ignore config. + opts: IgnoreOptions, +} + +impl IgnoreBuilder { + /// Create a new builder for an `Ignore` matcher. + /// + /// All relative file paths are resolved with respect to the current + /// working directory. + pub fn new() -> IgnoreBuilder { + IgnoreBuilder { + dir: Path::new("").to_path_buf(), + overrides: Arc::new(Override::empty()), + types: Arc::new(Types::empty()), + explicit_ignores: vec![], + custom_ignore_filenames: vec![], + opts: IgnoreOptions { + hidden: true, + ignore: true, + parents: true, + git_global: true, + git_ignore: true, + git_exclude: true, + ignore_case_insensitive: false, + require_git: true, + }, + } + } + + /// Builds a new `Ignore` matcher. + /// + /// The matcher returned won't match anything until ignore rules from + /// directories are added to it. + pub fn build(&self) -> Ignore { + let git_global_matcher = if !self.opts.git_global { + Gitignore::empty() + } else { + let mut builder = GitignoreBuilder::new(""); + builder + .case_insensitive(self.opts.ignore_case_insensitive) + .unwrap(); + let (gi, err) = builder.build_global(); + if let Some(err) = err { + log::debug!("{}", err); + } + gi + }; + + Ignore(Arc::new(IgnoreInner { + compiled: Arc::new(RwLock::new(HashMap::new())), + dir: self.dir.clone(), + overrides: self.overrides.clone(), + types: self.types.clone(), + parent: None, + is_absolute_parent: true, + absolute_base: None, + explicit_ignores: Arc::new(self.explicit_ignores.clone()), + custom_ignore_filenames: Arc::new( + self.custom_ignore_filenames.clone(), + ), + custom_ignore_matcher: Gitignore::empty(), + ignore_matcher: Gitignore::empty(), + git_global_matcher: Arc::new(git_global_matcher), + git_ignore_matcher: Gitignore::empty(), + git_exclude_matcher: Gitignore::empty(), + has_git: false, + opts: self.opts, + })) + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder { + self.overrides = Arc::new(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. 
+ pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder { + self.types = Arc::new(types); + self + } + + /// Adds a new global ignore matcher from the ignore file path given. + pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder { + self.explicit_ignores.push(ig); + self + } + + /// Add a custom ignore file name + /// + /// These ignore files have higher precedence than all other ignore files. + /// + /// When specifying multiple names, earlier names have lower precedence than + /// later names. + pub fn add_custom_ignore_filename<S: AsRef<OsStr>>( + &mut self, + file_name: S, + ) -> &mut IgnoreBuilder { + self.custom_ignore_filenames.push(file_name.as_ref().to_os_string()); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.hidden = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.ignore = yes; + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then .gitignore files in parent directories of each + /// file path given are respected. Otherwise, they are ignored. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.parents = yes; + self + } + + /// Add a global gitignore matcher. + /// + /// Its precedence is lower than both normal `.gitignore` files and + /// `.git/info/exclude` files. + /// + /// This overwrites any previous global gitignore setting. + /// + /// This is enabled by default. + pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_global = yes; + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_ignore = yes; + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_exclude = yes; + self + } + + /// Whether a git repository is required to apply git-related ignore + /// rules (global rules, .gitignore and local exclude rules). + /// + /// When disabled, git-related ignore rules are applied even when searching + /// outside a git repository. + pub fn require_git(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.require_git = yes; + self + } + + /// Process ignore files case insensitively + /// + /// This is disabled by default. + pub fn ignore_case_insensitive( + &mut self, + yes: bool, + ) -> &mut IgnoreBuilder { + self.opts.ignore_case_insensitive = yes; + self + } +} + +/// Creates a new gitignore matcher for the directory given. +/// +/// The matcher is meant to match files below `dir`. +/// Ignore globs are extracted from each of the file names relative to +/// `dir_for_ignorefile` in the order given (earlier names have lower +/// precedence than later names). +/// +/// I/O errors are ignored. 
+pub fn create_gitignore<T: AsRef<OsStr>>( + dir: &Path, + dir_for_ignorefile: &Path, + names: &[T], + case_insensitive: bool, +) -> (Gitignore, Option<Error>) { + let mut builder = GitignoreBuilder::new(dir); + let mut errs = PartialErrorBuilder::default(); + builder.case_insensitive(case_insensitive).unwrap(); + for name in names { + let gipath = dir_for_ignorefile.join(name.as_ref()); + // This check is not necessary, but is added for performance. Namely, + // a simple stat call checking for existence can often be just a bit + // quicker than actually trying to open a file. Since the number of + // directories without ignore files likely greatly exceeds the number + // with ignore files, this check generally makes sense. + // + // However, until demonstrated otherwise, we speculatively do not do + // this on Windows since Windows is notorious for having slow file + // system operations. Namely, it's not clear whether this analysis + // makes sense on Windows. + // + // For more details: https://github.com/BurntSushi/ripgrep/pull/1381 + if cfg!(windows) || gipath.exists() { + errs.maybe_push_ignore_io(builder.add(gipath)); + } + } + let gi = match builder.build() { + Ok(gi) => gi, + Err(err) => { + errs.push(err); + GitignoreBuilder::new(dir).build().unwrap() + } + }; + (gi, errs.into_error_option()) +} + +/// Find the GIT_COMMON_DIR for the given git worktree. +/// +/// This is the directory that may contain a private ignore file +/// "info/exclude". Unlike git, this function does *not* read environment +/// variables GIT_DIR and GIT_COMMON_DIR, because it is not clear how to use +/// them when multiple repositories are searched. +/// +/// Some I/O errors are ignored. +fn resolve_git_commondir( + dir: &Path, + git_type: Option<FileType>, +) -> Result<PathBuf, Option<Error>> { + let git_dir_path = || dir.join(".git"); + let git_dir = git_dir_path(); + if !git_type.map_or(false, |ft| ft.is_file()) { + return Ok(git_dir); + } + let file = match File::open(git_dir) { + Ok(file) => io::BufReader::new(file), + Err(err) => { + return Err(Some(Error::Io(err).with_path(git_dir_path()))); + } + }; + let dot_git_line = match file.lines().next() { + Some(Ok(line)) => line, + Some(Err(err)) => { + return Err(Some(Error::Io(err).with_path(git_dir_path()))); + } + None => return Err(None), + }; + if !dot_git_line.starts_with("gitdir: ") { + return Err(None); + } + let real_git_dir = PathBuf::from(&dot_git_line["gitdir: ".len()..]); + let git_commondir_file = || real_git_dir.join("commondir"); + let file = match File::open(git_commondir_file()) { + Ok(file) => io::BufReader::new(file), + Err(_) => return Err(None), + }; + let commondir_line = match file.lines().next() { + Some(Ok(line)) => line, + Some(Err(err)) => { + return Err(Some(Error::Io(err).with_path(git_commondir_file()))); + } + None => return Err(None), + }; + let commondir_abs = if commondir_line.starts_with(".") { + real_git_dir.join(commondir_line) // relative commondir + } else { + PathBuf::from(commondir_line) + }; + Ok(commondir_abs) +} + +#[cfg(test)] +mod tests { + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + + use crate::dir::IgnoreBuilder; + use crate::gitignore::Gitignore; + use crate::tests::TempDir; + use crate::Error; + + fn wfile<P: AsRef<Path>>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn mkdirp<P: AsRef<Path>>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn partial(err: Error) -> Vec<Error> { + 
match err { + Error::Partial(errs) => errs, + _ => panic!("expected partial error but got {:?}", err), + } + } + + fn tmpdir() -> TempDir { + TempDir::new().unwrap() + } + + #[test] + fn explicit_ignore() { + let td = tmpdir(); + wfile(td.path().join("not-an-ignore"), "foo\n!bar"); + + let (gi, err) = Gitignore::new(td.path().join("not-an-ignore")); + assert!(err.is_none()); + let (ig, err) = + IgnoreBuilder::new().add_ignore(gi).build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn git_exclude() { + let td = tmpdir(); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore_no_git() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_none()); + assert!(ig.matched("bar", false).is_none()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore_allowed_no_git() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new() + .require_git(false) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn ignore() { + let td = tmpdir(); + wfile(td.path().join(".ignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn custom_ignore() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + wfile(td.path().join(custom_ignore), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + // Tests that a custom ignore file will override an .ignore. + #[test] + fn custom_ignore_over_ignore() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + wfile(td.path().join(".ignore"), "foo"); + wfile(td.path().join(custom_ignore), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that earlier custom ignore files have lower precedence than later. 
+ #[test] + fn custom_ignore_precedence() { + let td = tmpdir(); + let custom_ignore1 = ".customignore1"; + let custom_ignore2 = ".customignore2"; + wfile(td.path().join(custom_ignore1), "foo"); + wfile(td.path().join(custom_ignore2), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore1) + .add_custom_ignore_filename(custom_ignore2) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that an .ignore will override a .gitignore. + #[test] + fn ignore_over_gitignore() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "!foo"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that exclude has lower precedent than both .ignore and .gitignore. + #[test] + fn exclude_lowest() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "!foo"); + wfile(td.path().join(".ignore"), "!bar"); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("baz", false).is_ignore()); + assert!(ig.matched("foo", false).is_whitelist()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn errored() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + } + + #[test] + fn errored_both() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo"); + wfile(td.path().join(".ignore"), "{bar"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert_eq!(2, partial(err.expect("an error")).len()); + } + + #[test] + fn errored_partial() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + wfile(td.path().join(".gitignore"), "{foo\nbar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_ignore()); + } + + #[test] + fn errored_partial_and_ignore() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo\nbar"); + wfile(td.path().join(".ignore"), "!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn not_present_empty() { + let td = tmpdir(); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + } + + #[test] + fn stops_at_git_dir() { + // This tests that .gitignore files beyond a .git barrier aren't + // matched, but .ignore files are. 
+ let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo/.git")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "bar"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path()); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(ig1.path().join("foo")); + assert!(err.is_none()); + + assert!(ig1.matched("foo", false).is_ignore()); + assert!(ig2.matched("foo", false).is_none()); + + assert!(ig1.matched("bar", false).is_ignore()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo")); + wfile(td.path().join(".gitignore"), "bar"); + + // First, check that the parent gitignore file isn't detected if the + // parent isn't added. This establishes a baseline. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig1.matched("bar", false).is_none()); + + // Second, check that adding a parent directory actually works. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("foo")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent_anchored() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("src/llvm")); + wfile(td.path().join(".gitignore"), "/llvm/\nfoo"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("src")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child("src"); + assert!(err.is_none()); + + assert!(ig1.matched("llvm", true).is_none()); + assert!(ig2.matched("llvm", true).is_none()); + assert!(ig2.matched("src/llvm", true).is_none()); + assert!(ig2.matched("foo", false).is_ignore()); + assert!(ig2.matched("src/foo", false).is_ignore()); + } + + #[test] + fn git_info_exclude_in_linked_worktree() { + let td = tmpdir(); + let git_dir = td.path().join(".git"); + mkdirp(git_dir.join("info")); + wfile(git_dir.join("info/exclude"), "ignore_me"); + mkdirp(git_dir.join("worktrees/linked-worktree")); + let commondir_path = + || git_dir.join("worktrees/linked-worktree/commondir"); + mkdirp(td.path().join("linked-worktree")); + let worktree_git_dir_abs = format!( + "gitdir: {}", + git_dir.join("worktrees/linked-worktree").to_str().unwrap(), + ); + wfile(td.path().join("linked-worktree/.git"), &worktree_git_dir_abs); + + // relative commondir + wfile(commondir_path(), "../.."); + let ib = IgnoreBuilder::new().build(); + let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + assert!(ignore.matched("ignore_me", false).is_ignore()); + + // absolute commondir + wfile(commondir_path(), git_dir.to_str().unwrap()); + let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + assert!(ignore.matched("ignore_me", false).is_ignore()); + + // missing commondir file + assert!(fs::remove_file(commondir_path()).is_ok()); + let (_, err) = ib.add_child(td.path().join("linked-worktree")); + // We squash the error in this case, because it occurs in repositories + // that are not linked worktrees but have submodules. 
+ assert!(err.is_none()); + + wfile(td.path().join("linked-worktree/.git"), "garbage"); + let (_, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + + wfile(td.path().join("linked-worktree/.git"), "gitdir: garbage"); + let (_, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + } +} diff --git a/vendor/ignore/src/gitignore.rs b/vendor/ignore/src/gitignore.rs new file mode 100644 index 0000000..3c7ba5e --- /dev/null +++ b/vendor/ignore/src/gitignore.rs @@ -0,0 +1,789 @@ +/*! +The gitignore module provides a way to match globs from a gitignore file +against file paths. + +Note that this module implements the specification as described in the +`gitignore` man page from scratch. That is, this module does *not* shell out to +the `git` command line tool. +*/ + +use std::cell::RefCell; +use std::env; +use std::fs::File; +use std::io::{self, BufRead, Read}; +use std::path::{Path, PathBuf}; +use std::str; +use std::sync::Arc; + +use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; +use regex::bytes::Regex; +use thread_local::ThreadLocal; + +use crate::pathutil::{is_file_name, strip_prefix}; +use crate::{Error, Match, PartialErrorBuilder}; + +/// Glob represents a single glob in a gitignore file. +/// +/// This is used to report information about the highest precedent glob that +/// matched in one or more gitignore files. +#[derive(Clone, Debug)] +pub struct Glob { + /// The file path that this glob was extracted from. + from: Option<PathBuf>, + /// The original glob string. + original: String, + /// The actual glob string used to convert to a regex. + actual: String, + /// Whether this is a whitelisted glob or not. + is_whitelist: bool, + /// Whether this glob should only match directories or not. + is_only_dir: bool, +} + +impl Glob { + /// Returns the file path that defined this glob. + pub fn from(&self) -> Option<&Path> { + self.from.as_ref().map(|p| &**p) + } + + /// The original glob as it was defined in a gitignore file. + pub fn original(&self) -> &str { + &self.original + } + + /// The actual glob that was compiled to respect gitignore + /// semantics. + pub fn actual(&self) -> &str { + &self.actual + } + + /// Whether this was a whitelisted glob or not. + pub fn is_whitelist(&self) -> bool { + self.is_whitelist + } + + /// Whether this glob must match a directory or not. + pub fn is_only_dir(&self) -> bool { + self.is_only_dir + } + + /// Returns true if and only if this glob has a `**/` prefix. + fn has_doublestar_prefix(&self) -> bool { + self.actual.starts_with("**/") || self.actual == "**" + } +} + +/// Gitignore is a matcher for the globs in one or more gitignore files +/// in the same directory. +#[derive(Clone, Debug)] +pub struct Gitignore { + set: GlobSet, + root: PathBuf, + globs: Vec<Glob>, + num_ignores: u64, + num_whitelists: u64, + matches: Option<Arc<ThreadLocal<RefCell<Vec<usize>>>>>, +} + +impl Gitignore { + /// Creates a new gitignore matcher from the gitignore file path given. + /// + /// If it's desirable to include multiple gitignore files in a single + /// matcher, or read gitignore globs from a different source, then + /// use `GitignoreBuilder`. + /// + /// This always returns a valid matcher, even if it's empty. In particular, + /// a Gitignore file can be partially valid, e.g., when one glob is invalid + /// but the rest aren't. + /// + /// Note that I/O errors are ignored. For more granular control over + /// errors, use `GitignoreBuilder`. 
+ pub fn new<P: AsRef<Path>>( + gitignore_path: P, + ) -> (Gitignore, Option<Error>) { + let path = gitignore_path.as_ref(); + let parent = path.parent().unwrap_or(Path::new("/")); + let mut builder = GitignoreBuilder::new(parent); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => (gi, errs.into_error_option()), + Err(err) => { + errs.push(err); + (Gitignore::empty(), errs.into_error_option()) + } + } + } + + /// Creates a new gitignore matcher from the global ignore file, if one + /// exists. + /// + /// The global config file path is specified by git's `core.excludesFile` + /// config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn global() -> (Gitignore, Option<Error>) { + GitignoreBuilder::new("").build_global() + } + + /// Creates a new empty gitignore matcher that never matches anything. + /// + /// Its path is empty. + pub fn empty() -> Gitignore { + Gitignore { + set: GlobSet::empty(), + root: PathBuf::from(""), + globs: vec![], + num_ignores: 0, + num_whitelists: 0, + matches: None, + } + } + + /// Returns the directory containing this gitignore matcher. + /// + /// All matches are done relative to this path. + pub fn path(&self) -> &Path { + &*self.root + } + + /// Returns true if and only if this gitignore has zero globs, and + /// therefore never matches any file path. + pub fn is_empty(&self) -> bool { + self.set.is_empty() + } + + /// Returns the total number of globs, which should be equivalent to + /// `num_ignores + num_whitelists`. + pub fn len(&self) -> usize { + self.set.len() + } + + /// Returns the total number of ignore globs. + pub fn num_ignores(&self) -> u64 { + self.num_ignores + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.num_whitelists + } + + /// Returns whether the given path (file or directory) matched a pattern in + /// this gitignore matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The given path is matched relative to the path given when building + /// the matcher. Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. + pub fn matched<P: AsRef<Path>>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + self.matched_stripped(self.strip(path.as_ref()), is_dir) + } + + /// Returns whether the given path (file or directory, and expected to be + /// under the root) or any of its parent directories (up to the root) + /// matched a pattern in this gitignore matcher. + /// + /// NOTE: This method is more expensive than walking the directory hierarchy + /// top-to-bottom and matching the entries. But, is easier to use in cases + /// when a list of paths are available without a hierarchy. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The given path is matched relative to the path given when building + /// the matcher. 
Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. + /// + /// # Panics + /// + /// This method panics if the given file path is not under the root path + /// of this matcher. + pub fn matched_path_or_any_parents<P: AsRef<Path>>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let mut path = self.strip(path.as_ref()); + assert!(!path.has_root(), "path is expected to be under the root"); + + match self.matched_stripped(path, is_dir) { + Match::None => (), // walk up + a_match => return a_match, + } + while let Some(parent) = path.parent() { + match self.matched_stripped(parent, /* is_dir */ true) { + Match::None => path = parent, // walk up + a_match => return a_match, + } + } + Match::None + } + + /// Like matched, but takes a path that has already been stripped. + fn matched_stripped<P: AsRef<Path>>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let path = path.as_ref(); + let _matches = self.matches.as_ref().unwrap().get_or_default(); + let mut matches = _matches.borrow_mut(); + let candidate = Candidate::new(path); + self.set.matches_candidate_into(&candidate, &mut *matches); + for &i in matches.iter().rev() { + let glob = &self.globs[i]; + if !glob.is_only_dir() || is_dir { + return if glob.is_whitelist() { + Match::Whitelist(glob) + } else { + Match::Ignore(glob) + }; + } + } + Match::None + } + + /// Strips the given path such that it's suitable for matching with this + /// gitignore matcher. + fn strip<'a, P: 'a + AsRef<Path> + ?Sized>( + &'a self, + path: &'a P, + ) -> &'a Path { + let mut path = path.as_ref(); + // A leading ./ is completely superfluous. We also strip it from + // our gitignore root path, so we need to strip it from our candidate + // path too. + if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Strip any common prefix between the candidate path and the root + // of the gitignore, to make sure we get relative matching right. + // BUT, a file name might not have any directory components to it, + // in which case, we don't want to accidentally strip any part of the + // file name. + // + // As an additional special case, if the root is just `.`, then we + // shouldn't try to strip anything, e.g., when path begins with a `.`. + if self.root != Path::new(".") && !is_file_name(path) { + if let Some(p) = strip_prefix(&self.root, path) { + path = p; + // If we're left with a leading slash, get rid of it. + if let Some(p) = strip_prefix("/", path) { + path = p; + } + } + } + path + } +} + +/// Builds a matcher for a single set of globs from a .gitignore file. +#[derive(Clone, Debug)] +pub struct GitignoreBuilder { + builder: GlobSetBuilder, + root: PathBuf, + globs: Vec<Glob>, + case_insensitive: bool, +} + +impl GitignoreBuilder { + /// Create a new builder for a gitignore file. + /// + /// The path given should be the path at which the globs for this gitignore + /// file should be matched. Note that paths are always matched relative + /// to the root path given here. Generally, the root path should correspond + /// to the *directory* containing a `.gitignore` file. 
+ pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder { + let root = root.as_ref(); + GitignoreBuilder { + builder: GlobSetBuilder::new(), + root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), + globs: vec![], + case_insensitive: false, + } + } + + /// Builds a new matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result<Gitignore, Error> { + let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); + let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); + let set = self + .builder + .build() + .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; + Ok(Gitignore { + set: set, + root: self.root.clone(), + globs: self.globs.clone(), + num_ignores: nignore as u64, + num_whitelists: nwhite as u64, + matches: Some(Arc::new(ThreadLocal::default())), + }) + } + + /// Build a global gitignore matcher using the configuration in this + /// builder. + /// + /// This consumes ownership of the builder unlike `build` because it + /// must mutate the builder to add the global gitignore globs. + /// + /// Note that this ignores the path given to this builder's constructor + /// and instead derives the path automatically from git's global + /// configuration. + pub fn build_global(mut self) -> (Gitignore, Option<Error>) { + match gitconfig_excludes_path() { + None => (Gitignore::empty(), None), + Some(path) => { + if !path.is_file() { + (Gitignore::empty(), None) + } else { + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(self.add(path)); + match self.build() { + Ok(gi) => (gi, errs.into_error_option()), + Err(err) => { + errs.push(err); + (Gitignore::empty(), errs.into_error_option()) + } + } + } + } + } + } + + /// Add each glob from the file path given. + /// + /// The file given should be formatted as a `gitignore` file. + /// + /// Note that partial errors can be returned. For example, if there was + /// a problem adding one glob, an error for that will be returned, but + /// all other valid globs will still be added. + pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> { + let path = path.as_ref(); + let file = match File::open(path) { + Err(err) => return Some(Error::Io(err).with_path(path)), + Ok(file) => file, + }; + let rdr = io::BufReader::new(file); + let mut errs = PartialErrorBuilder::default(); + for (i, line) in rdr.lines().enumerate() { + let lineno = (i + 1) as u64; + let line = match line { + Ok(line) => line, + Err(err) => { + errs.push(Error::Io(err).tagged(path, lineno)); + break; + } + }; + if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { + errs.push(err.tagged(path, lineno)); + } + } + errs.into_error_option() + } + + /// Add each glob line from the string given. + /// + /// If this string came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// The string given should be formatted as a `gitignore` file. + #[cfg(test)] + fn add_str( + &mut self, + from: Option<PathBuf>, + gitignore: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + for line in gitignore.lines() { + self.add_line(from.clone(), line)?; + } + Ok(self) + } + + /// Add a line from a gitignore file to this builder. + /// + /// If this line came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// If the line could not be parsed as a glob, then an error is returned. 
+ pub fn add_line( + &mut self, + from: Option<PathBuf>, + mut line: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + #![allow(deprecated)] + + if line.starts_with("#") { + return Ok(self); + } + if !line.ends_with("\\ ") { + line = line.trim_right(); + } + if line.is_empty() { + return Ok(self); + } + let mut glob = Glob { + from: from, + original: line.to_string(), + actual: String::new(), + is_whitelist: false, + is_only_dir: false, + }; + let mut is_absolute = false; + if line.starts_with("\\!") || line.starts_with("\\#") { + line = &line[1..]; + is_absolute = line.chars().nth(0) == Some('/'); + } else { + if line.starts_with("!") { + glob.is_whitelist = true; + line = &line[1..]; + } + if line.starts_with("/") { + // `man gitignore` says that if a glob starts with a slash, + // then the glob can only match the beginning of a path + // (relative to the location of gitignore). We achieve this by + // simply banning wildcards from matching /. + line = &line[1..]; + is_absolute = true; + } + } + // If it ends with a slash, then this should only match directories, + // but the slash should otherwise not be used while globbing. + if line.as_bytes().last() == Some(&b'/') { + glob.is_only_dir = true; + line = &line[..line.len() - 1]; + // If the slash was escaped, then remove the escape. + // See: https://github.com/BurntSushi/ripgrep/issues/2236 + if line.as_bytes().last() == Some(&b'\\') { + line = &line[..line.len() - 1]; + } + } + glob.actual = line.to_string(); + // If there is a literal slash, then this is a glob that must match the + // entire path name. Otherwise, we should let it match anywhere, so use + // a **/ prefix. + if !is_absolute && !line.chars().any(|c| c == '/') { + // ... but only if we don't already have a **/ prefix. + if !glob.has_doublestar_prefix() { + glob.actual = format!("**/{}", glob.actual); + } + } + // If the glob ends with `/**`, then we should only match everything + // inside a directory, but not the directory itself. Standard globs + // will match the directory. So we add `/*` to force the issue. + if glob.actual.ends_with("/**") { + glob.actual = format!("{}/*", glob.actual); + } + let parsed = GlobBuilder::new(&glob.actual) + .literal_separator(true) + .case_insensitive(self.case_insensitive) + .backslash_escape(true) + .build() + .map_err(|err| Error::Glob { + glob: Some(glob.original.clone()), + err: err.kind().to_string(), + })?; + self.builder.add(parsed); + self.globs.push(glob); + Ok(self) + } + + /// Toggle whether the globs should be matched case insensitively or not. + /// + /// When this option is changed, only globs added after the change will be + /// affected. + /// + /// This is disabled by default. + pub fn case_insensitive( + &mut self, + yes: bool, + ) -> Result<&mut GitignoreBuilder, Error> { + // TODO: This should not return a `Result`. Fix this in the next semver + // release. + self.case_insensitive = yes; + Ok(self) + } +} + +/// Return the file path of the current environment's global gitignore file. +/// +/// Note that the file path returned may not exist. +fn gitconfig_excludes_path() -> Option<PathBuf> { + // git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably, + // both can be active at the same time, where $HOME/.gitconfig takes + // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then + // we're done. 
+ match gitconfig_home_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + match gitconfig_xdg_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + excludes_file_default() +} + +/// Returns the file contents of git's global config file, if one exists, in +/// the user's home directory. +fn gitconfig_home_contents() -> Option<Vec<u8>> { + let home = match home_dir() { + None => return None, + Some(home) => home, + }; + let mut file = match File::open(home.join(".gitconfig")) { + Err(_) => return None, + Ok(file) => io::BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the file contents of git's global config file, if one exists, in +/// the user's XDG_CONFIG_HOME directory. +fn gitconfig_xdg_contents() -> Option<Vec<u8>> { + let path = env::var_os("XDG_CONFIG_HOME") + .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) + .or_else(|| home_dir().map(|p| p.join(".config"))) + .map(|x| x.join("git/config")); + let mut file = match path.and_then(|p| File::open(p).ok()) { + None => return None, + Some(file) => io::BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the default file path for a global .gitignore file. +/// +/// Specifically, this respects XDG_CONFIG_HOME. +fn excludes_file_default() -> Option<PathBuf> { + env::var_os("XDG_CONFIG_HOME") + .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) + .or_else(|| home_dir().map(|p| p.join(".config"))) + .map(|x| x.join("git/ignore")) +} + +/// Extract git's `core.excludesfile` config setting from the raw file contents +/// given. +fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> { + // N.B. This is the lazy approach, and isn't technically correct, but + // probably works in more circumstances. I guess we would ideally have + // a full INI parser. Yuck. + lazy_static::lazy_static! { + static ref RE: Regex = + Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap(); + }; + let caps = match RE.captures(data) { + None => return None, + Some(caps) => caps, + }; + str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s))) +} + +/// Expands ~ in file paths to the value of $HOME. +fn expand_tilde(path: &str) -> String { + let home = match home_dir() { + None => return path.to_string(), + Some(home) => home.to_string_lossy().into_owned(), + }; + path.replace("~", &home) +} + +/// Returns the location of the user's home directory. +fn home_dir() -> Option<PathBuf> { + // We're fine with using env::home_dir for now. Its bugs are, IMO, pretty + // minor corner cases. We should still probably eventually migrate to + // the `dirs` crate to get a proper implementation. + #![allow(deprecated)] + env::home_dir() +} + +#[cfg(test)] +mod tests { + use super::{Gitignore, GitignoreBuilder}; + use std::path::Path; + + fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore { + let mut builder = GitignoreBuilder::new(root); + builder.add_str(None, s).unwrap(); + builder.build().unwrap() + } + + macro_rules! ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + macro_rules! 
not_ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + not_ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(!gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + const ROOT: &'static str = "/home/foobar/rust/rg"; + + ignored!(ig1, ROOT, "months", "months"); + ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); + ignored!(ig3, ROOT, "*.rs", "src/main.rs"); + ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); + ignored!(ig5, ROOT, "/*.c", "cat-file.c"); + ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); + ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); + ignored!(ig8, ROOT, "foo/", "foo", true); + ignored!(ig9, ROOT, "**/foo", "foo"); + ignored!(ig10, ROOT, "**/foo", "src/foo"); + ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); + ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); + ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); + ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar"); + ignored!(ig15, ROOT, "abc/**", "abc/x"); + ignored!(ig16, ROOT, "abc/**", "abc/x/y"); + ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); + ignored!(ig18, ROOT, "a/**/b", "a/b"); + ignored!(ig19, ROOT, "a/**/b", "a/x/b"); + ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); + ignored!(ig21, ROOT, r"\!xy", "!xy"); + ignored!(ig22, ROOT, r"\#foo", "#foo"); + ignored!(ig23, ROOT, "foo", "./foo"); + ignored!(ig24, ROOT, "target", "grep/target"); + ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); + ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); + ignored!(ig27, ROOT, "foo/", "xyz/foo", true); + ignored!(ig28, "./src", "/llvm/", "./src/llvm", true); + ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true); + ignored!(ig30, ROOT, "**/", "foo/bar", true); + ignored!(ig31, ROOT, "path1/*", "path1/foo"); + ignored!(ig32, ROOT, ".a/b", ".a/b"); + ignored!(ig33, "./", ".a/b", ".a/b"); + ignored!(ig34, ".", ".a/b", ".a/b"); + ignored!(ig35, "./.", ".a/b", ".a/b"); + ignored!(ig36, "././", ".a/b", ".a/b"); + ignored!(ig37, "././.", ".a/b", ".a/b"); + ignored!(ig38, ROOT, "\\[", "["); + ignored!(ig39, ROOT, "\\?", "?"); + ignored!(ig40, ROOT, "\\*", "*"); + ignored!(ig41, ROOT, "\\a", "a"); + ignored!(ig42, ROOT, "s*.rs", "sfoo.rs"); + ignored!(ig43, ROOT, "**", "foo.rs"); + ignored!(ig44, ROOT, "**/**/*", "a/foo.rs"); + + not_ignored!(ignot1, ROOT, "amonths", "months"); + not_ignored!(ignot2, ROOT, "monthsa", "months"); + not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); + not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); + not_ignored!(ignot7, ROOT, "foo/", "foo", false); + not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); + not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); + not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); + not_ignored!(ignot11, ROOT, "#foo", "#foo"); + not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); + not_ignored!(ignot13, ROOT, "foo/**", "foo", true); + not_ignored!( + ignot14, + "./third_party/protobuf", + "m4/ltoptions.m4", + "./third_party/protobuf/csharp/src/packages/repositories.config" + ); + not_ignored!(ignot15, ROOT, "!/bar", "foo/bar"); + not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true); + not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo"); + 
not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs"); + + fn bytes(s: &str) -> Vec<u8> { + s.to_string().into_bytes() + } + + fn path_string<P: AsRef<Path>>(path: P) -> String { + path.as_ref().to_str().unwrap().to_string() + } + + #[test] + fn parse_excludes_file1() { + let data = bytes("[core]\nexcludesFile = /foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), "/foo/bar"); + } + + #[test] + fn parse_excludes_file2() { + let data = bytes("[core]\nexcludesFile = ~/foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), super::expand_tilde("~/foo/bar")); + } + + #[test] + fn parse_excludes_file3() { + let data = bytes("[core]\nexcludeFile = /foo/bar"); + assert!(super::parse_excludes_file(&data).is_none()); + } + + // See: https://github.com/BurntSushi/ripgrep/issues/106 + #[test] + fn regression_106() { + gi_from_str("/", " "); + } + + #[test] + fn case_insensitive() { + let gi = GitignoreBuilder::new(ROOT) + .case_insensitive(true) + .unwrap() + .add_str(None, "*.html") + .unwrap() + .build() + .unwrap(); + assert!(gi.matched("foo.html", false).is_ignore()); + assert!(gi.matched("foo.HTML", false).is_ignore()); + assert!(!gi.matched("foo.htm", false).is_ignore()); + assert!(!gi.matched("foo.HTM", false).is_ignore()); + } + + ignored!(cs1, ROOT, "*.html", "foo.html"); + not_ignored!(cs2, ROOT, "*.html", "foo.HTML"); + not_ignored!(cs3, ROOT, "*.html", "foo.htm"); + not_ignored!(cs4, ROOT, "*.html", "foo.HTM"); +} diff --git a/vendor/ignore/src/lib.rs b/vendor/ignore/src/lib.rs new file mode 100644 index 0000000..824f7c4 --- /dev/null +++ b/vendor/ignore/src/lib.rs @@ -0,0 +1,550 @@ +/*! +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. The precise +matching rules and precedence is explained in the documentation for +`WalkBuilder`. + +Secondarily, this crate exposes gitignore and file type matchers for use cases +that demand more fine-grained control. + +# Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +# Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. +*/ + +#![deny(missing_docs)] + +use std::error; +use std::fmt; +use std::io; +use std::path::{Path, PathBuf}; + +pub use crate::walk::{ + DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder, + WalkParallel, WalkState, +}; + +mod default_types; +mod dir; +pub mod gitignore; +pub mod overrides; +mod pathutil; +pub mod types; +mod walk; + +/// Represents an error that can occur when parsing a gitignore file. +#[derive(Debug)] +pub enum Error { + /// A collection of "soft" errors. 
These occur when adding an ignore + /// file partially succeeded. + Partial(Vec<Error>), + /// An error associated with a specific line number. + WithLineNumber { + /// The line number. + line: u64, + /// The underlying error. + err: Box<Error>, + }, + /// An error associated with a particular file path. + WithPath { + /// The file path. + path: PathBuf, + /// The underlying error. + err: Box<Error>, + }, + /// An error associated with a particular directory depth when recursively + /// walking a directory. + WithDepth { + /// The directory depth. + depth: usize, + /// The underlying error. + err: Box<Error>, + }, + /// An error that occurs when a file loop is detected when traversing + /// symbolic links. + Loop { + /// The ancestor file path in the loop. + ancestor: PathBuf, + /// The child file path in the loop. + child: PathBuf, + }, + /// An error that occurs when doing I/O, such as reading an ignore file. + Io(io::Error), + /// An error that occurs when trying to parse a glob. + Glob { + /// The original glob that caused this error. This glob, when + /// available, always corresponds to the glob provided by an end user. + /// e.g., It is the glob as written in a `.gitignore` file. + /// + /// (This glob may be distinct from the glob that is actually + /// compiled, after accounting for `gitignore` semantics.) + glob: Option<String>, + /// The underlying glob error as a string. + err: String, + }, + /// A type selection for a file type that is not defined. + UnrecognizedFileType(String), + /// A user specified file type definition could not be parsed. + InvalidDefinition, +} + +impl Clone for Error { + fn clone(&self) -> Error { + match *self { + Error::Partial(ref errs) => Error::Partial(errs.clone()), + Error::WithLineNumber { line, ref err } => { + Error::WithLineNumber { line: line, err: err.clone() } + } + Error::WithPath { ref path, ref err } => { + Error::WithPath { path: path.clone(), err: err.clone() } + } + Error::WithDepth { depth, ref err } => { + Error::WithDepth { depth: depth, err: err.clone() } + } + Error::Loop { ref ancestor, ref child } => Error::Loop { + ancestor: ancestor.clone(), + child: child.clone(), + }, + Error::Io(ref err) => match err.raw_os_error() { + Some(e) => Error::Io(io::Error::from_raw_os_error(e)), + None => Error::Io(io::Error::new(err.kind(), err.to_string())), + }, + Error::Glob { ref glob, ref err } => { + Error::Glob { glob: glob.clone(), err: err.clone() } + } + Error::UnrecognizedFileType(ref err) => { + Error::UnrecognizedFileType(err.clone()) + } + Error::InvalidDefinition => Error::InvalidDefinition, + } + } +} + +impl Error { + /// Returns true if this is a partial error. + /// + /// A partial error occurs when only some operations failed while others + /// may have succeeded. For example, an ignore file may contain an invalid + /// glob among otherwise valid globs. + pub fn is_partial(&self) -> bool { + match *self { + Error::Partial(_) => true, + Error::WithLineNumber { ref err, .. } => err.is_partial(), + Error::WithPath { ref err, .. } => err.is_partial(), + Error::WithDepth { ref err, .. } => err.is_partial(), + _ => false, + } + } + + /// Returns true if this error is exclusively an I/O error. + pub fn is_io(&self) -> bool { + match *self { + Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), + Error::WithLineNumber { ref err, .. } => err.is_io(), + Error::WithPath { ref err, .. } => err.is_io(), + Error::WithDepth { ref err, .. } => err.is_io(), + Error::Loop { .. 
} => false, + Error::Io(_) => true, + Error::Glob { .. } => false, + Error::UnrecognizedFileType(_) => false, + Error::InvalidDefinition => false, + } + } + + /// Inspect the original [`io::Error`] if there is one. + /// + /// [`None`] is returned if the [`Error`] doesn't correspond to an + /// [`io::Error`]. This might happen, for example, when the error was + /// produced because a cycle was found in the directory tree while + /// following symbolic links. + /// + /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To + /// obtain an owned value, the [`into_io_error`] can be used instead. + /// + /// > This is the original [`io::Error`] and is _not_ the same as + /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the + /// error. + /// + /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None + /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html + /// [`Error`]: struct.Error.html + /// [`into_io_error`]: struct.Error.html#method.into_io_error + /// [impl]: struct.Error.html#impl-From%3CError%3E + pub fn io_error(&self) -> Option<&std::io::Error> { + match *self { + Error::Partial(ref errs) => { + if errs.len() == 1 { + errs[0].io_error() + } else { + None + } + } + Error::WithLineNumber { ref err, .. } => err.io_error(), + Error::WithPath { ref err, .. } => err.io_error(), + Error::WithDepth { ref err, .. } => err.io_error(), + Error::Loop { .. } => None, + Error::Io(ref err) => Some(err), + Error::Glob { .. } => None, + Error::UnrecognizedFileType(_) => None, + Error::InvalidDefinition => None, + } + } + + /// Similar to [`io_error`] except consumes self to convert to the original + /// [`io::Error`] if one exists. + /// + /// [`io_error`]: struct.Error.html#method.io_error + /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + pub fn into_io_error(self) -> Option<std::io::Error> { + match self { + Error::Partial(mut errs) => { + if errs.len() == 1 { + errs.remove(0).into_io_error() + } else { + None + } + } + Error::WithLineNumber { err, .. } => err.into_io_error(), + Error::WithPath { err, .. } => err.into_io_error(), + Error::WithDepth { err, .. } => err.into_io_error(), + Error::Loop { .. } => None, + Error::Io(err) => Some(err), + Error::Glob { .. } => None, + Error::UnrecognizedFileType(_) => None, + Error::InvalidDefinition => None, + } + } + + /// Returns a depth associated with recursively walking a directory (if + /// this error was generated from a recursive directory iterator). + pub fn depth(&self) -> Option<usize> { + match *self { + Error::WithPath { ref err, .. } => err.depth(), + Error::WithDepth { depth, .. } => Some(depth), + _ => None, + } + } + + /// Turn an error into a tagged error with the given file path. + fn with_path<P: AsRef<Path>>(self, path: P) -> Error { + Error::WithPath { + path: path.as_ref().to_path_buf(), + err: Box::new(self), + } + } + + /// Turn an error into a tagged error with the given depth. + fn with_depth(self, depth: usize) -> Error { + Error::WithDepth { depth: depth, err: Box::new(self) } + } + + /// Turn an error into a tagged error with the given file path and line + /// number. If path is empty, then it is omitted from the error. 
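In practice, most of these error variants surface while iterating a walk. A hedged sketch of how a caller might inspect them, using only the public accessors above (`io_error` and `depth`); the `./` root is a placeholder:

```rust
use ignore::Walk;

fn main() {
    for result in Walk::new("./") {
        match result {
            Ok(entry) => println!("{}", entry.path().display()),
            Err(err) => {
                // `io_error` exposes the underlying I/O error, if any, and
                // `depth` reports how deep the traversal was when it failed.
                if let Some(io_err) = err.io_error() {
                    eprintln!("I/O error at depth {:?}: {}", err.depth(), io_err);
                } else {
                    eprintln!("ignore error: {}", err);
                }
            }
        }
    }
}
```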
+ fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error { + let errline = + Error::WithLineNumber { line: lineno, err: Box::new(self) }; + if path.as_ref().as_os_str().is_empty() { + return errline; + } + errline.with_path(path) + } + + /// Build an error from a walkdir error. + fn from_walkdir(err: walkdir::Error) -> Error { + let depth = err.depth(); + if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { + return Error::WithDepth { + depth: depth, + err: Box::new(Error::Loop { + ancestor: anc.to_path_buf(), + child: child.to_path_buf(), + }), + }; + } + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { path: path, err: Box::new(ig_err) }; + } + ig_err + } +} + +impl error::Error for Error { + #[allow(deprecated)] + fn description(&self) -> &str { + match *self { + Error::Partial(_) => "partial error", + Error::WithLineNumber { ref err, .. } => err.description(), + Error::WithPath { ref err, .. } => err.description(), + Error::WithDepth { ref err, .. } => err.description(), + Error::Loop { .. } => "file system loop found", + Error::Io(ref err) => err.description(), + Error::Glob { ref err, .. } => err, + Error::UnrecognizedFileType(_) => "unrecognized file type", + Error::InvalidDefinition => "invalid definition", + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Error::Partial(ref errs) => { + let msgs: Vec<String> = + errs.iter().map(|err| err.to_string()).collect(); + write!(f, "{}", msgs.join("\n")) + } + Error::WithLineNumber { line, ref err } => { + write!(f, "line {}: {}", line, err) + } + Error::WithPath { ref path, ref err } => { + write!(f, "{}: {}", path.display(), err) + } + Error::WithDepth { ref err, .. } => err.fmt(f), + Error::Loop { ref ancestor, ref child } => write!( + f, + "File system loop found: \ + {} points to an ancestor {}", + child.display(), + ancestor.display() + ), + Error::Io(ref err) => err.fmt(f), + Error::Glob { glob: None, ref err } => write!(f, "{}", err), + Error::Glob { glob: Some(ref glob), ref err } => { + write!(f, "error parsing glob '{}': {}", glob, err) + } + Error::UnrecognizedFileType(ref ty) => { + write!(f, "unrecognized file type: {}", ty) + } + Error::InvalidDefinition => write!( + f, + "invalid definition (format is type:glob, e.g., \ + html:*.html)" + ), + } + } +} + +impl From<io::Error> for Error { + fn from(err: io::Error) -> Error { + Error::Io(err) + } +} + +#[derive(Debug, Default)] +struct PartialErrorBuilder(Vec<Error>); + +impl PartialErrorBuilder { + fn push(&mut self, err: Error) { + self.0.push(err); + } + + fn push_ignore_io(&mut self, err: Error) { + if !err.is_io() { + self.push(err); + } + } + + fn maybe_push(&mut self, err: Option<Error>) { + if let Some(err) = err { + self.push(err); + } + } + + fn maybe_push_ignore_io(&mut self, err: Option<Error>) { + if let Some(err) = err { + self.push_ignore_io(err); + } + } + + fn into_error_option(mut self) -> Option<Error> { + if self.0.is_empty() { + None + } else if self.0.len() == 1 { + Some(self.0.pop().unwrap()) + } else { + Some(Error::Partial(self.0)) + } + } +} + +/// The result of a glob match. +/// +/// The type parameter `T` typically refers to a type that provides more +/// information about a particular match. For example, it might identify +/// the specific gitignore file and the specific glob pattern that caused +/// the match. 
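A short sketch of how `Match` values flow through caller code; the builder, the `/repo` root, and the glob are placeholders.

```rust
use ignore::gitignore::GitignoreBuilder;

fn main() {
    let mut builder = GitignoreBuilder::new("/repo");
    builder.add_line(None, "*.tmp").unwrap();
    let gi = builder.build().unwrap();

    let m = gi.matched("scratch.tmp", false);
    assert!(m.is_ignore());
    // `inner` exposes the glob that produced the match, if any.
    assert!(m.inner().is_some());
    // `invert` swaps Ignore and Whitelist; this is how the overrides
    // module reuses gitignore matching with the opposite meaning.
    assert!(m.invert().is_whitelist());
}
```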
+#[derive(Clone, Debug)] +pub enum Match<T> { + /// The path didn't match any glob. + None, + /// The highest precedent glob matched indicates the path should be + /// ignored. + Ignore(T), + /// The highest precedent glob matched indicates the path should be + /// whitelisted. + Whitelist(T), +} + +impl<T> Match<T> { + /// Returns true if the match result didn't match any globs. + pub fn is_none(&self) -> bool { + match *self { + Match::None => true, + Match::Ignore(_) | Match::Whitelist(_) => false, + } + } + + /// Returns true if the match result implies the path should be ignored. + pub fn is_ignore(&self) -> bool { + match *self { + Match::Ignore(_) => true, + Match::None | Match::Whitelist(_) => false, + } + } + + /// Returns true if the match result implies the path should be + /// whitelisted. + pub fn is_whitelist(&self) -> bool { + match *self { + Match::Whitelist(_) => true, + Match::None | Match::Ignore(_) => false, + } + } + + /// Inverts the match so that `Ignore` becomes `Whitelist` and + /// `Whitelist` becomes `Ignore`. A non-match remains the same. + pub fn invert(self) -> Match<T> { + match self { + Match::None => Match::None, + Match::Ignore(t) => Match::Whitelist(t), + Match::Whitelist(t) => Match::Ignore(t), + } + } + + /// Return the value inside this match if it exists. + pub fn inner(&self) -> Option<&T> { + match *self { + Match::None => None, + Match::Ignore(ref t) => Some(t), + Match::Whitelist(ref t) => Some(t), + } + } + + /// Apply the given function to the value inside this match. + /// + /// If the match has no value, then return the match unchanged. + pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> { + match self { + Match::None => Match::None, + Match::Ignore(t) => Match::Ignore(f(t)), + Match::Whitelist(t) => Match::Whitelist(f(t)), + } + } + + /// Return the match if it is not none. Otherwise, return other. + pub fn or(self, other: Self) -> Self { + if self.is_none() { + other + } else { + self + } + } +} + +#[cfg(test)] +mod tests { + use std::env; + use std::error; + use std::fs; + use std::path::{Path, PathBuf}; + use std::result; + + /// A convenient result type alias. + pub type Result<T> = + result::Result<T, Box<dyn error::Error + Send + Sync>>; + + macro_rules! err { + ($($tt:tt)*) => { + Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*)) + } + } + + /// A simple wrapper for creating a temporary directory that is + /// automatically deleted when it's dropped. + /// + /// We use this in lieu of tempfile because tempfile brings in too many + /// dependencies. + #[derive(Debug)] + pub struct TempDir(PathBuf); + + impl Drop for TempDir { + fn drop(&mut self) { + fs::remove_dir_all(&self.0).unwrap(); + } + } + + impl TempDir { + /// Create a new empty temporary directory under the system's configured + /// temporary directory. + pub fn new() -> Result<TempDir> { + use std::sync::atomic::{AtomicUsize, Ordering}; + + static TRIES: usize = 100; + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + let tmpdir = env::temp_dir(); + for _ in 0..TRIES { + let count = COUNTER.fetch_add(1, Ordering::SeqCst); + let path = tmpdir.join("rust-ignore").join(count.to_string()); + if path.is_dir() { + continue; + } + fs::create_dir_all(&path).map_err(|e| { + err!("failed to create {}: {}", path.display(), e) + })?; + return Ok(TempDir(path)); + } + Err(err!("failed to create temp dir after {} tries", TRIES)) + } + + /// Return the underlying path to this temporary directory. 
+ pub fn path(&self) -> &Path { + &self.0 + } + } +} diff --git a/vendor/ignore/src/overrides.rs b/vendor/ignore/src/overrides.rs new file mode 100644 index 0000000..e96b8e0 --- /dev/null +++ b/vendor/ignore/src/overrides.rs @@ -0,0 +1,263 @@ +/*! +The overrides module provides a way to specify a set of override globs. +This provides functionality similar to `--include` or `--exclude` in command +line tools. +*/ + +use std::path::Path; + +use crate::gitignore::{self, Gitignore, GitignoreBuilder}; +use crate::{Error, Match}; + +/// Glob represents a single glob in an override matcher. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. For +/// example, if there are one or more whitelist globs and a file path doesn't +/// match any glob in the set, then the file path is considered to be ignored. +/// +/// The lifetime `'a` refers to the lifetime of the matcher that produced +/// this glob. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched(&'a gitignore::Glob), +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } +} + +/// Manages a set of overrides provided explicitly by the end user. +#[derive(Clone, Debug)] +pub struct Override(Gitignore); + +impl Override { + /// Returns an empty matcher that never matches any file path. + pub fn empty() -> Override { + Override(Gitignore::empty()) + } + + /// Returns the directory of this override set. + /// + /// All matches are done relative to this path. + pub fn path(&self) -> &Path { + self.0.path() + } + + /// Returns true if and only if this matcher is empty. + /// + /// When a matcher is empty, it will never match any file path. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the total number of ignore globs. + pub fn num_ignores(&self) -> u64 { + self.0.num_whitelists() + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.0.num_ignores() + } + + /// Returns whether the given file path matched a pattern in this override + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// If there are no overrides, then this always returns `Match::None`. + /// + /// If there is at least one whitelist override and `is_dir` is false, then + /// this never returns `Match::None`, since non-matches are interpreted as + /// ignored. + /// + /// The given path is matched to the globs relative to the path given + /// when building the override matcher. Specifically, before matching + /// `path`, its prefix (as determined by a common suffix of the directory + /// given) is stripped. If there is no common suffix/prefix overlap, then + /// `path` is assumed to reside in the same directory as the root path for + /// this set of overrides. + pub fn matched<'a, P: AsRef<Path>>( + &'a self, + path: P, + is_dir: bool, + ) -> Match<Glob<'a>> { + if self.is_empty() { + return Match::None; + } + let mat = self.0.matched(path, is_dir).invert(); + if mat.is_none() && self.num_whitelists() > 0 && !is_dir { + return Match::Ignore(Glob::unmatched()); + } + mat.map(move |giglob| Glob(GlobInner::Matched(giglob))) + } +} + +/// Builds a matcher for a set of glob overrides. 
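A small sketch of the override semantics described here, with placeholder globs and a placeholder `/repo` root: plain globs whitelist, `!` globs ignore, and once any whitelist glob exists, unmatched files (but not directories) are ignored.

```rust
use ignore::overrides::OverrideBuilder;

fn main() {
    let mut builder = OverrideBuilder::new("/repo");
    builder.add("*.rs").unwrap();
    builder.add("!*.tmp").unwrap();
    let over = builder.build().unwrap();

    assert!(over.matched("main.rs", false).is_whitelist());
    assert!(over.matched("scratch.tmp", false).is_ignore());
    // With at least one whitelist glob, unmatched files are ignored...
    assert!(over.matched("README.md", false).is_ignore());
    // ...but unmatched directories are not, so traversal can continue.
    assert!(over.matched("src", true).is_none());
}
```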
+#[derive(Clone, Debug)] +pub struct OverrideBuilder { + builder: GitignoreBuilder, +} + +impl OverrideBuilder { + /// Create a new override builder. + /// + /// Matching is done relative to the directory path provided. + pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder { + OverrideBuilder { builder: GitignoreBuilder::new(path) } + } + + /// Builds a new override matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result<Override, Error> { + Ok(Override(self.builder.build()?)) + } + + /// Add a glob to the set of overrides. + /// + /// Globs provided here have precisely the same semantics as a single + /// line in a `gitignore` file, where the meaning of `!` is inverted: + /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, + /// all matches of the glob provided are treated as whitelist matches. + pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { + self.builder.add_line(None, glob)?; + Ok(self) + } + + /// Toggle whether the globs should be matched case insensitively or not. + /// + /// When this option is changed, only globs added after the change will be affected. + /// + /// This is disabled by default. + pub fn case_insensitive( + &mut self, + yes: bool, + ) -> Result<&mut OverrideBuilder, Error> { + // TODO: This should not return a `Result`. Fix this in the next semver + // release. + self.builder.case_insensitive(yes)?; + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use super::{Override, OverrideBuilder}; + + const ROOT: &'static str = "/home/andrew/foo"; + + fn ov(globs: &[&str]) -> Override { + let mut builder = OverrideBuilder::new(ROOT); + for glob in globs { + builder.add(glob).unwrap(); + } + builder.build().unwrap() + } + + #[test] + fn empty() { + let ov = ov(&[]); + assert!(ov.matched("a.foo", false).is_none()); + assert!(ov.matched("a", false).is_none()); + assert!(ov.matched("", false).is_none()); + } + + #[test] + fn simple() { + let ov = ov(&["*.foo", "!*.bar"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.foo", true).is_whitelist()); + assert!(ov.matched("a.rs", false).is_ignore()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn only_ignores() { + let ov = ov(&["!*.bar"]); + assert!(ov.matched("a.rs", false).is_none()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn precedence() { + let ov = ov(&["*.foo", "!*.bar.foo"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.baz", false).is_ignore()); + assert!(ov.matched("a.bar.foo", false).is_ignore()); + } + + #[test] + fn gitignore() { + let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); + assert!(ov.matched("bar/lib.rs", false).is_whitelist()); + assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); + assert!(ov.matched("wat/bar/lib.rs", false).is_ignore()); + assert!(ov.matched("foo", false).is_whitelist()); + assert!(ov.matched("wat/foo", false).is_ignore()); + assert!(ov.matched("baz", false).is_ignore()); + assert!(ov.matched("baz/a", false).is_whitelist()); + assert!(ov.matched("baz/a/b", false).is_whitelist()); + } + + #[test] + fn allow_directories() { + // This tests that directories are NOT ignored when they are unmatched. 
+ let ov = ov(&["*.rs"]); + assert!(ov.matched("foo.rs", false).is_whitelist()); + assert!(ov.matched("foo.c", false).is_ignore()); + assert!(ov.matched("foo", false).is_ignore()); + assert!(ov.matched("foo", true).is_none()); + assert!(ov.matched("src/foo.rs", false).is_whitelist()); + assert!(ov.matched("src/foo.c", false).is_ignore()); + assert!(ov.matched("src/foo", false).is_ignore()); + assert!(ov.matched("src/foo", true).is_none()); + } + + #[test] + fn absolute_path() { + let ov = ov(&["!/bar"]); + assert!(ov.matched("./foo/bar", false).is_none()); + } + + #[test] + fn case_insensitive() { + let ov = OverrideBuilder::new(ROOT) + .case_insensitive(true) + .unwrap() + .add("*.html") + .unwrap() + .build() + .unwrap(); + assert!(ov.matched("foo.html", false).is_whitelist()); + assert!(ov.matched("foo.HTML", false).is_whitelist()); + assert!(ov.matched("foo.htm", false).is_ignore()); + assert!(ov.matched("foo.HTM", false).is_ignore()); + } + + #[test] + fn default_case_sensitive() { + let ov = + OverrideBuilder::new(ROOT).add("*.html").unwrap().build().unwrap(); + assert!(ov.matched("foo.html", false).is_whitelist()); + assert!(ov.matched("foo.HTML", false).is_ignore()); + assert!(ov.matched("foo.htm", false).is_ignore()); + assert!(ov.matched("foo.HTM", false).is_ignore()); + } +} diff --git a/vendor/ignore/src/pathutil.rs b/vendor/ignore/src/pathutil.rs new file mode 100644 index 0000000..f21b4f5 --- /dev/null +++ b/vendor/ignore/src/pathutil.rs @@ -0,0 +1,142 @@ +use std::ffi::OsStr; +use std::path::Path; + +use crate::walk::DirEntry; + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// This only returns true if the base name of the path starts with a `.`. +/// +/// On Unix, this implements a more optimized check. +#[cfg(unix)] +pub fn is_hidden(dent: &DirEntry) -> bool { + use std::os::unix::ffi::OsStrExt; + + if let Some(name) = file_name(dent.path()) { + name.as_bytes().get(0) == Some(&b'.') + } else { + false + } +} + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// On Windows, this returns true if one of the following is true: +/// +/// * The base name of the path starts with a `.`. +/// * The file attributes have the `HIDDEN` property set. +#[cfg(windows)] +pub fn is_hidden(dent: &DirEntry) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi_util::file; + + // This looks like we're doing an extra stat call, but on Windows, the + // directory traverser reuses the metadata retrieved from each directory + // entry and stores it on the DirEntry itself. So this is "free." + if let Ok(md) = dent.metadata() { + if file::is_hidden(md.file_attributes() as u64) { + return true; + } + } + if let Some(name) = file_name(dent.path()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// This only returns true if the base name of the path starts with a `.`. +#[cfg(not(any(unix, windows)))] +pub fn is_hidden(dent: &DirEntry) -> bool { + if let Some(name) = file_name(dent.path()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. 
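These helpers live in a private module, so they cannot be called from outside the crate; a hypothetical unit test inside `pathutil.rs` sketches the intended behavior (the paths are arbitrary).

```rust
#[cfg(test)]
mod tests {
    use super::{file_name, is_file_name, strip_prefix};
    use std::ffi::OsStr;
    use std::path::Path;

    #[test]
    fn pathutil_behavior() {
        // A leading `./` is a prefix like any other.
        assert_eq!(
            strip_prefix("./", Path::new("./foo")),
            Some(Path::new("foo"))
        );
        // `None` when the prefix is not actually a prefix.
        assert_eq!(strip_prefix("/repo", Path::new("src/main.rs")), None);

        // True only when there is no directory component at all.
        assert!(is_file_name("Cargo.toml"));
        assert!(!is_file_name("src/main.rs"));

        // Mirrors `Path::file_name`, returning `None` for `.` and `..`.
        assert_eq!(
            file_name(Path::new("src/main.rs")),
            Some(OsStr::new("main.rs"))
        );
        assert_eq!(file_name(Path::new("..")), None);
    }
}
```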
+#[cfg(unix)] +pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + use std::os::unix::ffi::OsStrExt; + + let prefix = prefix.as_ref().as_os_str().as_bytes(); + let path = path.as_os_str().as_bytes(); + if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { + None + } else { + Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..]))) + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. +#[cfg(not(unix))] +pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + path.strip_prefix(prefix).ok() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(unix)] +pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool { + use memchr::memchr; + use std::os::unix::ffi::OsStrExt; + + let path = path.as_ref().as_os_str().as_bytes(); + memchr(b'/', path).is_none() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(not(unix))] +pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool { + path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(unix)] +pub fn file_name<'a, P: AsRef<Path> + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + use memchr::memrchr; + use std::os::unix::ffi::OsStrExt; + + let path = path.as_ref().as_os_str().as_bytes(); + if path.is_empty() { + return None; + } else if path.len() == 1 && path[0] == b'.' { + return None; + } else if path.last() == Some(&b'.') { + return None; + } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { + return None; + } + let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); + Some(OsStr::from_bytes(&path[last_slash..])) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(not(unix))] +pub fn file_name<'a, P: AsRef<Path> + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + path.as_ref().file_name() +} diff --git a/vendor/ignore/src/types.rs b/vendor/ignore/src/types.rs new file mode 100644 index 0000000..616a8d2 --- /dev/null +++ b/vendor/ignore/src/types.rs @@ -0,0 +1,583 @@ +/*! +The types module provides a way of associating globs on file names to file +types. + +This can be used to match specific types of files. For example, among +the default file types provided, the Rust file type is defined to be `*.rs` +with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with +name `c`. + +Note that the set of default types may change over time. + +# Example + +This shows how to create and use a simple file type matcher using the default +file types defined in this crate. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.select("rust"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_whitelist()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: negation + +This is like the previous example, but shows how negating a file type works. 
+That is, this will let us match file paths that *don't* correspond to a +particular file type. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.negate("c"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_none()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: custom file type definitions + +This shows how to extend this library default file type definitions with +your own. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +// Another way of adding a file type definition. +// This is useful when accepting input from an end user. +builder.add_def("bar:*.bar"); +// Note: we only select `foo`, not `bar`. +builder.select("foo"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +// This is ignored because we only selected the `foo` file type. +assert!(matcher.matched("x.bar", false).is_ignore()); +``` + +We can also add file type definitions based on other definitions. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +builder.add_def("bar:include:foo,cpp"); +builder.select("bar"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +assert!(matcher.matched("y.cpp", false).is_whitelist()); +``` +*/ + +use std::cell::RefCell; +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use regex::Regex; +use thread_local::ThreadLocal; + +use crate::default_types::DEFAULT_TYPES; +use crate::pathutil::file_name; +use crate::{Error, Match}; + +/// Glob represents a single glob in a set of file type definitions. +/// +/// There may be more than one glob for a particular file type. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. +/// For example, if there are one or more selections and a file path doesn't +/// match any of those selections, then the file path is considered to be +/// ignored. +/// +/// The lifetime `'a` refers to the lifetime of the underlying file type +/// definition, which corresponds to the lifetime of the file type matcher. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched { + /// The file type definition which provided the glob. + def: &'a FileTypeDef, + }, +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } + + /// Return the file type definition that matched, if one exists. A file type + /// definition always exists when a specific definition matches a file + /// path. + pub fn file_type_def(&self) -> Option<&FileTypeDef> { + match self { + Glob(GlobInner::UnmatchedIgnore) => None, + Glob(GlobInner::Matched { def, .. }) => Some(def), + } + } +} + +/// A single file type definition. +/// +/// File type definitions can be retrieved in aggregate from a file type +/// matcher. File type definitions are also reported when its responsible +/// for a match. 
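Putting the pieces together, a brief sketch of how a match reports back the responsible definition; the selected `rust` type comes from the default definitions, while the path is arbitrary.

```rust
use ignore::types::TypesBuilder;

fn main() {
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    builder.select("rust");
    let matcher = builder.build().unwrap();

    let m = matcher.matched("src/lib.rs", false);
    assert!(m.is_whitelist());
    // The glob carried by the match identifies the definition that matched.
    let def = m.inner().and_then(|glob| glob.file_type_def()).unwrap();
    assert_eq!(def.name(), "rust");
}
```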
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FileTypeDef { + name: String, + globs: Vec<String>, +} + +impl FileTypeDef { + /// Return the name of this file type. + pub fn name(&self) -> &str { + &self.name + } + + /// Return the globs used to recognize this file type. + pub fn globs(&self) -> &[String] { + &self.globs + } +} + +/// Types is a file type matcher. +#[derive(Clone, Debug)] +pub struct Types { + /// All of the file type definitions, sorted lexicographically by name. + defs: Vec<FileTypeDef>, + /// All of the selections made by the user. + selections: Vec<Selection<FileTypeDef>>, + /// Whether there is at least one Selection::Select in our selections. + /// When this is true, a Match::None is converted to Match::Ignore. + has_selected: bool, + /// A mapping from glob index in the set to two indices. The first is an + /// index into `selections` and the second is an index into the + /// corresponding file type definition's list of globs. + glob_to_selection: Vec<(usize, usize)>, + /// The set of all glob selections, used for actual matching. + set: GlobSet, + /// Temporary storage for globs that match. + matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>, +} + +/// Indicates the type of a selection for a particular file type. +#[derive(Clone, Debug)] +enum Selection<T> { + Select(String, T), + Negate(String, T), +} + +impl<T> Selection<T> { + fn is_negated(&self) -> bool { + match *self { + Selection::Select(..) => false, + Selection::Negate(..) => true, + } + } + + fn name(&self) -> &str { + match *self { + Selection::Select(ref name, _) => name, + Selection::Negate(ref name, _) => name, + } + } + + fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> { + match self { + Selection::Select(name, inner) => { + Selection::Select(name, f(inner)) + } + Selection::Negate(name, inner) => { + Selection::Negate(name, f(inner)) + } + } + } + + fn inner(&self) -> &T { + match *self { + Selection::Select(_, ref inner) => inner, + Selection::Negate(_, ref inner) => inner, + } + } +} + +impl Types { + /// Creates a new file type matcher that never matches any path and + /// contains no file type definitions. + pub fn empty() -> Types { + Types { + defs: vec![], + selections: vec![], + has_selected: false, + glob_to_selection: vec![], + set: GlobSetBuilder::new().build().unwrap(), + matches: Arc::new(ThreadLocal::default()), + } + } + + /// Returns true if and only if this matcher has zero selections. + pub fn is_empty(&self) -> bool { + self.selections.is_empty() + } + + /// Returns the number of selections used in this matcher. + pub fn len(&self) -> usize { + self.selections.len() + } + + /// Return the set of current file type definitions. + /// + /// Definitions and globs are sorted. + pub fn definitions(&self) -> &[FileTypeDef] { + &self.defs + } + + /// Returns a match for the given path against this file type matcher. + /// + /// The path is considered whitelisted if it matches a selected file type. + /// The path is considered ignored if it matches a negated file type. + /// If at least one file type is selected and `path` doesn't match, then + /// the path is also considered ignored. + pub fn matched<'a, P: AsRef<Path>>( + &'a self, + path: P, + is_dir: bool, + ) -> Match<Glob<'a>> { + // File types don't apply to directories, and we can't do anything + // if our glob set is empty. + if is_dir || self.set.is_empty() { + return Match::None; + } + // We only want to match against the file name, so extract it. + // If one doesn't exist, then we can't match it. 
+ let name = match file_name(path.as_ref()) { + Some(name) => name, + None if self.has_selected => { + return Match::Ignore(Glob::unmatched()); + } + None => { + return Match::None; + } + }; + let mut matches = self.matches.get_or_default().borrow_mut(); + self.set.matches_into(name, &mut *matches); + // The highest precedent match is the last one. + if let Some(&i) = matches.last() { + let (isel, _) = self.glob_to_selection[i]; + let sel = &self.selections[isel]; + let glob = Glob(GlobInner::Matched { def: sel.inner() }); + return if sel.is_negated() { + Match::Ignore(glob) + } else { + Match::Whitelist(glob) + }; + } + if self.has_selected { + Match::Ignore(Glob::unmatched()) + } else { + Match::None + } + } +} + +/// TypesBuilder builds a type matcher from a set of file type definitions and +/// a set of file type selections. +pub struct TypesBuilder { + types: HashMap<String, FileTypeDef>, + selections: Vec<Selection<()>>, +} + +impl TypesBuilder { + /// Create a new builder for a file type matcher. + /// + /// The builder contains *no* type definitions to start with. A set + /// of default type definitions can be added with `add_defaults`, and + /// additional type definitions can be added with `select` and `negate`. + pub fn new() -> TypesBuilder { + TypesBuilder { types: HashMap::new(), selections: vec![] } + } + + /// Build the current set of file type definitions *and* selections into + /// a file type matcher. + pub fn build(&self) -> Result<Types, Error> { + let defs = self.definitions(); + let has_selected = self.selections.iter().any(|s| !s.is_negated()); + + let mut selections = vec![]; + let mut glob_to_selection = vec![]; + let mut build_set = GlobSetBuilder::new(); + for (isel, selection) in self.selections.iter().enumerate() { + let def = match self.types.get(selection.name()) { + Some(def) => def.clone(), + None => { + let name = selection.name().to_string(); + return Err(Error::UnrecognizedFileType(name)); + } + }; + for (iglob, glob) in def.globs.iter().enumerate() { + build_set.add( + GlobBuilder::new(glob) + .literal_separator(true) + .build() + .map_err(|err| Error::Glob { + glob: Some(glob.to_string()), + err: err.kind().to_string(), + })?, + ); + glob_to_selection.push((isel, iglob)); + } + selections.push(selection.clone().map(move |_| def)); + } + let set = build_set + .build() + .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; + Ok(Types { + defs: defs, + selections: selections, + has_selected: has_selected, + glob_to_selection: glob_to_selection, + set: set, + matches: Arc::new(ThreadLocal::default()), + }) + } + + /// Return the set of current file type definitions. + /// + /// Definitions and globs are sorted. + pub fn definitions(&self) -> Vec<FileTypeDef> { + let mut defs = vec![]; + for def in self.types.values() { + let mut def = def.clone(); + def.globs.sort(); + defs.push(def); + } + defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); + defs + } + + /// Select the file type given by `name`. + /// + /// If `name` is `all`, then all file types currently defined are selected. + pub fn select(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Select(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Select(name.to_string(), ())); + } + self + } + + /// Ignore the file type given by `name`. + /// + /// If `name` is `all`, then all file types currently defined are negated. 
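A sketch of `select` and `negate` working together; `foo` is a made-up custom type, while `rust` comes from the default definitions.

```rust
use ignore::types::TypesBuilder;

fn main() {
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    builder.add("foo", "*.foo").unwrap();
    builder.select("foo");
    builder.negate("rust");
    let matcher = builder.build().unwrap();

    assert!(matcher.matched("data.foo", false).is_whitelist());
    assert!(matcher.matched("main.rs", false).is_ignore());
    // Everything else is ignored too, because at least one type is selected.
    assert!(matcher.matched("notes.txt", false).is_ignore());
}
```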
+ pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + self + } + + /// Clear any file type definitions for the type name given. + pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { + self.types.remove(name); + self + } + + /// Add a new file type definition. `name` can be arbitrary and `pat` + /// should be a glob recognizing file paths belonging to the `name` type. + /// + /// If `name` is `all` or otherwise contains any character that is not a + /// Unicode letter or number, then an error is returned. + pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { + lazy_static::lazy_static! { + static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap(); + }; + if name == "all" || !RE.is_match(name) { + return Err(Error::InvalidDefinition); + } + let (key, glob) = (name.to_string(), glob.to_string()); + self.types + .entry(key) + .or_insert_with(|| FileTypeDef { + name: name.to_string(), + globs: vec![], + }) + .globs + .push(glob); + Ok(()) + } + + /// Add a new file type definition specified in string form. There are two + /// valid formats: + /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the + /// given name with the given glob. + /// 2. `{name}:include:{comma-separated list of already defined names}. + /// This defines an 'include' definition that associates the given name + /// with the definitions of the given existing types. + /// Names may not include any characters that are not + /// Unicode letters or numbers. + pub fn add_def(&mut self, def: &str) -> Result<(), Error> { + let parts: Vec<&str> = def.split(':').collect(); + match parts.len() { + 2 => { + let name = parts[0]; + let glob = parts[1]; + if name.is_empty() || glob.is_empty() { + return Err(Error::InvalidDefinition); + } + self.add(name, glob) + } + 3 => { + let name = parts[0]; + let types_string = parts[2]; + if name.is_empty() + || parts[1] != "include" + || types_string.is_empty() + { + return Err(Error::InvalidDefinition); + } + let types = types_string.split(','); + // Check ahead of time to ensure that all types specified are + // present and fail fast if not. + if types.clone().any(|t| !self.types.contains_key(t)) { + return Err(Error::InvalidDefinition); + } + for type_name in types { + let globs = + self.types.get(type_name).unwrap().globs.clone(); + for glob in globs { + self.add(name, &glob)?; + } + } + Ok(()) + } + _ => Err(Error::InvalidDefinition), + } + } + + /// Add a set of default file type definitions. + pub fn add_defaults(&mut self) -> &mut TypesBuilder { + static MSG: &'static str = "adding a default type should never fail"; + for &(name, exts) in DEFAULT_TYPES { + for ext in exts { + self.add(name, ext).expect(MSG); + } + } + self + } +} + +#[cfg(test)] +mod tests { + use super::TypesBuilder; + + macro_rules! 
matched { + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, true); + }; + (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, false); + }; + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr, $matched:expr) => { + #[test] + fn $name() { + let mut btypes = TypesBuilder::new(); + for tydef in $types { + btypes.add_def(tydef).unwrap(); + } + for sel in $sel { + btypes.select(sel); + } + for selnot in $selnot { + btypes.negate(selnot); + } + let types = btypes.build().unwrap(); + let mat = types.matched($path, false); + assert_eq!($matched, !mat.is_ignore()); + } + }; + } + + fn types() -> Vec<&'static str> { + vec![ + "html:*.html", + "html:*.htm", + "rust:*.rs", + "js:*.js", + "foo:*.{rs,foo}", + "combo:include:html,rust", + ] + } + + matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); + matched!(match2, types(), vec!["html"], vec![], "index.html"); + matched!(match3, types(), vec!["html"], vec![], "index.htm"); + matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); + matched!(match5, types(), vec![], vec![], "index.html"); + matched!(match6, types(), vec![], vec!["rust"], "index.html"); + matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); + matched!(match8, types(), vec!["combo"], vec![], "index.html"); + matched!(match9, types(), vec!["combo"], vec![], "lib.rs"); + + matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); + matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); + matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); + matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); + matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo"); + matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js"); + + #[test] + fn test_invalid_defs() { + let mut btypes = TypesBuilder::new(); + for tydef in types() { + btypes.add_def(tydef).unwrap(); + } + // Preserve the original definitions for later comparison. + let original_defs = btypes.definitions(); + let bad_defs = vec![ + // Reference to type that does not exist + "combo:include:html,python", + // Bad format + "combo:foobar:html,rust", + "", + ]; + for def in bad_defs { + assert!(btypes.add_def(def).is_err()); + // Ensure that nothing changed, even if some of the includes were valid. + assert_eq!(btypes.definitions(), original_defs); + } + } +} diff --git a/vendor/ignore/src/walk.rs b/vendor/ignore/src/walk.rs new file mode 100644 index 0000000..602faae --- /dev/null +++ b/vendor/ignore/src/walk.rs @@ -0,0 +1,2251 @@ +use std::cmp; +use std::ffi::OsStr; +use std::fmt; +use std::fs::{self, FileType, Metadata}; +use std::io; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Duration; +use std::vec; + +use same_file::Handle; +use walkdir::{self, WalkDir}; + +use crate::dir::{Ignore, IgnoreBuilder}; +use crate::gitignore::GitignoreBuilder; +use crate::overrides::Override; +use crate::types::Types; +use crate::{Error, PartialErrorBuilder}; + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. 
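A small sketch of how these directory entries and their attached errors typically reach a caller; the `./` root is a placeholder and `hidden(false)` is just one example option.

```rust
use ignore::WalkBuilder;

fn main() {
    for result in WalkBuilder::new("./").hidden(false).build() {
        match result {
            Ok(entry) => {
                // `error` surfaces non-fatal problems (e.g. a bad glob in a
                // `.gitignore`) hit while producing this entry.
                if let Some(err) = entry.error() {
                    eprintln!("warning: {}", err);
                }
                println!("{} (depth {})", entry.path().display(), entry.depth());
            }
            Err(err) => eprintln!("ERROR: {}", err),
        }
    }
}
```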
+#[derive(Clone, Debug)] +pub struct DirEntry { + dent: DirEntryInner, + err: Option<Error>, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.path() + } + + /// The full path that this entry represents. + /// Analogous to [`path`], but moves ownership of the path. + /// + /// [`path`]: struct.DirEntry.html#method.path + pub fn into_path(self) -> PathBuf { + self.dent.into_path() + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symlink(&self) -> bool { + self.dent.path_is_symlink() + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result<Metadata, Error> { + self.dent.metadata() + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> Option<FileType> { + self.dent.file_type() + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.file_name() + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.dent.depth() + } + + /// Returns the underlying inode number if one exists. + /// + /// If this entry doesn't have an inode number, then `None` is returned. + #[cfg(unix)] + pub fn ino(&self) -> Option<u64> { + self.dent.ino() + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. Errors related to traversing a directory tree itself are reported + /// as part of yielding the directory entry, and not with this method. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } + + /// Returns true if and only if this entry points to a directory. + pub(crate) fn is_dir(&self) -> bool { + self.dent.is_dir() + } + + fn new_stdin() -> DirEntry { + DirEntry { dent: DirEntryInner::Stdin, err: None } + } + + fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry { + DirEntry { dent: DirEntryInner::Walkdir(dent), err: err } + } + + fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry { + DirEntry { dent: DirEntryInner::Raw(dent), err: err } + } +} + +/// DirEntryInner is the implementation of DirEntry. +/// +/// It specifically represents three distinct sources of directory entries: +/// +/// 1. From the walkdir crate. +/// 2. Special entries that represent things like stdin. +/// 3. From a path. +/// +/// Specifically, (3) has to essentially re-create the DirEntry implementation +/// from WalkDir. 
+#[derive(Clone, Debug)] +enum DirEntryInner { + Stdin, + Walkdir(walkdir::DirEntry), + Raw(DirEntryRaw), +} + +impl DirEntryInner { + fn path(&self) -> &Path { + use self::DirEntryInner::*; + match *self { + Stdin => Path::new("<stdin>"), + Walkdir(ref x) => x.path(), + Raw(ref x) => x.path(), + } + } + + fn into_path(self) -> PathBuf { + use self::DirEntryInner::*; + match self { + Stdin => PathBuf::from("<stdin>"), + Walkdir(x) => x.into_path(), + Raw(x) => x.into_path(), + } + } + + fn path_is_symlink(&self) -> bool { + use self::DirEntryInner::*; + match *self { + Stdin => false, + Walkdir(ref x) => x.path_is_symlink(), + Raw(ref x) => x.path_is_symlink(), + } + } + + fn is_stdin(&self) -> bool { + match *self { + DirEntryInner::Stdin => true, + _ => false, + } + } + + fn metadata(&self) -> Result<Metadata, Error> { + use self::DirEntryInner::*; + match *self { + Stdin => { + let err = Error::Io(io::Error::new( + io::ErrorKind::Other, + "<stdin> has no metadata", + )); + Err(err.with_path("<stdin>")) + } + Walkdir(ref x) => x.metadata().map_err(|err| { + Error::Io(io::Error::from(err)).with_path(x.path()) + }), + Raw(ref x) => x.metadata(), + } + } + + fn file_type(&self) -> Option<FileType> { + use self::DirEntryInner::*; + match *self { + Stdin => None, + Walkdir(ref x) => Some(x.file_type()), + Raw(ref x) => Some(x.file_type()), + } + } + + fn file_name(&self) -> &OsStr { + use self::DirEntryInner::*; + match *self { + Stdin => OsStr::new("<stdin>"), + Walkdir(ref x) => x.file_name(), + Raw(ref x) => x.file_name(), + } + } + + fn depth(&self) -> usize { + use self::DirEntryInner::*; + match *self { + Stdin => 0, + Walkdir(ref x) => x.depth(), + Raw(ref x) => x.depth(), + } + } + + #[cfg(unix)] + fn ino(&self) -> Option<u64> { + use self::DirEntryInner::*; + use walkdir::DirEntryExt; + match *self { + Stdin => None, + Walkdir(ref x) => Some(x.ino()), + Raw(ref x) => Some(x.ino()), + } + } + + /// Returns true if and only if this entry points to a directory. + fn is_dir(&self) -> bool { + self.file_type().map(|ft| ft.is_dir()).unwrap_or(false) + } +} + +/// DirEntryRaw is essentially copied from the walkdir crate so that we can +/// build `DirEntry`s from whole cloth in the parallel iterator. +#[derive(Clone)] +struct DirEntryRaw { + /// The path as reported by the `fs::ReadDir` iterator (even if it's a + /// symbolic link). + path: PathBuf, + /// The file type. Necessary for recursive iteration, so store it. + ty: FileType, + /// Is set when this entry was created from a symbolic link and the user + /// expects the iterator to follow symbolic links. + follow_link: bool, + /// The depth at which this entry was generated relative to the root. + depth: usize, + /// The underlying inode number (Unix only). + #[cfg(unix)] + ino: u64, + /// The underlying metadata (Windows only). We store this on Windows + /// because this comes for free while reading a directory. + #[cfg(windows)] + metadata: fs::Metadata, +} + +impl fmt::Debug for DirEntryRaw { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Leaving out FileType because it doesn't have a debug impl + // in Rust 1.9. We could add it if we really wanted to by manually + // querying each possibly file type. Meh. 
---AG + f.debug_struct("DirEntryRaw") + .field("path", &self.path) + .field("follow_link", &self.follow_link) + .field("depth", &self.depth) + .finish() + } +} + +impl DirEntryRaw { + fn path(&self) -> &Path { + &self.path + } + + fn into_path(self) -> PathBuf { + self.path + } + + fn path_is_symlink(&self) -> bool { + self.ty.is_symlink() || self.follow_link + } + + fn metadata(&self) -> Result<Metadata, Error> { + self.metadata_internal() + } + + #[cfg(windows)] + fn metadata_internal(&self) -> Result<fs::Metadata, Error> { + if self.follow_link { + fs::metadata(&self.path) + } else { + Ok(self.metadata.clone()) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + #[cfg(not(windows))] + fn metadata_internal(&self) -> Result<fs::Metadata, Error> { + if self.follow_link { + fs::metadata(&self.path) + } else { + fs::symlink_metadata(&self.path) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + fn file_type(&self) -> FileType { + self.ty + } + + fn file_name(&self) -> &OsStr { + self.path.file_name().unwrap_or_else(|| self.path.as_os_str()) + } + + fn depth(&self) -> usize { + self.depth + } + + #[cfg(unix)] + fn ino(&self) -> u64 { + self.ino + } + + fn from_entry( + depth: usize, + ent: &fs::DirEntry, + ) -> Result<DirEntryRaw, Error> { + let ty = ent.file_type().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { depth: depth, err: Box::new(err) } + })?; + DirEntryRaw::from_entry_os(depth, ent, ty) + } + + #[cfg(windows)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + let md = ent.metadata().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { depth: depth, err: Box::new(err) } + })?; + Ok(DirEntryRaw { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + use std::os::unix::fs::DirEntryExt; + + Ok(DirEntryRaw { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + ino: ent.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms + // (e.g. wasm32). + #[cfg(not(any(windows, unix)))] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } + + #[cfg(windows)] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + let md = + fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + use std::os::unix::fs::MetadataExt; + + let md = + fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth: depth, + ino: md.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms + // (e.g. wasm32). 
+ #[cfg(not(any(windows, unix)))] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } +} + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each precedence +/// level, more nested ignore files have a higher precedence than less nested +/// ignore files. +/// * Third, if the previous step yields an ignore match, then all matching +/// is stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. As above, if it yields an ignore match, then all matching is +/// stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, unless the path is a directory, the size of the file is compared +/// against the max filesize limit. If it exceeds the limit, it is skipped. +/// * Seventh, if the path has made it this far then it is yielded in the +/// iterator. 
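+///
+/// A minimal configuration sketch (added here for illustration; it assumes
+/// the crate's usual `ignore::WalkBuilder` re-export):
+///
+/// ```no_run
+/// use ignore::WalkBuilder;
+///
+/// let mut builder = WalkBuilder::new("./");
+/// builder.max_depth(Some(3)); // don't descend more than three levels
+/// builder.follow_links(true); // follow symbolic links
+/// for result in builder.build() {
+///     match result {
+///         Ok(entry) => println!("{}", entry.path().display()),
+///         Err(err) => eprintln!("ERROR: {}", err),
+///     }
+/// }
+/// ```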
+#[derive(Clone)] +pub struct WalkBuilder { + paths: Vec<PathBuf>, + ig_builder: IgnoreBuilder, + max_depth: Option<usize>, + max_filesize: Option<u64>, + follow_links: bool, + same_file_system: bool, + sorter: Option<Sorter>, + threads: usize, + skip: Option<Arc<Handle>>, + filter: Option<Filter>, +} + +#[derive(Clone)] +enum Sorter { + ByName( + Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>, + ), + ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>), +} + +#[derive(Clone)] +struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>); + +impl fmt::Debug for WalkBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WalkBuilder") + .field("paths", &self.paths) + .field("ig_builder", &self.ig_builder) + .field("max_depth", &self.max_depth) + .field("max_filesize", &self.max_filesize) + .field("follow_links", &self.follow_links) + .field("threads", &self.threads) + .field("skip", &self.skip) + .finish() + } +} + +impl WalkBuilder { + /// Create a new builder for a recursive directory iterator for the + /// directory given. + /// + /// Note that if you want to traverse multiple different directories, it + /// is better to call `add` on this builder than to create multiple + /// `Walk` values. + pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder { + WalkBuilder { + paths: vec![path.as_ref().to_path_buf()], + ig_builder: IgnoreBuilder::new(), + max_depth: None, + max_filesize: None, + follow_links: false, + same_file_system: false, + sorter: None, + threads: 0, + skip: None, + filter: None, + } + } + + /// Build a new `Walk` iterator. + pub fn build(&self) -> Walk { + let follow_links = self.follow_links; + let max_depth = self.max_depth; + let sorter = self.sorter.clone(); + let its = self + .paths + .iter() + .map(move |p| { + if p == Path::new("-") { + (p.to_path_buf(), None) + } else { + let mut wd = WalkDir::new(p); + wd = wd.follow_links(follow_links || p.is_file()); + wd = wd.same_file_system(self.same_file_system); + if let Some(max_depth) = max_depth { + wd = wd.max_depth(max_depth); + } + if let Some(ref sorter) = sorter { + match sorter.clone() { + Sorter::ByName(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.file_name(), b.file_name()) + }); + } + Sorter::ByPath(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.path(), b.path()) + }); + } + } + } + (p.to_path_buf(), Some(WalkEventIter::from(wd))) + } + }) + .collect::<Vec<_>>() + .into_iter(); + let ig_root = self.ig_builder.build(); + Walk { + its: its, + it: None, + ig_root: ig_root.clone(), + ig: ig_root.clone(), + max_filesize: self.max_filesize, + skip: self.skip.clone(), + filter: self.filter.clone(), + } + } + + /// Build a new `WalkParallel` iterator. + /// + /// Note that this *doesn't* return something that implements `Iterator`. + /// Instead, the returned value must be run with a closure. e.g., + /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`. + pub fn build_parallel(&self) -> WalkParallel { + WalkParallel { + paths: self.paths.clone().into_iter(), + ig_root: self.ig_builder.build(), + max_depth: self.max_depth, + max_filesize: self.max_filesize, + follow_links: self.follow_links, + same_file_system: self.same_file_system, + threads: self.threads, + skip: self.skip.clone(), + filter: self.filter.clone(), + } + } + + /// Add a file path to the iterator. + /// + /// Each additional file path added is traversed recursively. 
This should + /// be preferred over building multiple `Walk` iterators since this + /// enables reusing resources across iteration. + pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// The maximum depth to recurse. + /// + /// The default, `None`, imposes no depth restriction. + pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder { + self.max_depth = depth; + self + } + + /// Whether to follow symbolic links or not. + pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { + self.follow_links = yes; + self + } + + /// Whether to ignore files above the specified limit. + pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder { + self.max_filesize = filesize; + self + } + + /// The number of threads to use for traversal. + /// + /// Note that this only has an effect when using `build_parallel`. + /// + /// The default setting is `0`, which chooses the number of threads + /// automatically using heuristics. + pub fn threads(&mut self, n: usize) -> &mut WalkBuilder { + self.threads = n; + self + } + + /// Add a global ignore file to the matcher. + /// + /// This has lower precedence than all other sources of ignore rules. + /// + /// If there was a problem adding the ignore file, then an error is + /// returned. Note that the error may indicate *partial* failure. For + /// example, if an ignore file contains an invalid glob, all other globs + /// are still applied. + pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> { + let mut builder = GitignoreBuilder::new(""); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push(builder.add(path)); + match builder.build() { + Ok(gi) => { + self.ig_builder.add_ignore(gi); + } + Err(err) => { + errs.push(err); + } + } + errs.into_error_option() + } + + /// Add a custom ignore file name + /// + /// These ignore files have higher precedence than all other ignore files. + /// + /// When specifying multiple names, earlier names have lower precedence than + /// later names. + pub fn add_custom_ignore_filename<S: AsRef<OsStr>>( + &mut self, + file_name: S, + ) -> &mut WalkBuilder { + self.ig_builder.add_custom_ignore_filename(file_name); + self + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { + self.ig_builder.overrides(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut WalkBuilder { + self.ig_builder.types(types); + self + } + + /// Enables all the standard ignore filters. + /// + /// This toggles, as a group, all the filters that are enabled by default: + /// + /// - [hidden()](#method.hidden) + /// - [parents()](#method.parents) + /// - [ignore()](#method.ignore) + /// - [git_ignore()](#method.git_ignore) + /// - [git_global()](#method.git_global) + /// - [git_exclude()](#method.git_exclude) + /// + /// They may still be toggled individually after calling this function. + /// + /// This is (by definition) enabled by default. + pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder { + self.hidden(yes) + .parents(yes) + .ignore(yes) + .git_ignore(yes) + .git_global(yes) + .git_exclude(yes) + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. 
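+    ///
+    /// For example (an illustrative sketch), to also yield hidden files:
+    ///
+    /// ```no_run
+    /// use ignore::WalkBuilder;
+    ///
+    /// let mut builder = WalkBuilder::new("./");
+    /// builder.hidden(false); // do not filter out hidden files and directories
+    /// for result in builder.build() {
+    ///     println!("{:?}", result.map(|entry| entry.into_path()));
+    /// }
+    /// ```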
+ pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.hidden(yes); + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then .gitignore files in parent directories of each + /// file path given are respected. Otherwise, they are ignored. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.parents(yes); + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore(yes); + self + } + + /// Enables reading a global gitignore file, whose path is specified in + /// git's `core.excludesFile` config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + /// + /// This is enabled by default. + pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_global(yes); + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_ignore(yes); + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_exclude(yes); + self + } + + /// Whether a git repository is required to apply git-related ignore + /// rules (global rules, .gitignore and local exclude rules). + /// + /// When disabled, git-related ignore rules are applied even when searching + /// outside a git repository. + pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.require_git(yes); + self + } + + /// Process ignore files case insensitively + /// + /// This is disabled by default. + pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore_case_insensitive(yes); + self + } + + /// Set a function for sorting directory entries by their path. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. + /// + /// This is like `sort_by_file_name`, except the comparator accepts + /// a `&Path` instead of the base file name, which permits it to sort by + /// more criteria. + /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_name`. + /// + /// Note that this is not used in the parallel iterator. + pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder + where + F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static, + { + self.sorter = Some(Sorter::ByPath(Arc::new(cmp))); + self + } + + /// Set a function for sorting directory entries by file name. 
+ /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// names from entries from the same directory using only the name of the + /// entry. + /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_path`. + /// + /// Note that this is not used in the parallel iterator. + pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder + where + F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static, + { + self.sorter = Some(Sorter::ByName(Arc::new(cmp))); + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder { + self.same_file_system = yes; + self + } + + /// Do not yield directory entries that are believed to correspond to + /// stdout. + /// + /// This is useful when a command is invoked via shell redirection to a + /// file that is also being read. For example, `grep -r foo ./ > results` + /// might end up trying to search `results` even though it is also writing + /// to it, which could cause an unbounded feedback loop. Setting this + /// option prevents this from happening by skipping over the `results` + /// file. + /// + /// This is disabled by default. + pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder { + if yes { + self.skip = stdout_handle().map(Arc::new); + } else { + self.skip = None; + } + self + } + + /// Yields only entries which satisfy the given predicate and skips + /// descending into directories that do not satisfy the given predicate. + /// + /// The predicate is applied to all entries. If the predicate is + /// true, iteration carries on as normal. If the predicate is false, the + /// entry is ignored and if it is a directory, it is not descended into. + /// + /// Note that the errors for reading entries that may not satisfy the + /// predicate will still be yielded. + pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder + where + P: Fn(&DirEntry) -> bool + Send + Sync + 'static, + { + self.filter = Some(Filter(Arc::new(filter))); + self + } +} + +/// Walk is a recursive directory iterator over file paths in one or more +/// directories. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +pub struct Walk { + its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>, + it: Option<WalkEventIter>, + ig_root: Ignore, + ig: Ignore, + max_filesize: Option<u64>, + skip: Option<Arc<Handle>>, + filter: Option<Filter>, +} + +impl Walk { + /// Creates a new recursive directory iterator for the file path given. + /// + /// Note that this uses default settings, which include respecting + /// `.gitignore` files. To configure the iterator, use `WalkBuilder` + /// instead. 
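+    ///
+    /// A minimal sketch (added here for illustration):
+    ///
+    /// ```no_run
+    /// use ignore::Walk;
+    ///
+    /// for result in Walk::new("./") {
+    ///     if let Ok(entry) = result {
+    ///         // Only print regular files (directories and stdin have other file types).
+    ///         if entry.file_type().map_or(false, |ft| ft.is_file()) {
+    ///             println!("{}", entry.path().display());
+    ///         }
+    ///     }
+    /// }
+    /// ```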
+ pub fn new<P: AsRef<Path>>(path: P) -> Walk { + WalkBuilder::new(path).build() + } + + fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> { + if ent.depth() == 0 { + return Ok(false); + } + // We ensure that trivial skipping is done before any other potentially + // expensive operations (stat, filesystem other) are done. This seems + // like an obvious optimization but becomes critical when filesystem + // operations even as simple as stat can result in significant + // overheads; an example of this was a bespoke filesystem layer in + // Windows that hosted files remotely and would download them on-demand + // when particular filesystem operations occurred. Users of this system + // who ensured correct file-type filters were being used could still + // get unnecessary file access resulting in large downloads. + if should_skip_entry(&self.ig, ent) { + return Ok(true); + } + if let Some(ref stdout) = self.skip { + if path_equals(ent, stdout)? { + return Ok(true); + } + } + if self.max_filesize.is_some() && !ent.is_dir() { + return Ok(skip_filesize( + self.max_filesize.unwrap(), + ent.path(), + &ent.metadata().ok(), + )); + } + if let Some(Filter(filter)) = &self.filter { + if !filter(ent) { + return Ok(true); + } + } + Ok(false) + } +} + +impl Iterator for Walk { + type Item = Result<DirEntry, Error>; + + #[inline(always)] + fn next(&mut self) -> Option<Result<DirEntry, Error>> { + loop { + let ev = match self.it.as_mut().and_then(|it| it.next()) { + Some(ev) => ev, + None => { + match self.its.next() { + None => return None, + Some((_, None)) => { + return Some(Ok(DirEntry::new_stdin())); + } + Some((path, Some(it))) => { + self.it = Some(it); + if path.is_dir() { + let (ig, err) = self.ig_root.add_parents(path); + self.ig = ig; + if let Some(err) = err { + return Some(Err(err)); + } + } else { + self.ig = self.ig_root.clone(); + } + } + } + continue; + } + }; + match ev { + Err(err) => { + return Some(Err(Error::from_walkdir(err))); + } + Ok(WalkEvent::Exit) => { + self.ig = self.ig.parent().unwrap(); + } + Ok(WalkEvent::Dir(ent)) => { + let mut ent = DirEntry::new_walkdir(ent, None); + let should_skip = match self.skip_entry(&ent) { + Err(err) => return Some(Err(err)), + Ok(should_skip) => should_skip, + }; + if should_skip { + self.it.as_mut().unwrap().it.skip_current_dir(); + // Still need to push this on the stack because + // we'll get a WalkEvent::Exit event for this dir. + // We don't care if it errors though. + let (igtmp, _) = self.ig.add_child(ent.path()); + self.ig = igtmp; + continue; + } + let (igtmp, err) = self.ig.add_child(ent.path()); + self.ig = igtmp; + ent.err = err; + return Some(Ok(ent)); + } + Ok(WalkEvent::File(ent)) => { + let ent = DirEntry::new_walkdir(ent, None); + let should_skip = match self.skip_entry(&ent) { + Err(err) => return Some(Err(err)), + Ok(should_skip) => should_skip, + }; + if should_skip { + continue; + } + return Some(Ok(ent)); + } + } + } + } +} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. 
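+///
+/// For example (illustrative), walking a root `a` that contains a directory
+/// `a/b` and a file `a/f` yields, assuming `b` is read before `f`:
+/// Dir(a), Dir(a/b), Exit, File(a/f), Exit.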
+struct WalkEventIter { + depth: usize, + it: walkdir::IntoIter, + next: Option<Result<walkdir::DirEntry, walkdir::Error>>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(walkdir::DirEntry), + File(walkdir::DirEntry), + Exit, +} + +impl From<WalkDir> for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { depth: 0, it: it.into_iter(), next: None } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result<WalkEvent>; + + #[inline(always)] + fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if walkdir_is_dir(&dent) { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +/// WalkState is used in the parallel recursive directory iterator to indicate +/// whether walking should continue as normal, skip descending into a +/// particular directory or quit the walk entirely. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum WalkState { + /// Continue walking as normal. + Continue, + /// If the directory entry given is a directory, don't descend into it. + /// In all other cases, this has no effect. + Skip, + /// Quit the entire iterator as soon as possible. + /// + /// Note that this is an inherently asynchronous action. It is possible + /// for more entries to be yielded even after instructing the iterator + /// to quit. + Quit, +} + +impl WalkState { + fn is_continue(&self) -> bool { + *self == WalkState::Continue + } + + fn is_quit(&self) -> bool { + *self == WalkState::Quit + } +} + +/// A builder for constructing a visitor when using +/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder +/// will be called for each thread started by `WalkParallel`. The visitor +/// returned from each builder is then called for every directory entry. +pub trait ParallelVisitorBuilder<'s> { + /// Create per-thread `ParallelVisitor`s for `WalkParallel`. + fn build(&mut self) -> Box<dyn ParallelVisitor + 's>; +} + +impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s> + for &'a mut P +{ + fn build(&mut self) -> Box<dyn ParallelVisitor + 's> { + (**self).build() + } +} + +/// Receives files and directories for the current thread. +/// +/// Setup for the traversal can be implemented as part of +/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build). +/// Teardown when traversal finishes can be implemented by implementing the +/// `Drop` trait on your traversal type. +pub trait ParallelVisitor: Send { + /// Receives files and directories for the current thread. This is called + /// once for every directory entry visited by traversal. 
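+    ///
+    /// A minimal sketch of a visitor/builder pair (added here for
+    /// illustration; `PathCollector` and `PathCollectorBuilder` are made-up
+    /// names, and the crate's usual `ignore::` re-exports are assumed):
+    ///
+    /// ```no_run
+    /// use std::path::PathBuf;
+    ///
+    /// use ignore::{DirEntry, Error, ParallelVisitor, ParallelVisitorBuilder, WalkBuilder, WalkState};
+    ///
+    /// struct PathCollectorBuilder;
+    /// struct PathCollector(Vec<PathBuf>);
+    ///
+    /// impl<'s> ParallelVisitorBuilder<'s> for PathCollectorBuilder {
+    ///     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
+    ///         // One visitor per worker thread; no synchronization is needed inside it.
+    ///         Box::new(PathCollector(vec![]))
+    ///     }
+    /// }
+    ///
+    /// impl ParallelVisitor for PathCollector {
+    ///     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
+    ///         if let Ok(entry) = entry {
+    ///             self.0.push(entry.into_path());
+    ///         }
+    ///         WalkState::Continue
+    ///     }
+    /// }
+    ///
+    /// // Per-thread results could be merged in a `Drop` impl, as noted above.
+    /// WalkBuilder::new("./").build_parallel().visit(&mut PathCollectorBuilder);
+    /// ```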
+ fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState; +} + +struct FnBuilder<F> { + builder: F, +} + +impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s> + for FnBuilder<F> +{ + fn build(&mut self) -> Box<dyn ParallelVisitor + 's> { + let visitor = (self.builder)(); + Box::new(FnVisitorImp { visitor }) + } +} + +type FnVisitor<'s> = + Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>; + +struct FnVisitorImp<'s> { + visitor: FnVisitor<'s>, +} + +impl<'s> ParallelVisitor for FnVisitorImp<'s> { + fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState { + (self.visitor)(entry) + } +} + +/// WalkParallel is a parallel recursive directory iterator over files paths +/// in one or more directories. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +/// +/// Unlike `Walk`, this uses multiple threads for traversing a directory. +pub struct WalkParallel { + paths: vec::IntoIter<PathBuf>, + ig_root: Ignore, + max_filesize: Option<u64>, + max_depth: Option<usize>, + follow_links: bool, + same_file_system: bool, + threads: usize, + skip: Option<Arc<Handle>>, + filter: Option<Filter>, +} + +impl WalkParallel { + /// Execute the parallel recursive directory iterator. `mkf` is called + /// for each thread used for iteration. The function produced by `mkf` + /// is then in turn called for each visited file path. + pub fn run<'s, F>(self, mkf: F) + where + F: FnMut() -> FnVisitor<'s>, + { + self.visit(&mut FnBuilder { builder: mkf }) + } + + /// Execute the parallel recursive directory iterator using a custom + /// visitor. + /// + /// The builder given is used to construct a visitor for every thread + /// used by this traversal. The visitor returned from each builder is then + /// called for every directory entry seen by that thread. + /// + /// Typically, creating a custom visitor is useful if you need to perform + /// some kind of cleanup once traversal is finished. This can be achieved + /// by implementing `Drop` for your builder (or for your visitor, if you + /// want to execute cleanup for every thread that is launched). + /// + /// For example, each visitor might build up a data structure of results + /// corresponding to the directory entries seen for each thread. Since each + /// visitor runs on only one thread, this build-up can be done without + /// synchronization. Then, once traversal is complete, all of the results + /// can be merged together into a single data structure. + pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) { + let threads = self.threads(); + let stack = Arc::new(Mutex::new(vec![])); + { + let mut stack = stack.lock().unwrap(); + let mut visitor = builder.build(); + let mut paths = Vec::new().into_iter(); + std::mem::swap(&mut paths, &mut self.paths); + // Send the initial set of root paths to the pool of workers. Note + // that we only send directories. For files, we send to them the + // callback directly. 
+ for path in paths { + let (dent, root_device) = if path == Path::new("-") { + (DirEntry::new_stdin(), None) + } else { + let root_device = if !self.same_file_system { + None + } else { + match device_num(&path) { + Ok(root_device) => Some(root_device), + Err(err) => { + let err = Error::Io(err).with_path(path); + if visitor.visit(Err(err)).is_quit() { + return; + } + continue; + } + } + }; + match DirEntryRaw::from_path(0, path, false) { + Ok(dent) => { + (DirEntry::new_raw(dent, None), root_device) + } + Err(err) => { + if visitor.visit(Err(err)).is_quit() { + return; + } + continue; + } + } + }; + stack.push(Message::Work(Work { + dent: dent, + ignore: self.ig_root.clone(), + root_device: root_device, + })); + } + // ... but there's no need to start workers if we don't need them. + if stack.is_empty() { + return; + } + } + // Create the workers and then wait for them to finish. + let quit_now = Arc::new(AtomicBool::new(false)); + let num_pending = + Arc::new(AtomicUsize::new(stack.lock().unwrap().len())); + std::thread::scope(|s| { + let mut handles = vec![]; + for _ in 0..threads { + let worker = Worker { + visitor: builder.build(), + stack: stack.clone(), + quit_now: quit_now.clone(), + num_pending: num_pending.clone(), + max_depth: self.max_depth, + max_filesize: self.max_filesize, + follow_links: self.follow_links, + skip: self.skip.clone(), + filter: self.filter.clone(), + }; + handles.push(s.spawn(|| worker.run())); + } + for handle in handles { + handle.join().unwrap(); + } + }); + } + + fn threads(&self) -> usize { + if self.threads == 0 { + 2 + } else { + self.threads + } + } +} + +/// Message is the set of instructions that a worker knows how to process. +enum Message { + /// A work item corresponds to a directory that should be descended into. + /// Work items for entries that should be skipped or ignored should not + /// be produced. + Work(Work), + /// This instruction indicates that the worker should quit. + Quit, +} + +/// A unit of work for each worker to process. +/// +/// Each unit of work corresponds to a directory that should be descended +/// into. +struct Work { + /// The directory entry. + dent: DirEntry, + /// Any ignore matchers that have been built for this directory's parents. + ignore: Ignore, + /// The root device number. When present, only files with the same device + /// number should be considered. + root_device: Option<u64>, +} + +impl Work { + /// Returns true if and only if this work item is a directory. + fn is_dir(&self) -> bool { + self.dent.is_dir() + } + + /// Returns true if and only if this work item is a symlink. + fn is_symlink(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_symlink()) + } + + /// Adds ignore rules for parent directories. + /// + /// Note that this only applies to entries at depth 0. On all other + /// entries, this is a no-op. + fn add_parents(&mut self) -> Option<Error> { + if self.dent.depth() > 0 { + return None; + } + // At depth 0, the path of this entry is a root path, so we can + // use it directly to add parent ignore rules. + let (ig, err) = self.ignore.add_parents(self.dent.path()); + self.ignore = ig; + err + } + + /// Reads the directory contents of this work item and adds ignore + /// rules for this directory. + /// + /// If there was a problem with reading the directory contents, then + /// an error is returned. If there was a problem reading the ignore + /// rules for this directory, then the error is attached to this + /// work item's directory entry. 
+ fn read_dir(&mut self) -> Result<fs::ReadDir, Error> { + let readdir = match fs::read_dir(self.dent.path()) { + Ok(readdir) => readdir, + Err(err) => { + let err = Error::from(err) + .with_path(self.dent.path()) + .with_depth(self.dent.depth()); + return Err(err); + } + }; + let (ig, err) = self.ignore.add_child(self.dent.path()); + self.ignore = ig; + self.dent.err = err; + Ok(readdir) + } +} + +/// A worker is responsible for descending into directories, updating the +/// ignore matchers, producing new work and invoking the caller's callback. +/// +/// Note that a worker is *both* a producer and a consumer. +struct Worker<'s> { + /// The caller's callback. + visitor: Box<dyn ParallelVisitor + 's>, + /// A stack of work to do. + /// + /// We use a stack instead of a channel because a stack lets us visit + /// directories in depth first order. This can substantially reduce peak + /// memory usage by keeping both the number of files path and gitignore + /// matchers in memory lower. + stack: Arc<Mutex<Vec<Message>>>, + /// Whether all workers should terminate at the next opportunity. Note + /// that we need this because we don't want other `Work` to be done after + /// we quit. We wouldn't need this if have a priority channel. + quit_now: Arc<AtomicBool>, + /// The number of outstanding work items. + num_pending: Arc<AtomicUsize>, + /// The maximum depth of directories to descend. A value of `0` means no + /// descension at all. + max_depth: Option<usize>, + /// The maximum size a searched file can be (in bytes). If a file exceeds + /// this size it will be skipped. + max_filesize: Option<u64>, + /// Whether to follow symbolic links or not. When this is enabled, loop + /// detection is performed. + follow_links: bool, + /// A file handle to skip, currently is either `None` or stdout, if it's + /// a file and it has been requested to skip files identical to stdout. + skip: Option<Arc<Handle>>, + /// A predicate applied to dir entries. If true, the entry and all + /// children will be skipped. + filter: Option<Filter>, +} + +impl<'s> Worker<'s> { + /// Runs this worker until there is no more work left to do. + /// + /// The worker will call the caller's callback for all entries that aren't + /// skipped by the ignore matcher. + fn run(mut self) { + while let Some(work) = self.get_work() { + if let WalkState::Quit = self.run_one(work) { + self.quit_now(); + } + self.work_done(); + } + } + + fn run_one(&mut self, mut work: Work) -> WalkState { + // If the work is not a directory, then we can just execute the + // caller's callback immediately and move on. + if work.is_symlink() || !work.is_dir() { + return self.visitor.visit(Ok(work.dent)); + } + if let Some(err) = work.add_parents() { + let state = self.visitor.visit(Err(err)); + if state.is_quit() { + return state; + } + } + + let descend = if let Some(root_device) = work.root_device { + match is_same_file_system(root_device, work.dent.path()) { + Ok(true) => true, + Ok(false) => false, + Err(err) => { + let state = self.visitor.visit(Err(err)); + if state.is_quit() { + return state; + } + false + } + } + } else { + true + }; + + // Try to read the directory first before we transfer ownership + // to the provided closure. Do not unwrap it immediately, though, + // as we may receive an `Err` value e.g. in the case when we do not + // have sufficient read permissions to list the directory. + // In that case we still want to provide the closure with a valid + // entry before passing the error value. 
+ let readdir = work.read_dir(); + let depth = work.dent.depth(); + let state = self.visitor.visit(Ok(work.dent)); + if !state.is_continue() { + return state; + } + if !descend { + return WalkState::Skip; + } + + let readdir = match readdir { + Ok(readdir) => readdir, + Err(err) => { + return self.visitor.visit(Err(err)); + } + }; + + if self.max_depth.map_or(false, |max| depth >= max) { + return WalkState::Skip; + } + for result in readdir { + let state = self.generate_work( + &work.ignore, + depth + 1, + work.root_device, + result, + ); + if state.is_quit() { + return state; + } + } + WalkState::Continue + } + + /// Decides whether to submit the given directory entry as a file to + /// search. + /// + /// If the entry is a path that should be ignored, then this is a no-op. + /// Otherwise, the entry is pushed on to the queue. (The actual execution + /// of the callback happens in `run_one`.) + /// + /// If an error occurs while reading the entry, then it is sent to the + /// caller's callback. + /// + /// `ig` is the `Ignore` matcher for the parent directory. `depth` should + /// be the depth of this entry. `result` should be the item yielded by + /// a directory iterator. + fn generate_work( + &mut self, + ig: &Ignore, + depth: usize, + root_device: Option<u64>, + result: Result<fs::DirEntry, io::Error>, + ) -> WalkState { + let fs_dent = match result { + Ok(fs_dent) => fs_dent, + Err(err) => { + return self + .visitor + .visit(Err(Error::from(err).with_depth(depth))); + } + }; + let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) { + Ok(dent) => DirEntry::new_raw(dent, None), + Err(err) => { + return self.visitor.visit(Err(err)); + } + }; + let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink()); + if self.follow_links && is_symlink { + let path = dent.path().to_path_buf(); + dent = match DirEntryRaw::from_path(depth, path, true) { + Ok(dent) => DirEntry::new_raw(dent, None), + Err(err) => { + return self.visitor.visit(Err(err)); + } + }; + if dent.is_dir() { + if let Err(err) = check_symlink_loop(ig, dent.path(), depth) { + return self.visitor.visit(Err(err)); + } + } + } + // N.B. See analogous call in the single-threaded implementation about + // why it's important for this to come before the checks below. + if should_skip_entry(ig, &dent) { + return WalkState::Continue; + } + if let Some(ref stdout) = self.skip { + let is_stdout = match path_equals(&dent, stdout) { + Ok(is_stdout) => is_stdout, + Err(err) => return self.visitor.visit(Err(err)), + }; + if is_stdout { + return WalkState::Continue; + } + } + let should_skip_filesize = + if self.max_filesize.is_some() && !dent.is_dir() { + skip_filesize( + self.max_filesize.unwrap(), + dent.path(), + &dent.metadata().ok(), + ) + } else { + false + }; + let should_skip_filtered = + if let Some(Filter(predicate)) = &self.filter { + !predicate(&dent) + } else { + false + }; + if !should_skip_filesize && !should_skip_filtered { + self.send(Work { dent, ignore: ig.clone(), root_device }); + } + WalkState::Continue + } + + /// Returns the next directory to descend into. + /// + /// If all work has been exhausted, then this returns None. The worker + /// should then subsequently quit. + fn get_work(&mut self) -> Option<Work> { + let mut value = self.recv(); + loop { + // Simulate a priority channel: If quit_now flag is set, we can + // receive only quit messages. 
+ if self.is_quit_now() { + value = Some(Message::Quit) + } + match value { + Some(Message::Work(work)) => { + return Some(work); + } + Some(Message::Quit) => { + // Repeat quit message to wake up sleeping threads, if + // any. The domino effect will ensure that every thread + // will quit. + self.send_quit(); + return None; + } + None => { + // Once num_pending reaches 0, it is impossible for it to + // ever increase again. Namely, it only reaches 0 once + // all jobs have run such that no jobs have produced more + // work. We have this guarantee because num_pending is + // always incremented before each job is submitted and only + // decremented once each job is completely finished. + // Therefore, if this reaches zero, then there can be no + // other job running. + if self.num_pending() == 0 { + // Every other thread is blocked at the next recv(). + // Send the initial quit message and quit. + self.send_quit(); + return None; + } + // Wait for next `Work` or `Quit` message. + loop { + if let Some(v) = self.recv() { + value = Some(v); + break; + } + // Our stack isn't blocking. Instead of burning the + // CPU waiting, we let the thread sleep for a bit. In + // general, this tends to only occur once the search is + // approaching termination. + thread::sleep(Duration::from_millis(1)); + } + } + } + } + } + + /// Indicates that all workers should quit immediately. + fn quit_now(&self) { + self.quit_now.store(true, Ordering::SeqCst); + } + + /// Returns true if this worker should quit immediately. + fn is_quit_now(&self) -> bool { + self.quit_now.load(Ordering::SeqCst) + } + + /// Returns the number of pending jobs. + fn num_pending(&self) -> usize { + self.num_pending.load(Ordering::SeqCst) + } + + /// Send work. + fn send(&self, work: Work) { + self.num_pending.fetch_add(1, Ordering::SeqCst); + let mut stack = self.stack.lock().unwrap(); + stack.push(Message::Work(work)); + } + + /// Send a quit message. + fn send_quit(&self) { + let mut stack = self.stack.lock().unwrap(); + stack.push(Message::Quit); + } + + /// Receive work. + fn recv(&self) -> Option<Message> { + let mut stack = self.stack.lock().unwrap(); + stack.pop() + } + + /// Signal that work has been received. + fn work_done(&self) { + self.num_pending.fetch_sub(1, Ordering::SeqCst); + } +} + +fn check_symlink_loop( + ig_parent: &Ignore, + child_path: &Path, + child_depth: usize, +) -> Result<(), Error> { + let hchild = Handle::from_path(child_path).map_err(|err| { + Error::from(err).with_path(child_path).with_depth(child_depth) + })?; + for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) { + let h = Handle::from_path(ig.path()).map_err(|err| { + Error::from(err).with_path(child_path).with_depth(child_depth) + })?; + if hchild == h { + return Err(Error::Loop { + ancestor: ig.path().to_path_buf(), + child: child_path.to_path_buf(), + } + .with_depth(child_depth)); + } + } + Ok(()) +} + +// Before calling this function, make sure that you ensure that is really +// necessary as the arguments imply a file stat. 
+fn skip_filesize( + max_filesize: u64, + path: &Path, + ent: &Option<Metadata>, +) -> bool { + let filesize = match *ent { + Some(ref md) => Some(md.len()), + None => None, + }; + + if let Some(fs) = filesize { + if fs > max_filesize { + log::debug!("ignoring {}: {} bytes", path.display(), fs); + true + } else { + false + } + } else { + false + } +} + +fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool { + let m = ig.matched_dir_entry(dent); + if m.is_ignore() { + log::debug!("ignoring {}: {:?}", dent.path().display(), m); + true + } else if m.is_whitelist() { + log::debug!("whitelisting {}: {:?}", dent.path().display(), m); + false + } else { + false + } +} + +/// Returns a handle to stdout for filtering search. +/// +/// A handle is returned if and only if stdout is being redirected to a file. +/// The handle returned corresponds to that file. +/// +/// This can be used to ensure that we do not attempt to search a file that we +/// may also be writing to. +fn stdout_handle() -> Option<Handle> { + let h = match Handle::stdout() { + Err(_) => return None, + Ok(h) => h, + }; + let md = match h.as_file().metadata() { + Err(_) => return None, + Ok(md) => md, + }; + if !md.is_file() { + return None; + } + Some(h) +} + +/// Returns true if and only if the given directory entry is believed to be +/// equivalent to the given handle. If there was a problem querying the path +/// for information to determine equality, then that error is returned. +fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> { + #[cfg(unix)] + fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { + dent.ino() != Some(handle.ino()) + } + + #[cfg(not(unix))] + fn never_equal(_: &DirEntry, _: &Handle) -> bool { + false + } + + // If we know for sure that these two things aren't equal, then avoid + // the costly extra stat call to determine equality. + if dent.is_stdin() || never_equal(dent, handle) { + return Ok(false); + } + Handle::from_path(dent.path()) + .map(|h| &h == handle) + .map_err(|err| Error::Io(err).with_path(dent.path())) +} + +/// Returns true if the given walkdir entry corresponds to a directory. +/// +/// This is normally just `dent.file_type().is_dir()`, but when we aren't +/// following symlinks, the root directory entry may be a symlink to a +/// directory that we *do* follow---by virtue of it being specified by the user +/// explicitly. In that case, we need to follow the symlink and query whether +/// it's a directory or not. But we only do this for root entries to avoid an +/// additional stat check in most cases. +fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool { + if dent.file_type().is_dir() { + return true; + } + if !dent.file_type().is_symlink() || dent.depth() > 0 { + return false; + } + dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir()) +} + +/// Returns true if and only if the given path is on the same device as the +/// given root device. 
+fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> { + let dent_device = + device_num(path).map_err(|err| Error::Io(err).with_path(path))?; + Ok(root_device == dent_device) +} + +#[cfg(unix)] +fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> { + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + +#[cfg(windows)] +fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> { + use winapi_util::{file, Handle}; + + let h = Handle::from_path_any(path)?; + file::information(h).map(|info| info.volume_serial_number()) +} + +#[cfg(not(any(unix, windows)))] +fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> { + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} + +#[cfg(test)] +mod tests { + use std::ffi::OsStr; + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + use std::sync::{Arc, Mutex}; + + use super::{DirEntry, WalkBuilder, WalkState}; + use crate::tests::TempDir; + + fn wfile<P: AsRef<Path>>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn wfile_size<P: AsRef<Path>>(path: P, size: u64) { + let file = File::create(path).unwrap(); + file.set_len(size).unwrap(); + } + + #[cfg(unix)] + fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) { + use std::os::unix::fs::symlink; + symlink(src, dst).unwrap(); + } + + fn mkdirp<P: AsRef<Path>>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn normal_path(unix: &str) -> String { + if cfg!(windows) { + unix.replace("\\", "/") + } else { + unix.to_string() + } + } + + fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> { + let mut paths = vec![]; + for result in builder.build() { + let dent = match result { + Err(_) => continue, + Ok(dent) => dent, + }; + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn walk_collect_parallel( + prefix: &Path, + builder: &WalkBuilder, + ) -> Vec<String> { + let mut paths = vec![]; + for dent in walk_collect_entries_parallel(builder) { + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> { + let dents = Arc::new(Mutex::new(vec![])); + builder.build_parallel().run(|| { + let dents = dents.clone(); + Box::new(move |result| { + if let Ok(dent) = result { + dents.lock().unwrap().push(dent); + } + WalkState::Continue + }) + }); + + let dents = dents.lock().unwrap(); + dents.to_vec() + } + + fn mkpaths(paths: &[&str]) -> Vec<String> { + let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect(); + paths.sort(); + paths + } + + fn tmpdir() -> TempDir { + TempDir::new().unwrap() + } + + fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) { + let got = walk_collect(prefix, builder); + assert_eq!(got, mkpaths(expected), "single threaded"); + let got = walk_collect_parallel(prefix, builder); + assert_eq!(got, mkpaths(expected), "parallel"); + } + + #[test] + fn no_ignores() { + let td = tmpdir(); + mkdirp(td.path().join("a/b/c")); + mkdirp(td.path().join("x/y")); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("x/y/foo"), ""); + + assert_paths( + 
td.path(), + &WalkBuilder::new(td.path()), + &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"], + ); + } + + #[test] + fn custom_ignore() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + mkdirp(td.path().join("a")); + wfile(td.path().join(custom_ignore), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.add_custom_ignore_filename(&custom_ignore); + assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); + } + + #[test] + fn custom_ignore_exclusive_use() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + mkdirp(td.path().join("a")); + wfile(td.path().join(custom_ignore), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.ignore(false); + builder.git_ignore(false); + builder.git_global(false); + builder.git_exclude(false); + builder.add_custom_ignore_filename(&custom_ignore); + assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); + } + + #[test] + fn gitignore() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + assert_paths( + td.path(), + &WalkBuilder::new(td.path()), + &["bar", "a", "a/bar"], + ); + } + + #[test] + fn explicit_ignore() { + let td = tmpdir(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert!(builder.add_ignore(&igpath).is_none()); + assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); + } + + #[test] + fn explicit_ignore_exclusive_use() { + let td = tmpdir(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + builder.standard_filters(false); + assert!(builder.add_ignore(&igpath).is_none()); + assert_paths( + td.path(), + &builder, + &[".not-an-ignore", "bar", "a", "a/bar"], + ); + } + + #[test] + fn gitignore_parent() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/bar"), ""); + + let root = td.path().join("a"); + assert_paths(&root, &WalkBuilder::new(&root), &["bar"]); + } + + #[test] + fn max_depth() { + let td = tmpdir(); + mkdirp(td.path().join("a/b/c")); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("a/b/c/foo"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder, + &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], + ); + assert_paths(td.path(), builder.max_depth(Some(0)), &[]); + assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]); + 
assert_paths( + td.path(), + builder.max_depth(Some(2)), + &["a", "a/b", "foo", "a/foo"], + ); + } + + #[test] + fn max_filesize() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + wfile_size(td.path().join("foo"), 0); + wfile_size(td.path().join("bar"), 400); + wfile_size(td.path().join("baz"), 600); + wfile_size(td.path().join("a/foo"), 600); + wfile_size(td.path().join("a/bar"), 500); + wfile_size(td.path().join("a/baz"), 200); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder, + &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(0)), + &["a", "a/b", "foo"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(500)), + &["a", "a/b", "foo", "bar", "a/bar", "a/baz"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(50000)), + &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], + ); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn symlinks() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + symlink(td.path().join("a/b"), td.path().join("z")); + wfile(td.path().join("a/b/foo"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]); + assert_paths( + td.path(), + &builder.follow_links(true), + &["a", "a/b", "a/b/foo", "z", "z/foo"], + ); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn first_path_not_symlink() { + let td = tmpdir(); + mkdirp(td.path().join("foo")); + + let dents = WalkBuilder::new(td.path().join("foo")) + .build() + .into_iter() + .collect::<Result<Vec<_>, _>>() + .unwrap(); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + + let dents = walk_collect_entries_parallel(&WalkBuilder::new( + td.path().join("foo"), + )); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn symlink_loop() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + symlink(td.path().join("a"), td.path().join("a/b/c")); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]); + assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]); + } + + // It's a little tricky to test the 'same_file_system' option since + // we need an environment with more than one file system. We adopt a + // heuristic where /sys is typically a distinct volume on Linux and roll + // with that. + #[test] + #[cfg(target_os = "linux")] + fn same_file_system() { + use super::device_num; + + // If for some reason /sys doesn't exist or isn't a directory, just + // skip this test. + if !Path::new("/sys").is_dir() { + return; + } + + // If our test directory actually isn't a different volume from /sys, + // then this test is meaningless and we shouldn't run it. + let td = tmpdir(); + if device_num(td.path()).unwrap() == device_num("/sys").unwrap() { + return; + } + + mkdirp(td.path().join("same_file")); + symlink("/sys", td.path().join("same_file").join("alink")); + + // Create a symlink to sys and enable following symlinks. If the + // same_file_system option doesn't work, then this probably will hit a + // permission error. Otherwise, it should just skip over the symlink + // completely. 
+        let mut builder = WalkBuilder::new(td.path());
+        builder.follow_links(true).same_file_system(true);
+        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
+    }
+
+    #[cfg(target_os = "linux")]
+    #[test]
+    fn no_read_permissions() {
+        let dir_path = Path::new("/root");
+
+        // There's no /root directory, skip the test.
+        if !dir_path.is_dir() {
+            return;
+        }
+        // We're running as root, so the test won't check what we want it to.
+        if fs::read_dir(&dir_path).is_ok() {
+            return;
+        }
+
+        // Check that we can't descend into the unreadable directory, but
+        // that we still get an entry for the directory itself.
+        let builder = WalkBuilder::new(&dir_path);
+        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
+    }
+
+    #[test]
+    fn filter() {
+        let td = tmpdir();
+        mkdirp(td.path().join("a/b/c"));
+        mkdirp(td.path().join("x/y"));
+        wfile(td.path().join("a/b/foo"), "");
+        wfile(td.path().join("x/y/foo"), "");
+
+        assert_paths(
+            td.path(),
+            &WalkBuilder::new(td.path()),
+            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
+        );
+
+        assert_paths(
+            td.path(),
+            &WalkBuilder::new(td.path())
+                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
+            &["x", "x/y", "x/y/foo"],
+        );
+    }
+}
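Taken together, these tests double as a tour of the public `WalkBuilder` API exercised by this file: serial iteration via `build()`, parallel traversal via `build_parallel().run(..)`, and the various knobs (`max_depth`, `max_filesize`, `follow_links`, `same_file_system`, `filter_entry`, custom ignore files). A minimal consumer-side sketch of the same calls follows; the root path and the printing are illustrative placeholders, not part of the vendored source:

    use ignore::{WalkBuilder, WalkState};

    fn main() {
        // Serial walk: honors .gitignore, global git excludes and .ignore
        // files by default, as the `gitignore` test above demonstrates.
        let mut builder = WalkBuilder::new("./some/root");
        builder.max_depth(Some(2)).follow_links(false);
        for result in builder.build() {
            match result {
                Ok(entry) => println!("{}", entry.path().display()),
                Err(err) => eprintln!("walk error: {}", err),
            }
        }

        // Parallel walk: the closure factory hands each worker thread its
        // own visitor, mirroring `walk_collect_entries_parallel` above.
        builder.build_parallel().run(|| {
            Box::new(|result| {
                if let Ok(entry) = result {
                    println!("{}", entry.path().display());
                }
                WalkState::Continue
            })
        });
    }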