summaryrefslogtreecommitdiffstats
path: root/vendor/globset
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/globset
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/globset')
-rw-r--r--vendor/globset/.cargo-checksum.json1
-rw-r--r--vendor/globset/COPYING3
-rw-r--r--vendor/globset/Cargo.toml78
-rw-r--r--vendor/globset/LICENSE-MIT21
-rw-r--r--vendor/globset/README.md119
-rw-r--r--vendor/globset/UNLICENSE24
-rw-r--r--vendor/globset/benches/bench.rs113
-rw-r--r--vendor/globset/src/glob.rs1528
-rw-r--r--vendor/globset/src/lib.rs912
-rw-r--r--vendor/globset/src/pathutil.rs129
-rw-r--r--vendor/globset/src/serde_impl.rs38
11 files changed, 2966 insertions, 0 deletions
diff --git a/vendor/globset/.cargo-checksum.json b/vendor/globset/.cargo-checksum.json
new file mode 100644
index 000000000..29f8dbafa
--- /dev/null
+++ b/vendor/globset/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"79bfb32bb9c1e821401432cce035ca292b6cf97ee43947d19ce4bb8977b3a415","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"719f0882004ec6da729b532623db73a345f31b27766c36d86f483f37dee7831e","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","benches/bench.rs":"05a00ec1b35dbd7a202e280e862a784b3a972574b19201ef66ac7980521ce4ac","src/glob.rs":"ac79317d5dec80e7c4e8a1e8b3cc7d78080f110557330779fb9152f0b2ec20d5","src/lib.rs":"d750f25177c8ad825607f318be31d9afa16a14a362f9cc5a9bafb2ca79fcbcac","src/pathutil.rs":"1b11ea31529d14a7f98d56d3b46f9693d6cdabe812b366d344d4b786eb8a730d","src/serde_impl.rs":"3627c7f2fb35c678fa7b35a3c8ad7136f25a69c243fc8a449f75b4bc81df180b"},"package":"0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a"} \ No newline at end of file
diff --git a/vendor/globset/COPYING b/vendor/globset/COPYING
new file mode 100644
index 000000000..bb9c20a09
--- /dev/null
+++ b/vendor/globset/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
diff --git a/vendor/globset/Cargo.toml b/vendor/globset/Cargo.toml
new file mode 100644
index 000000000..4a52e0503
--- /dev/null
+++ b/vendor/globset/Cargo.toml
@@ -0,0 +1,78 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "globset"
+version = "0.4.9"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+Cross platform single glob and glob set matching. Glob set matching is the
+process of matching one or more glob patterns against a single candidate path
+simultaneously, and returning all of the globs that matched.
+"""
+homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
+documentation = "https://docs.rs/globset"
+readme = "README.md"
+keywords = [
+ "regex",
+ "glob",
+ "multiple",
+ "set",
+ "pattern",
+]
+license = "Unlicense OR MIT"
+repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
+
+[lib]
+name = "globset"
+bench = false
+
+[dependencies.aho-corasick]
+version = "0.7.3"
+
+[dependencies.bstr]
+version = "0.2.0"
+features = ["std"]
+default-features = false
+
+[dependencies.fnv]
+version = "1.0.6"
+
+[dependencies.log]
+version = "0.4.5"
+optional = true
+
+[dependencies.regex]
+version = "1.1.5"
+features = [
+ "perf",
+ "std",
+]
+default-features = false
+
+[dependencies.serde]
+version = "1.0.104"
+optional = true
+
+[dev-dependencies.glob]
+version = "0.3.0"
+
+[dev-dependencies.lazy_static]
+version = "1"
+
+[dev-dependencies.serde_json]
+version = "1.0.45"
+
+[features]
+default = ["log"]
+serde1 = ["serde"]
+simd-accel = []
diff --git a/vendor/globset/LICENSE-MIT b/vendor/globset/LICENSE-MIT
new file mode 100644
index 000000000..3b0a5dc09
--- /dev/null
+++ b/vendor/globset/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/globset/README.md b/vendor/globset/README.md
new file mode 100644
index 000000000..806c915b9
--- /dev/null
+++ b/vendor/globset/README.md
@@ -0,0 +1,119 @@
+globset
+=======
+Cross platform single glob and glob set matching. Glob set matching is the
+process of matching one or more glob patterns against a single candidate path
+simultaneously, and returning all of the globs that matched.
+
+[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
+[![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset)
+
+Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/).
+
+### Documentation
+
+[https://docs.rs/globset](https://docs.rs/globset)
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+globset = "0.4"
+```
+
+### Features
+
+* `serde1`: Enables implementing Serde traits on the `Glob` type.
+
+### Example: one glob
+
+This example shows how to match a single glob against a single file path.
+
+```rust
+use globset::Glob;
+
+let glob = Glob::new("*.rs")?.compile_matcher();
+
+assert!(glob.is_match("foo.rs"));
+assert!(glob.is_match("foo/bar.rs"));
+assert!(!glob.is_match("Cargo.toml"));
+```
+
+### Example: configuring a glob matcher
+
+This example shows how to use a `GlobBuilder` to configure aspects of match
+semantics. In this example, we prevent wildcards from matching path separators.
+
+```rust
+use globset::GlobBuilder;
+
+let glob = GlobBuilder::new("*.rs")
+ .literal_separator(true).build()?.compile_matcher();
+
+assert!(glob.is_match("foo.rs"));
+assert!(!glob.is_match("foo/bar.rs")); // no longer matches
+assert!(!glob.is_match("Cargo.toml"));
+```
+
+### Example: match multiple globs at once
+
+This example shows how to match multiple glob patterns at once.
+
+```rust
+use globset::{Glob, GlobSetBuilder};
+
+let mut builder = GlobSetBuilder::new();
+// A GlobBuilder can be used to configure each glob's match semantics
+// independently.
+builder.add(Glob::new("*.rs")?);
+builder.add(Glob::new("src/lib.rs")?);
+builder.add(Glob::new("src/**/foo.rs")?);
+let set = builder.build()?;
+
+assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
+```
+
+### Performance
+
+This crate implements globs by converting them to regular expressions, and
+executing them with the
+[`regex`](https://github.com/rust-lang-nursery/regex)
+crate.
+
+For single glob matching, performance of this crate should be roughly on par
+with the performance of the
+[`glob`](https://github.com/rust-lang-nursery/glob)
+crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
+correspond to benchmarks for the `glob` library.)
+Optimizations in the `regex` crate may propel this library past `glob`,
+particularly when matching longer paths.
+
+```
+test ext_glob ... bench: 425 ns/iter (+/- 21)
+test ext_regex ... bench: 175 ns/iter (+/- 10)
+test long_glob ... bench: 182 ns/iter (+/- 11)
+test long_regex ... bench: 173 ns/iter (+/- 10)
+test short_glob ... bench: 69 ns/iter (+/- 4)
+test short_regex ... bench: 83 ns/iter (+/- 2)
+```
+
+The primary performance advantage of this crate is when matching multiple
+globs against a single path. With the `glob` crate, one must match each glob
+synchronously, one after the other. In this crate, many can be matched
+simultaneously. For example:
+
+```
+test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
+test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
+```
+
+### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
+
+* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
+* Can match non-UTF-8 file paths correctly.
+* Supports matching multiple globs at once.
+* Doesn't provide a recursive directory iterator of matching file paths,
+ although I believe this crate should grow one eventually.
+* Supports case insensitive and require-literal-separator match options, but
+ **doesn't** support the require-literal-leading-dot option.
diff --git a/vendor/globset/UNLICENSE b/vendor/globset/UNLICENSE
new file mode 100644
index 000000000..68a49daad
--- /dev/null
+++ b/vendor/globset/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/vendor/globset/benches/bench.rs b/vendor/globset/benches/bench.rs
new file mode 100644
index 000000000..1344a8f65
--- /dev/null
+++ b/vendor/globset/benches/bench.rs
@@ -0,0 +1,113 @@
+/*!
+This module benchmarks the glob implementation. For benchmarks on the ripgrep
+tool itself, see the benchsuite directory.
+*/
+#![feature(test)]
+
+extern crate test;
+
+use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
+
+const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
+const EXT_PAT: &'static str = "*.txt";
+
+const SHORT: &'static str = "some/needle.txt";
+const SHORT_PAT: &'static str = "some/**/needle.txt";
+
+const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
+const LONG_PAT: &'static str = "some/**/needle.txt";
+
+fn new_glob(pat: &str) -> glob::Pattern {
+ glob::Pattern::new(pat).unwrap()
+}
+
+fn new_reglob(pat: &str) -> GlobMatcher {
+ Glob::new(pat).unwrap().compile_matcher()
+}
+
+fn new_reglob_many(pats: &[&str]) -> GlobSet {
+ let mut builder = GlobSetBuilder::new();
+ for pat in pats {
+ builder.add(Glob::new(pat).unwrap());
+ }
+ builder.build().unwrap()
+}
+
+#[bench]
+fn ext_glob(b: &mut test::Bencher) {
+ let pat = new_glob(EXT_PAT);
+ b.iter(|| assert!(pat.matches(EXT)));
+}
+
+#[bench]
+fn ext_regex(b: &mut test::Bencher) {
+ let set = new_reglob(EXT_PAT);
+ let cand = Candidate::new(EXT);
+ b.iter(|| assert!(set.is_match_candidate(&cand)));
+}
+
+#[bench]
+fn short_glob(b: &mut test::Bencher) {
+ let pat = new_glob(SHORT_PAT);
+ b.iter(|| assert!(pat.matches(SHORT)));
+}
+
+#[bench]
+fn short_regex(b: &mut test::Bencher) {
+ let set = new_reglob(SHORT_PAT);
+ let cand = Candidate::new(SHORT);
+ b.iter(|| assert!(set.is_match_candidate(&cand)));
+}
+
+#[bench]
+fn long_glob(b: &mut test::Bencher) {
+ let pat = new_glob(LONG_PAT);
+ b.iter(|| assert!(pat.matches(LONG)));
+}
+
+#[bench]
+fn long_regex(b: &mut test::Bencher) {
+ let set = new_reglob(LONG_PAT);
+ let cand = Candidate::new(LONG);
+ b.iter(|| assert!(set.is_match_candidate(&cand)));
+}
+
+const MANY_SHORT_GLOBS: &'static [&'static str] = &[
+ // Taken from a random .gitignore on my system.
+ ".*.swp",
+ "tags",
+ "target",
+ "*.lock",
+ "tmp",
+ "*.csv",
+ "*.fst",
+ "*-got",
+ "*.csv.idx",
+ "words",
+ "98m*",
+ "dict",
+ "test",
+ "months",
+];
+
+const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx";
+
+#[bench]
+fn many_short_glob(b: &mut test::Bencher) {
+ let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect();
+ b.iter(|| {
+ let mut count = 0;
+ for pat in &pats {
+ if pat.matches(MANY_SHORT_SEARCH) {
+ count += 1;
+ }
+ }
+ assert_eq!(2, count);
+ })
+}
+
+#[bench]
+fn many_short_regex_set(b: &mut test::Bencher) {
+ let set = new_reglob_many(MANY_SHORT_GLOBS);
+ b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
+}
diff --git a/vendor/globset/src/glob.rs b/vendor/globset/src/glob.rs
new file mode 100644
index 000000000..6e35aeec3
--- /dev/null
+++ b/vendor/globset/src/glob.rs
@@ -0,0 +1,1528 @@
+use std::fmt;
+use std::hash;
+use std::iter;
+use std::ops::{Deref, DerefMut};
+use std::path::{is_separator, Path};
+use std::str;
+
+use regex;
+use regex::bytes::Regex;
+
+use crate::{new_regex, Candidate, Error, ErrorKind};
+
+/// Describes a matching strategy for a particular pattern.
+///
+/// This provides a way to more quickly determine whether a pattern matches
+/// a particular file path in a way that scales with a large number of
+/// patterns. For example, if many patterns are of the form `*.ext`, then it's
+/// possible to test whether any of those patterns matches by looking up a
+/// file path's extension in a hash table.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum MatchStrategy {
+ /// A pattern matches if and only if the entire file path matches this
+ /// literal string.
+ Literal(String),
+ /// A pattern matches if and only if the file path's basename matches this
+ /// literal string.
+ BasenameLiteral(String),
+ /// A pattern matches if and only if the file path's extension matches this
+ /// literal string.
+ Extension(String),
+ /// A pattern matches if and only if this prefix literal is a prefix of the
+ /// candidate file path.
+ Prefix(String),
+ /// A pattern matches if and only if this suffix literal is a suffix of the
+ /// candidate file path.
+ ///
+ /// An exception: if `component` is true, then `suffix` must appear at the
+ /// beginning of a file path or immediately following a `/`.
+ Suffix {
+ /// The actual suffix.
+ suffix: String,
+ /// Whether this must start at the beginning of a path component.
+ component: bool,
+ },
+ /// A pattern matches only if the given extension matches the file path's
+ /// extension. Note that this is a necessary but NOT sufficient criterion.
+ /// Namely, if the extension matches, then a full regex search is still
+ /// required.
+ RequiredExtension(String),
+ /// A regex needs to be used for matching.
+ Regex,
+}
+
+impl MatchStrategy {
+    /// Returns a matching strategy for the given pattern.
+    ///
+    /// The pattern is probed in a fixed order and the first strategy that
+    /// applies wins: basename literal, full literal, extension, prefix,
+    /// suffix, required extension. If none of them applies, the strategy
+    /// falls back to `MatchStrategy::Regex`.
+    pub fn new(pat: &Glob) -> MatchStrategy {
+        if let Some(lit) = pat.basename_literal() {
+            MatchStrategy::BasenameLiteral(lit)
+        } else if let Some(lit) = pat.literal() {
+            MatchStrategy::Literal(lit)
+        } else if let Some(ext) = pat.ext() {
+            MatchStrategy::Extension(ext)
+        } else if let Some(prefix) = pat.prefix() {
+            MatchStrategy::Prefix(prefix)
+        } else if let Some((suffix, component)) = pat.suffix() {
+            MatchStrategy::Suffix { suffix, component }
+        } else if let Some(ext) = pat.required_ext() {
+            MatchStrategy::RequiredExtension(ext)
+        } else {
+            MatchStrategy::Regex
+        }
+    }
+}
+
+/// Glob represents a successfully parsed shell glob pattern.
+///
+/// It cannot be used directly to match file paths, but it can be converted
+/// to a regular expression string or a matcher.
+#[derive(Clone, Debug, Eq)]
+pub struct Glob {
+ glob: String,
+ re: String,
+ opts: GlobOptions,
+ tokens: Tokens,
+}
+
+impl PartialEq for Glob {
+ fn eq(&self, other: &Glob) -> bool {
+ self.glob == other.glob && self.opts == other.opts
+ }
+}
+
+impl hash::Hash for Glob {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ self.glob.hash(state);
+ self.opts.hash(state);
+ }
+}
+
+impl fmt::Display for Glob {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.glob.fmt(f)
+ }
+}
+
+impl str::FromStr for Glob {
+ type Err = Error;
+
+ fn from_str(glob: &str) -> Result<Self, Self::Err> {
+ Self::new(glob)
+ }
+}
+
+/// A matcher for a single pattern.
+#[derive(Clone, Debug)]
+pub struct GlobMatcher {
+ /// The underlying pattern.
+ pat: Glob,
+ /// The pattern, as a compiled regex.
+ re: Regex,
+}
+
+impl GlobMatcher {
+ /// Tests whether the given path matches this pattern or not.
+ ///
+ /// The path is wrapped in a `Candidate` and then handed to
+ /// `is_match_candidate`.
+ pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
+ self.is_match_candidate(&Candidate::new(path.as_ref()))
+ }
+
+ /// Tests whether the given candidate path matches this pattern or not.
+ ///
+ /// The compiled regex is run against the candidate's `path` field.
+ pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
+ self.re.is_match(&path.path)
+ }
+
+ /// Returns the `Glob` used to compile this matcher.
+ pub fn glob(&self) -> &Glob {
+ &self.pat
+ }
+}
+
+/// A strategic matcher for a single pattern.
+#[cfg(test)]
+#[derive(Clone, Debug)]
+struct GlobStrategic {
+ /// The match strategy to use.
+ strategy: MatchStrategy,
+ /// The pattern, as a compiled regex.
+ re: Regex,
+}
+
+#[cfg(test)]
+impl GlobStrategic {
+ /// Tests whether the given path matches this pattern or not.
+ fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
+ self.is_match_candidate(&Candidate::new(path.as_ref()))
+ }
+
+ /// Tests whether the given path matches this pattern or not.
+ fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
+ let byte_path = &*candidate.path;
+
+ match self.strategy {
+ MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
+ MatchStrategy::BasenameLiteral(ref lit) => {
+ lit.as_bytes() == &*candidate.basename
+ }
+ MatchStrategy::Extension(ref ext) => {
+ ext.as_bytes() == &*candidate.ext
+ }
+ MatchStrategy::Prefix(ref pre) => {
+ starts_with(pre.as_bytes(), byte_path)
+ }
+ MatchStrategy::Suffix { ref suffix, component } => {
+ if component && byte_path == &suffix.as_bytes()[1..] {
+ return true;
+ }
+ ends_with(suffix.as_bytes(), byte_path)
+ }
+ MatchStrategy::RequiredExtension(ref ext) => {
+ let ext = ext.as_bytes();
+ &*candidate.ext == ext && self.re.is_match(byte_path)
+ }
+ MatchStrategy::Regex => self.re.is_match(byte_path),
+ }
+ }
+}
+
+/// A builder for a pattern.
+///
+/// This builder enables configuring the match semantics of a pattern. For
+/// example, one can make matching case insensitive.
+///
+/// The lifetime `'a` refers to the lifetime of the pattern string.
+#[derive(Clone, Debug)]
+pub struct GlobBuilder<'a> {
+ /// The glob pattern to compile.
+ glob: &'a str,
+ /// Options for the pattern.
+ opts: GlobOptions,
+}
+
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+struct GlobOptions {
+ /// Whether to match case insensitively.
+ case_insensitive: bool,
+ /// Whether to require a literal separator to match a separator in a file
+ /// path. e.g., when enabled, `*` won't match `/`.
+ literal_separator: bool,
+ /// Whether or not to use `\` to escape special characters.
+ /// e.g., when enabled, `\*` will match a literal `*`.
+ backslash_escape: bool,
+}
+
+impl Default for GlobOptions {
+    /// Returns the default options.
+    ///
+    /// Case insensitive matching and literal separators are both disabled.
+    /// Backslash escaping is enabled exactly when `\` is not a path
+    /// separator on the current platform.
+    fn default() -> GlobOptions {
+        GlobOptions {
+            case_insensitive: false,
+            literal_separator: false,
+            backslash_escape: !is_separator('\\'),
+        }
+    }
+}
+
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+struct Tokens(Vec<Token>);
+
+impl Deref for Tokens {
+ type Target = Vec<Token>;
+ fn deref(&self) -> &Vec<Token> {
+ &self.0
+ }
+}
+
+impl DerefMut for Tokens {
+ fn deref_mut(&mut self) -> &mut Vec<Token> {
+ &mut self.0
+ }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Token {
+ Literal(char),
+ Any,
+ ZeroOrMore,
+ RecursivePrefix,
+ RecursiveSuffix,
+ RecursiveZeroOrMore,
+ Class { negated: bool, ranges: Vec<(char, char)> },
+ Alternates(Vec<Tokens>),
+}
+
+impl Glob {
+ /// Builds a new pattern with default options.
+ pub fn new(glob: &str) -> Result<Glob, Error> {
+ GlobBuilder::new(glob).build()
+ }
+
+ /// Returns a matcher for this pattern.
+ pub fn compile_matcher(&self) -> GlobMatcher {
+ let re =
+ new_regex(&self.re).expect("regex compilation shouldn't fail");
+ GlobMatcher { pat: self.clone(), re: re }
+ }
+
+ /// Returns a strategic matcher.
+ ///
+ /// This isn't exposed because it's not clear whether it's actually
+ /// faster than just running a regex for a *single* pattern. If it
+ /// is faster, then GlobMatcher should do it automatically.
+ #[cfg(test)]
+ fn compile_strategic_matcher(&self) -> GlobStrategic {
+ let strategy = MatchStrategy::new(self);
+ let re =
+ new_regex(&self.re).expect("regex compilation shouldn't fail");
+ GlobStrategic { strategy: strategy, re: re }
+ }
+
+ /// Returns the original glob pattern used to build this pattern.
+ pub fn glob(&self) -> &str {
+ &self.glob
+ }
+
+ /// Returns the regular expression string for this glob.
+ ///
+ /// Note that regular expressions for globs are intended to be matched on
+ /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
+ /// particular, globs are frequently used on file paths, where there is no
+ /// general guarantee that file paths are themselves valid UTF-8. As a
+ /// result, callers will need to ensure that they are using a regex API
+ /// that can match on arbitrary bytes. For example, the
+ /// [`regex`](https://crates.io/crates/regex)
+ /// crate's
+ /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
+ /// API is not suitable for this since it matches on `&str`, but its
+ /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
+ /// API is suitable for this.
+ pub fn regex(&self) -> &str {
+ &self.re
+ }
+
+ /// Returns the pattern as a literal if and only if the pattern must match
+ /// an entire path exactly.
+ ///
+ /// The basic format of these patterns is `{literal}`.
+ fn literal(&self) -> Option<String> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let mut lit = String::new();
+ for t in &*self.tokens {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() {
+ None
+ } else {
+ Some(lit)
+ }
+ }
+
+ /// Returns an extension if this pattern matches a file path if and only
+ /// if the file path has the extension returned.
+ ///
+ /// A pattern qualifies only when its tokens are, in order: an optional
+ /// recursive prefix, a `*`, a literal `.`, and then nothing but literal
+ /// characters other than `.` and `/`. Case insensitive patterns never
+ /// qualify.
+ ///
+ /// Note that this extension returned differs from the extension that
+ /// std::path::Path::extension returns. Namely, this extension includes
+ /// the '.'. Also, paths like `.rs` are considered to have an extension
+ /// of `.rs`.
+ fn ext(&self) -> Option<String> {
+     if self.opts.case_insensitive {
+         return None;
+     }
+     let start = match self.tokens.get(0) {
+         Some(&Token::RecursivePrefix) => 1,
+         Some(_) => 0,
+         _ => return None,
+     };
+     match self.tokens.get(start) {
+         Some(&Token::ZeroOrMore) => {
+             // If there was no recursive prefix, then we only permit
+             // `*` if `*` can match a `/`. For example, if `*` can't
+             // match `/`, then `*.c` doesn't match `foo/bar.c`.
+             if start == 0 && self.opts.literal_separator {
+                 return None;
+             }
+         }
+         _ => return None,
+     }
+     match self.tokens.get(start + 1) {
+         Some(&Token::Literal('.')) => {}
+         _ => return None,
+     }
+     let mut lit = ".".to_string();
+     for t in self.tokens[start + 2..].iter() {
+         match *t {
+             // A second `.` or a separator means the trailing literal is
+             // not a single extension.
+             Token::Literal('.') | Token::Literal('/') => return None,
+             Token::Literal(c) => lit.push(c),
+             _ => return None,
+         }
+     }
+     // `lit` was seeded with "." above, so it can never be empty here.
+     Some(lit)
+ }
+
+ /// This is like `ext`, but returns an extension even if it isn't sufficient
+ /// to imply a match. Namely, if an extension is returned, then it is
+ /// necessary but not sufficient for a match.
+ fn required_ext(&self) -> Option<String> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ // We don't care at all about the beginning of this pattern. All we
+ // need to check for is if it ends with a literal of the form `.ext`.
+ let mut ext: Vec<char> = vec![]; // built in reverse
+ for t in self.tokens.iter().rev() {
+ match *t {
+ Token::Literal('/') => return None,
+ Token::Literal(c) => {
+ ext.push(c);
+ if c == '.' {
+ break;
+ }
+ }
+ _ => return None,
+ }
+ }
+ if ext.last() != Some(&'.') {
+ None
+ } else {
+ ext.reverse();
+ Some(ext.into_iter().collect())
+ }
+ }
+
+ /// Returns a literal prefix of this pattern if the entire pattern matches
+ /// if the literal prefix matches.
+ fn prefix(&self) -> Option<String> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let (end, need_sep) = match self.tokens.last() {
+ Some(&Token::ZeroOrMore) => {
+ if self.opts.literal_separator {
+ // If a trailing `*` can't match a `/`, then we can't
+ // assume a match of the prefix corresponds to a match
+ // of the overall pattern. e.g., `foo/*` with
+ // `literal_separator` enabled matches `foo/bar` but not
+ // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
+ // literal prefix.
+ return None;
+ }
+ (self.tokens.len() - 1, false)
+ }
+ Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
+ _ => (self.tokens.len(), false),
+ };
+ let mut lit = String::new();
+ for t in &self.tokens[0..end] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if need_sep {
+ lit.push('/');
+ }
+ if lit.is_empty() {
+ None
+ } else {
+ Some(lit)
+ }
+ }
+
+ /// Returns a literal suffix of this pattern if the entire pattern matches
+ /// if the literal suffix matches.
+ ///
+ /// If a literal suffix is returned and it must match either the entire
+ /// file path or be preceded by a `/`, then also return true. This happens
+ /// with a pattern like `**/foo/bar`. Namely, this pattern matches
+ /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
+ /// suffix returned is `/foo/bar` (but should match the entire path
+ /// `foo/bar`).
+ ///
+ /// When this returns true, the suffix literal is guaranteed to start with
+ /// a `/`.
+ fn suffix(&self) -> Option<(String, bool)> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let mut lit = String::new();
+ let (start, entire) = match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {
+ // We only care if this follows a path component if the next
+ // token is a literal.
+ if let Some(&Token::Literal(_)) = self.tokens.get(1) {
+ lit.push('/');
+ (1, true)
+ } else {
+ (1, false)
+ }
+ }
+ _ => (0, false),
+ };
+ let start = match self.tokens.get(start) {
+ Some(&Token::ZeroOrMore) => {
+ // If literal_separator is enabled, then a `*` can't
+ // necessarily match everything, so reporting a suffix match
+ // as a match of the pattern would be a false positive.
+ if self.opts.literal_separator {
+ return None;
+ }
+ start + 1
+ }
+ _ => start,
+ };
+ for t in &self.tokens[start..] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() || lit == "/" {
+ None
+ } else {
+ Some((lit, entire))
+ }
+ }
+
+ /// If this pattern only needs to inspect the basename of a file path,
+ /// then the tokens corresponding to only the basename match are returned.
+ ///
+ /// For example, given a pattern of `**/*.foo`, only the tokens
+ /// corresponding to `*.foo` are returned.
+ ///
+ /// Note that this will return None if any match of the basename tokens
+ /// doesn't correspond to a match of the entire pattern. For example, the
+ /// glob `foo` only matches when a file path has a basename of `foo`, but
+ /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
+ /// `foo` doesn't match `abc/foo`.
+ fn basename_tokens(&self) -> Option<&[Token]> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let start = match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => 1,
+ _ => {
+ // With nothing to gobble up the parent portion of a path,
+ // we can't assume that matching on only the basename is
+ // correct.
+ return None;
+ }
+ };
+ if self.tokens[start..].is_empty() {
+ return None;
+ }
+ for t in &self.tokens[start..] {
+ match *t {
+ Token::Literal('/') => return None,
+ Token::Literal(_) => {} // OK
+ Token::Any | Token::ZeroOrMore => {
+ if !self.opts.literal_separator {
+ // In this case, `*` and `?` can match a path
+ // separator, which means this could reach outside
+ // the basename.
+ return None;
+ }
+ }
+ Token::RecursivePrefix
+ | Token::RecursiveSuffix
+ | Token::RecursiveZeroOrMore => {
+ return None;
+ }
+ Token::Class { .. } | Token::Alternates(..) => {
+ // We *could* be a little smarter here, but either one
+ // of these is going to prevent our literal optimizations
+ // anyway, so give up.
+ return None;
+ }
+ }
+ }
+ Some(&self.tokens[start..])
+ }
+
+ /// Returns the pattern as a literal if and only if the pattern exclusively
+ /// matches the basename of a file path *and* is a literal.
+ ///
+ /// The basic format of these patterns is `**/{literal}`, where `{literal}`
+ /// does not contain a path separator.
+ ///
+ /// Returns `None` when `basename_tokens` yields nothing, or when any of
+ /// the basename tokens is not a plain literal character.
+ fn basename_literal(&self) -> Option<String> {
+     self.basename_tokens()?
+         .iter()
+         .map(|t| match *t {
+             Token::Literal(c) => Some(c),
+             _ => None,
+         })
+         // Collecting into `Option<String>` stops at the first non-literal
+         // token and yields `None`, as the explicit loop did.
+         .collect()
+ }
+}
+
+impl<'a> GlobBuilder<'a> {
+    /// Start building a glob from the given pattern text, using the
+    /// default options.
+    ///
+    /// Nothing is parsed here; the pattern is only compiled by `build`.
+    pub fn new(glob: &'a str) -> GlobBuilder<'a> {
+        let opts = GlobOptions::default();
+        GlobBuilder { glob, opts }
+    }
+
+    /// Parses the pattern and, on success, produces the finished `Glob`.
+    ///
+    /// Errors raised while parsing are returned unchanged. Once parsing is
+    /// done, an empty group stack is reported as
+    /// `ErrorKind::UnopenedAlternates` and a stack that still holds more
+    /// than one entry as `ErrorKind::UnclosedAlternates`.
+    pub fn build(&self) -> Result<Glob, Error> {
+        let mut parser = Parser {
+            glob: &self.glob,
+            stack: vec![Tokens::default()],
+            chars: self.glob.chars().peekable(),
+            prev: None,
+            cur: None,
+            opts: &self.opts,
+        };
+        parser.parse()?;
+
+        // Exactly one token list must remain: the top-level pattern.
+        let tokens = match parser.stack.len() {
+            0 => {
+                return Err(Error {
+                    glob: Some(self.glob.to_string()),
+                    kind: ErrorKind::UnopenedAlternates,
+                });
+            }
+            1 => parser.stack.pop().unwrap(),
+            _ => {
+                return Err(Error {
+                    glob: Some(self.glob.to_string()),
+                    kind: ErrorKind::UnclosedAlternates,
+                });
+            }
+        };
+        let re = tokens.to_regex_with(&self.opts);
+        Ok(Glob { glob: self.glob.to_string(), re, opts: self.opts, tokens })
+    }
+
+    /// Choose whether matching ignores case.
+    ///
+    /// This is disabled by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
+        self.opts.case_insensitive = yes;
+        self
+    }
+
+    /// Choose whether a path separator can only be matched by a literal
+    /// `/` in the pattern.
+    ///
+    /// By default this is false: `*` and `?` will match `/`.
+    pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
+        self.opts.literal_separator = yes;
+        self
+    }
+
+    /// Choose whether a back slash (`\`) escapes the special character
+    /// that follows it in the pattern. When enabled, `\` is additionally
+    /// never interpreted as a path separator, regardless of platform.
+    ///
+    /// This is enabled by default on platforms where `\` is not a
+    /// path separator and disabled by default on platforms where `\`
+    /// is a path separator.
+    pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
+        self.opts.backslash_escape = yes;
+        self
+    }
+}
+
+impl Tokens {
+    /// Render these tokens as an anchored regular expression string that
+    /// reflects the given options.
+    ///
+    /// The output always begins with `(?-u)`, followed by `(?i)` when case
+    /// insensitive matching is requested, and then the translated tokens
+    /// wrapped in `^` and `$`.
+    fn to_regex_with(&self, options: &GlobOptions) -> String {
+        let mut re = String::from("(?-u)");
+        if options.case_insensitive {
+            re.push_str("(?i)");
+        }
+        re.push('^');
+        // A glob that consists solely of `**` is special-cased so that it
+        // matches everything.
+        let only_recursive_prefix =
+            self.len() == 1 && self[0] == Token::RecursivePrefix;
+        if only_recursive_prefix {
+            re.push_str(".*");
+        } else {
+            self.tokens_to_regex(options, &self, &mut re);
+        }
+        re.push('$');
+        re
+    }
+
+    /// Append the regex translation of `tokens` to `re`.
+    ///
+    /// With `literal_separator` set, the single-character and
+    /// zero-or-more wildcards are translated so that they exclude `/`.
+    /// Alternates are translated recursively, one branch at a time.
+    fn tokens_to_regex(
+        &self,
+        options: &GlobOptions,
+        tokens: &[Token],
+        re: &mut String,
+    ) {
+        let exclude_sep = options.literal_separator;
+        for tok in tokens {
+            match *tok {
+                Token::Literal(c) => {
+                    re.push_str(&char_to_escaped_literal(c))
+                }
+                Token::Any => {
+                    re.push_str(if exclude_sep { "[^/]" } else { "." })
+                }
+                Token::ZeroOrMore => {
+                    re.push_str(if exclude_sep { "[^/]*" } else { ".*" })
+                }
+                Token::RecursivePrefix => re.push_str("(?:/?|.*/)"),
+                Token::RecursiveSuffix => re.push_str("/.*"),
+                Token::RecursiveZeroOrMore => re.push_str("(?:/|/.*/)"),
+                Token::Class { negated, ref ranges } => {
+                    re.push('[');
+                    if negated {
+                        re.push('^');
+                    }
+                    for &(lo, hi) in ranges {
+                        re.push_str(&char_to_escaped_literal(lo));
+                        // A one-character range is written as just that
+                        // character; not required, but nicer to look at.
+                        if lo != hi {
+                            re.push('-');
+                            re.push_str(&char_to_escaped_literal(hi));
+                        }
+                    }
+                    re.push(']');
+                }
+                Token::Alternates(ref patterns) => {
+                    // Branches that translate to nothing are dropped.
+                    let parts: Vec<String> = patterns
+                        .iter()
+                        .map(|pat| {
+                            let mut altre = String::new();
+                            self.tokens_to_regex(options, &pat, &mut altre);
+                            altre
+                        })
+                        .filter(|altre| !altre.is_empty())
+                        .collect();
+
+                    // It is possible to have an empty set in which case
+                    // the resulting alternation '()' would be an error.
+                    if !parts.is_empty() {
+                        re.push('(');
+                        re.push_str(&parts.join("|"));
+                        re.push(')');
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Produce the escaped form of a single Unicode scalar value, suitable for
+/// use as a literal in a non-Unicode regex.
+///
+/// The character is encoded as UTF-8 and the resulting bytes are handed to
+/// `bytes_to_escaped_literal`.
+fn char_to_escaped_literal(c: char) -> String {
+    // Four bytes is the maximum length of a UTF-8 encoded `char`.
+    let mut utf8 = [0u8; 4];
+    bytes_to_escaped_literal(c.encode_utf8(&mut utf8).as_bytes())
+}
+
+/// Turn an arbitrary byte sequence into a UTF-8 string usable inside a
+/// regex.
+///
+/// ASCII bytes are passed through `regex::escape`; every other byte is
+/// written in its `\xNN` (two lowercase hex digits) escaped form.
+fn bytes_to_escaped_literal(bs: &[u8]) -> String {
+    let mut escaped = String::with_capacity(bs.len());
+    for &byte in bs {
+        if byte.is_ascii() {
+            let ascii = char::from(byte).to_string();
+            escaped.push_str(&regex::escape(&ascii));
+        } else {
+            escaped.push_str(&format!("\\x{:02x}", byte));
+        }
+    }
+    escaped
+}
+
+/// State used while turning a glob pattern string into tokens.
+struct Parser<'a> {
+ /// The pattern being parsed; copied into any `Error` that is produced.
+ glob: &'a str,
+ /// Stack of token lists. Tokens are always appended to the last entry;
+ /// additional entries are pushed while inside an alternation group.
+ stack: Vec<Tokens>,
+ /// Peekable iterator over the characters of `glob`.
+ chars: iter::Peekable<str::Chars<'a>>,
+ /// The value `cur` held before the most recent call to `bump`.
+ prev: Option<char>,
+ /// The character returned by the most recent call to `bump`.
+ cur: Option<char>,
+ /// Options that influence parsing (e.g. `backslash_escape`).
+ opts: &'a GlobOptions,
+}
+
+impl<'a> Parser<'a> {
+ /// Build an `Error` of the given kind that carries a copy of the glob
+ /// being parsed.
+ fn error(&self, kind: ErrorKind) -> Error {
+ Error { glob: Some(self.glob.to_string()), kind: kind }
+ }
+
+ /// Consume the whole pattern one character at a time, dispatching each
+ /// character to the routine that handles it. Characters without special
+ /// handling become `Token::Literal`.
+ fn parse(&mut self) -> Result<(), Error> {
+ while let Some(c) = self.bump() {
+ match c {
+ '?' => self.push_token(Token::Any)?,
+ '*' => self.parse_star()?,
+ '[' => self.parse_class()?,
+ '{' => self.push_alternate()?,
+ '}' => self.pop_alternate()?,
+ ',' => self.parse_comma()?,
+ '\\' => self.parse_backslash()?,
+ c => self.push_token(Token::Literal(c))?,
+ }
+ }
+ Ok(())
+ }
+
+ /// Handle `{`: push a fresh token list for the first branch of a new
+ /// alternation group. Fails with `NestedAlternates` when the stack
+ /// already holds more than one entry.
+ fn push_alternate(&mut self) -> Result<(), Error> {
+ if self.stack.len() > 1 {
+ return Err(self.error(ErrorKind::NestedAlternates));
+ }
+ Ok(self.stack.push(Tokens::default()))
+ }
+
+ /// Handle `}`: pop every stack entry above the bottom one and push them,
+ /// as a single `Token::Alternates`, onto the entry that remains.
+ ///
+ /// The branches are collected in pop order, i.e. the reverse of the order
+ /// in which they were pushed. If the stack holds fewer than two entries
+ /// the loop does not run and an empty `Alternates` is pushed.
+ fn pop_alternate(&mut self) -> Result<(), Error> {
+ let mut alts = vec![];
+ while self.stack.len() >= 2 {
+ alts.push(self.stack.pop().unwrap());
+ }
+ self.push_token(Token::Alternates(alts))
+ }
+
+ /// Append `tok` to the token list on top of the stack. Fails with
+ /// `UnopenedAlternates` if the stack is empty.
+ fn push_token(&mut self, tok: Token) -> Result<(), Error> {
+ if let Some(ref mut pat) = self.stack.last_mut() {
+ return Ok(pat.push(tok));
+ }
+ Err(self.error(ErrorKind::UnopenedAlternates))
+ }
+
+ /// Remove and return the last token of the list on top of the stack.
+ /// Fails with `UnopenedAlternates` if the stack is empty.
+ ///
+ /// Panics (via `unwrap`) if that list has no tokens.
+ fn pop_token(&mut self) -> Result<Token, Error> {
+ if let Some(ref mut pat) = self.stack.last_mut() {
+ return Ok(pat.pop().unwrap());
+ }
+ Err(self.error(ErrorKind::UnopenedAlternates))
+ }
+
+ /// Report whether the token list on top of the stack is non-empty.
+ /// Fails with `UnopenedAlternates` if the stack is empty.
+ fn have_tokens(&self) -> Result<bool, Error> {
+ match self.stack.last() {
+ None => Err(self.error(ErrorKind::UnopenedAlternates)),
+ Some(ref pat) => Ok(!pat.is_empty()),
+ }
+ }
+
+ /// Handle `,`: a literal comma outside of an alternation group, the start
+ /// of a new branch inside one.
+ fn parse_comma(&mut self) -> Result<(), Error> {
+ // If we aren't inside a group alternation, then don't
+ // treat commas specially. Otherwise, we need to start
+ // a new alternate.
+ if self.stack.len() <= 1 {
+ self.push_token(Token::Literal(','))
+ } else {
+ Ok(self.stack.push(Tokens::default()))
+ }
+ }
+
+ /// Handle `\`.
+ ///
+ /// With `backslash_escape` enabled the next character is consumed and
+ /// pushed as a literal; a trailing `\` yields `DanglingEscape`. Otherwise
+ /// the back slash becomes a literal `/` when `is_separator('\\')` holds,
+ /// and a literal `\` when it does not.
+ fn parse_backslash(&mut self) -> Result<(), Error> {
+ if self.opts.backslash_escape {
+ match self.bump() {
+ None => Err(self.error(ErrorKind::DanglingEscape)),
+ Some(c) => self.push_token(Token::Literal(c)),
+ }
+ } else if is_separator('\\') {
+ // Normalize all patterns to use / as a separator.
+ self.push_token(Token::Literal('/'))
+ } else {
+ self.push_token(Token::Literal('\\'))
+ }
+ }
+
+ /// Handle `*`, deciding between a plain `ZeroOrMore` and one of the
+ /// recursive (`**`) tokens based on the surrounding characters.
+ fn parse_star(&mut self) -> Result<(), Error> {
+ // The character that preceded this `*` (the `*` itself is `cur`).
+ let prev = self.prev;
+ // A single `*`, not followed by another `*`.
+ if self.peek() != Some('*') {
+ self.push_token(Token::ZeroOrMore)?;
+ return Ok(());
+ }
+ // Consume the second `*`.
+ assert!(self.bump() == Some('*'));
+ // `**` is the first thing in the current token list.
+ if !self.have_tokens()? {
+ // Followed by a non-separator character: two ordinary wildcards.
+ if !self.peek().map_or(true, is_separator) {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ } else {
+ // Followed by a separator or the end of the pattern: a
+ // recursive prefix. The separator, if any, is consumed.
+ self.push_token(Token::RecursivePrefix)?;
+ assert!(self.bump().map_or(true, is_separator));
+ }
+ return Ok(());
+ }
+
+ // `**` is not preceded by a separator: treat it as two ordinary
+ // wildcards, unless we are inside an alternation group and the
+ // preceding character is `,` or `{`.
+ if !prev.map(is_separator).unwrap_or(false) {
+ if self.stack.len() <= 1
+ || (prev != Some(',') && prev != Some('{'))
+ {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ return Ok(());
+ }
+ }
+ // Look at what follows `**` to decide whether it ends the (sub)pattern
+ // (suffix) or sits between two separators.
+ let is_suffix = match self.peek() {
+ None => {
+ assert!(self.bump().is_none());
+ true
+ }
+ Some(',') | Some('}') if self.stack.len() >= 2 => true,
+ Some(c) if is_separator(c) => {
+ assert!(self.bump().map(is_separator).unwrap_or(false));
+ false
+ }
+ _ => {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ return Ok(());
+ }
+ };
+ // Inspect the token pushed just before `**`. An existing recursive
+ // prefix or suffix is put back unchanged, so repeated `**/` segments
+ // collapse into one. Any other token is dropped (presumably the
+ // separator literal that preceded `**`) and replaced by the
+ // appropriate recursive token.
+ match self.pop_token()? {
+ Token::RecursivePrefix => {
+ self.push_token(Token::RecursivePrefix)?;
+ }
+ Token::RecursiveSuffix => {
+ self.push_token(Token::RecursiveSuffix)?;
+ }
+ _ => {
+ if is_suffix {
+ self.push_token(Token::RecursiveSuffix)?;
+ } else {
+ self.push_token(Token::RecursiveZeroOrMore)?;
+ }
+ }
+ }
+ Ok(())
+ }
+
+ /// Handle `[`: parse a character class up to its closing `]` and push a
+ /// `Token::Class`.
+ ///
+ /// Fails with `UnclosedClass` if the pattern ends before the class is
+ /// closed, and with `InvalidRange` if a range's end is less than its
+ /// start.
+ fn parse_class(&mut self) -> Result<(), Error> {
+ // Sets the end of range `r` to `add`, rejecting a range whose end is
+ // less than its start.
+ fn add_to_last_range(
+ glob: &str,
+ r: &mut (char, char),
+ add: char,
+ ) -> Result<(), Error> {
+ r.1 = add;
+ if r.1 < r.0 {
+ Err(Error {
+ glob: Some(glob.to_string()),
+ kind: ErrorKind::InvalidRange(r.0, r.1),
+ })
+ } else {
+ Ok(())
+ }
+ }
+ let mut ranges = vec![];
+ // A leading `!` or `^` negates the class.
+ let negated = match self.chars.peek() {
+ Some(&'!') | Some(&'^') => {
+ let bump = self.bump();
+ assert!(bump == Some('!') || bump == Some('^'));
+ true
+ }
+ _ => false,
+ };
+ // `first` is true only for the first character after the optional
+ // negation marker; `in_range` is true right after the `-` of a range
+ // whose end has not been read yet.
+ let mut first = true;
+ let mut in_range = false;
+ loop {
+ let c = match self.bump() {
+ Some(c) => c,
+ // The only way to successfully break this loop is to observe
+ // a ']'.
+ None => return Err(self.error(ErrorKind::UnclosedClass)),
+ };
+ match c {
+ ']' => {
+ // A `]` in first position is a literal member of the class.
+ if first {
+ ranges.push((']', ']'));
+ } else {
+ break;
+ }
+ }
+ '-' => {
+ // A `-` in first position is a literal member of the class.
+ if first {
+ ranges.push(('-', '-'));
+ } else if in_range {
+ // invariant: in_range is only set when there is
+ // already at least one character seen.
+ let r = ranges.last_mut().unwrap();
+ add_to_last_range(&self.glob, r, '-')?;
+ in_range = false;
+ } else {
+ assert!(!ranges.is_empty());
+ in_range = true;
+ }
+ }
+ c => {
+ if in_range {
+ // invariant: in_range is only set when there is
+ // already at least one character seen.
+ add_to_last_range(
+ &self.glob,
+ ranges.last_mut().unwrap(),
+ c,
+ )?;
+ } else {
+ ranges.push((c, c));
+ }
+ in_range = false;
+ }
+ }
+ first = false;
+ }
+ if in_range {
+ // Means that the last character in the class was a '-', so add
+ // it as a literal.
+ ranges.push(('-', '-'));
+ }
+ self.push_token(Token::Class { negated: negated, ranges: ranges })
+ }
+
+ /// Advance to the next character: `prev` takes the old `cur`, `cur`
+ /// takes the next character of the pattern (or `None` at the end), and
+ /// the new `cur` is returned.
+ fn bump(&mut self) -> Option<char> {
+ self.prev = self.cur;
+ self.cur = self.chars.next();
+ self.cur
+ }
+
+ /// Return the next character without consuming it.
+ fn peek(&mut self) -> Option<char> {
+ self.chars.peek().map(|&ch| ch)
+ }
+}
+
+/// Reports whether `haystack` begins with the bytes of `needle`.
+///
+/// An empty `needle` is a prefix of every haystack.
+#[cfg(test)]
+fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
+    haystack.starts_with(needle)
+}
+
+/// Reports whether `haystack` finishes with the bytes of `needle`.
+///
+/// An empty `needle` is a suffix of every haystack.
+#[cfg(test)]
+fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
+    haystack.ends_with(needle)
+}
+
+// Unit tests for glob parsing, regex translation, matching and the literal
+// extraction helpers. Almost every test is generated by one of the macros
+// defined below.
+#[cfg(test)]
+mod tests {
+ use super::Token::*;
+ use super::{Glob, GlobBuilder, Token};
+ use crate::{ErrorKind, GlobSetBuilder};
+
+ // Optional overrides for the builder settings exercised by the tests. A
+ // `None` field leaves the corresponding builder setting untouched.
+ #[derive(Clone, Copy, Debug, Default)]
+ struct Options {
+ casei: Option<bool>,
+ litsep: Option<bool>,
+ bsesc: Option<bool>,
+ }
+
+ // Generates a test asserting that `$pat` parses into exactly `$tokens`.
+ macro_rules! syntax {
+ ($name:ident, $pat:expr, $tokens:expr) => {
+ #[test]
+ fn $name() {
+ let pat = Glob::new($pat).unwrap();
+ assert_eq!($tokens, pat.tokens.0);
+ }
+ };
+ }
+
+ // Generates a test asserting that `$pat` fails to parse with error kind
+ // `$err`.
+ macro_rules! syntaxerr {
+ ($name:ident, $pat:expr, $err:expr) => {
+ #[test]
+ fn $name() {
+ let err = Glob::new($pat).unwrap_err();
+ assert_eq!(&$err, err.kind());
+ }
+ };
+ }
+
+ // Generates a test asserting that `$pat`, built with the given options,
+ // translates to the regex `$re`. The `(?-u)` prefix is added here, so
+ // `$re` is written without it.
+ macro_rules! toregex {
+ ($name:ident, $pat:expr, $re:expr) => {
+ toregex!($name, $pat, $re, Options::default());
+ };
+ ($name:ident, $pat:expr, $re:expr, $options:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
+ assert_eq!(format!("(?-u){}", $re), pat.regex());
+ }
+ };
+ }
+
+ // Generates a test asserting that `$pat` matches `$path` through all
+ // three code paths: the plain matcher, the strategic matcher and a glob
+ // set containing only this glob.
+ // Note: this local macro has the same name as the standard library's
+ // `matches!`.
+ macro_rules! matches {
+ ($name:ident, $pat:expr, $path:expr) => {
+ matches!($name, $pat, $path, Options::default());
+ };
+ ($name:ident, $pat:expr, $path:expr, $options:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
+ let matcher = pat.compile_matcher();
+ let strategic = pat.compile_strategic_matcher();
+ let set = GlobSetBuilder::new().add(pat).build().unwrap();
+ assert!(matcher.is_match($path));
+ assert!(strategic.is_match($path));
+ assert!(set.is_match($path));
+ }
+ };
+ }
+
+ // The negative counterpart of `matches!`: asserts that `$pat` does NOT
+ // match `$path` through any of the three code paths.
+ macro_rules! nmatches {
+ ($name:ident, $pat:expr, $path:expr) => {
+ nmatches!($name, $pat, $path, Options::default());
+ };
+ ($name:ident, $pat:expr, $path:expr, $options:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
+ let matcher = pat.compile_matcher();
+ let strategic = pat.compile_strategic_matcher();
+ let set = GlobSetBuilder::new().add(pat).build().unwrap();
+ assert!(!matcher.is_match($path));
+ assert!(!strategic.is_match($path));
+ assert!(!set.is_match($path));
+ }
+ };
+ }
+
+ // Shorthand for building an owned `String` from a string slice.
+ fn s(string: &str) -> String {
+ string.to_string()
+ }
+
+ // A non-negated class made of the single range `s`-`e`.
+ fn class(s: char, e: char) -> Token {
+ Class { negated: false, ranges: vec![(s, e)] }
+ }
+
+ // A negated class made of the single range `s`-`e`.
+ fn classn(s: char, e: char) -> Token {
+ Class { negated: true, ranges: vec![(s, e)] }
+ }
+
+ // A non-negated class made of the given ranges.
+ fn rclass(ranges: &[(char, char)]) -> Token {
+ Class { negated: false, ranges: ranges.to_vec() }
+ }
+
+ // A negated class made of the given ranges.
+ fn rclassn(ranges: &[(char, char)]) -> Token {
+ Class { negated: true, ranges: ranges.to_vec() }
+ }
+
+ // Tokenization of literals, wildcards, recursive wildcards and classes.
+ syntax!(literal1, "a", vec![Literal('a')]);
+ syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
+ syntax!(any1, "?", vec![Any]);
+ syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
+ syntax!(seq1, "*", vec![ZeroOrMore]);
+ syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
+ syntax!(
+ seq3,
+ "*a*b*",
+ vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
+ );
+ syntax!(rseq1, "**", vec![RecursivePrefix]);
+ syntax!(rseq2, "**/", vec![RecursivePrefix]);
+ syntax!(rseq3, "/**", vec![RecursiveSuffix]);
+ syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
+ syntax!(
+ rseq5,
+ "a/**/b",
+ vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
+ );
+ syntax!(cls1, "[a]", vec![class('a', 'a')]);
+ syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
+ syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
+ syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
+ syntax!(cls5, "[-]", vec![class('-', '-')]);
+ syntax!(cls6, "[]]", vec![class(']', ']')]);
+ syntax!(cls7, "[*]", vec![class('*', '*')]);
+ syntax!(cls8, "[!!]", vec![classn('!', '!')]);
+ syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
+ syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
+ syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
+ syntax!(
+ cls12,
+ "[-a-z-]",
+ vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
+ );
+ syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
+ syntax!(cls14, "[--z]", vec![class('-', 'z')]);
+ syntax!(cls15, "[ --]", vec![class(' ', '-')]);
+ syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
+ syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
+ syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
+ syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
+ syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
+ syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
+
+ // Malformed character classes.
+ syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
+ syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
+ syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
+ syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
+ syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
+ syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
+
+ // Option presets shared by the tests below; each overrides exactly one
+ // builder setting.
+ const CASEI: Options =
+ Options { casei: Some(true), litsep: None, bsesc: None };
+ const SLASHLIT: Options =
+ Options { casei: None, litsep: Some(true), bsesc: None };
+ const NOBSESC: Options =
+ Options { casei: None, litsep: None, bsesc: Some(false) };
+ const BSESC: Options =
+ Options { casei: None, litsep: None, bsesc: Some(true) };
+
+ // Glob-to-regex translation.
+ toregex!(re_casei, "a", "(?i)^a$", &CASEI);
+
+ toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
+ toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
+
+ toregex!(re1, "a", "^a$");
+ toregex!(re2, "?", "^.$");
+ toregex!(re3, "*", "^.*$");
+ toregex!(re4, "a?", "^a.$");
+ toregex!(re5, "?a", "^.a$");
+ toregex!(re6, "a*", "^a.*$");
+ toregex!(re7, "*a", "^.*a$");
+ toregex!(re8, "[*]", r"^[\*]$");
+ toregex!(re9, "[+]", r"^[\+]$");
+ toregex!(re10, "+", r"^\+$");
+ toregex!(re11, "☃", r"^\xe2\x98\x83$");
+ toregex!(re12, "**", r"^.*$");
+ toregex!(re13, "**/", r"^.*$");
+ toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
+ toregex!(re15, "**/**", r"^.*$");
+ toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
+ toregex!(re17, "**/**/**", r"^.*$");
+ toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
+ toregex!(re19, "a/**", r"^a/.*$");
+ toregex!(re20, "a/**/**", r"^a/.*$");
+ toregex!(re21, "a/**/**/**", r"^a/.*$");
+ toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
+ toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
+ toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
+ toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
+ toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
+ toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
+ toregex!(re28, "a**", r"^a.*.*$");
+ toregex!(re29, "**a", r"^.*.*a$");
+ toregex!(re30, "a**b", r"^a.*.*b$");
+ toregex!(re31, "***", r"^.*.*.*$");
+ toregex!(re32, "/a**", r"^/a.*.*$");
+ toregex!(re33, "/**a", r"^/.*.*a$");
+ toregex!(re34, "/a**b", r"^/a.*.*b$");
+
+ // Positive matches: basic wildcards.
+ matches!(match1, "a", "a");
+ matches!(match2, "a*b", "a_b");
+ matches!(match3, "a*b*c", "abc");
+ matches!(match4, "a*b*c", "a_b_c");
+ matches!(match5, "a*b*c", "a___b___c");
+ matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
+ matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+ matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
+ matches!(match9, "*.rs", ".rs");
+ matches!(match10, "☃", "☃");
+
+ // Positive matches: recursive wildcards.
+ matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
+ matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
+ matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
+ matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
+ matches!(matchrec5, "**", "abcde");
+ matches!(matchrec6, "**", "");
+ matches!(matchrec7, "**", ".asdf");
+ matches!(matchrec8, "**", "/x/.asdf");
+ matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
+ matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
+ matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
+ matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
+ matches!(matchrec13, "**/test", "one/two/test");
+ matches!(matchrec14, "**/test", "one/test");
+ matches!(matchrec15, "**/test", "test");
+ matches!(matchrec16, "/**/test", "/one/two/test");
+ matches!(matchrec17, "/**/test", "/one/test");
+ matches!(matchrec18, "/**/test", "/test");
+ matches!(matchrec19, "**/.*", ".abc");
+ matches!(matchrec20, "**/.*", "abc/.abc");
+ matches!(matchrec21, "**/foo/bar", "foo/bar");
+ matches!(matchrec22, ".*/**", ".abc/abc");
+ matches!(matchrec23, "test/**", "test/");
+ matches!(matchrec24, "test/**", "test/one");
+ matches!(matchrec25, "test/**", "test/one/two");
+ matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
+
+ // Positive matches: character classes and ranges.
+ matches!(matchrange1, "a[0-9]b", "a0b");
+ matches!(matchrange2, "a[0-9]b", "a9b");
+ matches!(matchrange3, "a[!0-9]b", "a_b");
+ matches!(matchrange4, "[a-z123]", "1");
+ matches!(matchrange5, "[1a-z23]", "1");
+ matches!(matchrange6, "[123a-z]", "1");
+ matches!(matchrange7, "[abc-]", "-");
+ matches!(matchrange8, "[-abc]", "-");
+ matches!(matchrange9, "[-a-c]", "b");
+ matches!(matchrange10, "[a-c-]", "b");
+ matches!(matchrange11, "[-]", "-");
+ matches!(matchrange12, "a[^0-9]b", "a_b");
+
+ // Positive matches: with default options `*` also matches separators.
+ matches!(matchpat1, "*hello.txt", "hello.txt");
+ matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
+ matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
+ matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
+ matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
+ matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
+ matches!(
+ matchpat7,
+ "*some/path/to/hello.txt",
+ "a/bigger/some/path/to/hello.txt"
+ );
+
+ // Special characters can be matched literally by placing them in a class.
+ matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
+
+ // Positive matches: case insensitivity.
+ matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
+ matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
+ matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
+ matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
+
+ // Positive matches: commas and alternation groups.
+ matches!(matchalt1, "a,b", "a,b");
+ matches!(matchalt2, ",", ",");
+ matches!(matchalt3, "{a,b}", "a");
+ matches!(matchalt4, "{a,b}", "b");
+ matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
+ matches!(matchalt6, "{**/src/**,foo}", "foo");
+ matches!(matchalt7, "{[}],foo}", "}");
+ matches!(matchalt8, "{foo}", "foo");
+ matches!(matchalt9, "{}", "");
+ matches!(matchalt10, "{,}", "");
+ matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
+ matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
+ matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
+
+ // Behaviour with `literal_separator` enabled; some expectations are
+ // platform dependent.
+ matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
+ #[cfg(unix)]
+ nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
+ #[cfg(not(unix))]
+ nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
+ nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
+ matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
+ #[cfg(unix)]
+ nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
+ #[cfg(not(unix))]
+ matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
+
+ // Back slash handling with `backslash_escape` explicitly enabled,
+ // explicitly disabled, and left at the platform default.
+ matches!(matchbackslash1, "\\[", "[", BSESC);
+ matches!(matchbackslash2, "\\?", "?", BSESC);
+ matches!(matchbackslash3, "\\*", "*", BSESC);
+ matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
+ matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
+ matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
+ #[cfg(unix)]
+ matches!(matchbackslash7, "\\a", "a");
+ #[cfg(not(unix))]
+ matches!(matchbackslash8, "\\a", "/a");
+
+ // Negative matches.
+ nmatches!(matchnot1, "a*b*c", "abcd");
+ nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
+ nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
+ nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
+ nmatches!(matchnot5, "/**/test", "test");
+ nmatches!(matchnot6, "/**/test", "/one/notthis");
+ nmatches!(matchnot7, "/**/test", "/notthis");
+ nmatches!(matchnot8, "**/.*", "ab.c");
+ nmatches!(matchnot9, "**/.*", "abc/ab.c");
+ nmatches!(matchnot10, ".*/**", "a.bc");
+ nmatches!(matchnot11, ".*/**", "abc/a.bc");
+ nmatches!(matchnot12, "a[0-9]b", "a_b");
+ nmatches!(matchnot13, "a[!0-9]b", "a0b");
+ nmatches!(matchnot14, "a[!0-9]b", "a9b");
+ nmatches!(matchnot15, "[!-]", "-");
+ nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
+ nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
+ nmatches!(
+ matchnot18,
+ "*some/path/to/hello.txt",
+ "some/path/to/hello.txt-and-then-some"
+ );
+ nmatches!(
+ matchnot19,
+ "*some/path/to/hello.txt",
+ "some/other/path/to/hello.txt"
+ );
+ nmatches!(matchnot20, "a", "foo/a");
+ nmatches!(matchnot21, "./foo", "foo");
+ nmatches!(matchnot22, "**/foo", "foofoo");
+ nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
+ nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
+ nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
+ nmatches!(
+ matchnot26,
+ "**/m4/ltoptions.m4",
+ "csharp/src/packages/repositories.config",
+ SLASHLIT
+ );
+ nmatches!(matchnot27, "a[^0-9]b", "a0b");
+ nmatches!(matchnot28, "a[^0-9]b", "a9b");
+ nmatches!(matchnot29, "[^-]", "-");
+ nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
+ // The next four are negative tests as well, despite the `matchrec`
+ // prefix in their names.
+ nmatches!(
+ matchrec31,
+ "some/*/needle.txt",
+ "some/one/two/needle.txt",
+ SLASHLIT
+ );
+ nmatches!(
+ matchrec32,
+ "some/*/needle.txt",
+ "some/one/two/three/needle.txt",
+ SLASHLIT
+ );
+ nmatches!(matchrec33, ".*/**", ".abc");
+ nmatches!(matchrec34, "foo/**", "foo");
+
+ // Generates a test asserting that calling the method `$which` on the
+ // glob built from `$pat` (with the given options) returns `$expect`.
+ macro_rules! extract {
+ ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
+ extract!($which, $name, $pat, $expect, Options::default());
+ };
+ ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
+ assert_eq!($expect, pat.$which());
+ }
+ };
+ }
+
+ // Thin wrappers around `extract!`, one per extraction method.
+ macro_rules! literal {
+ ($($tt:tt)*) => { extract!(literal, $($tt)*); }
+ }
+
+ macro_rules! basetokens {
+ ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
+ }
+
+ macro_rules! ext {
+ ($($tt:tt)*) => { extract!(ext, $($tt)*); }
+ }
+
+ macro_rules! required_ext {
+ ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
+ }
+
+ macro_rules! prefix {
+ ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
+ }
+
+ macro_rules! suffix {
+ ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
+ }
+
+ macro_rules! baseliteral {
+ ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
+ }
+
+ // Expectations for `literal`.
+ literal!(extract_lit1, "foo", Some(s("foo")));
+ literal!(extract_lit2, "foo", None, CASEI);
+ literal!(extract_lit3, "/foo", Some(s("/foo")));
+ literal!(extract_lit4, "/foo/", Some(s("/foo/")));
+ literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
+ literal!(extract_lit6, "*.foo", None);
+ literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
+ literal!(extract_lit8, "**/foo/bar", None);
+
+ // Expectations for `basename_tokens`.
+ basetokens!(
+ extract_basetoks1,
+ "**/foo",
+ Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
+ );
+ basetokens!(extract_basetoks2, "**/foo", None, CASEI);
+ basetokens!(
+ extract_basetoks3,
+ "**/foo",
+ Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
+ SLASHLIT
+ );
+ basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
+ basetokens!(extract_basetoks5, "*foo", None);
+ basetokens!(extract_basetoks6, "**/fo*o", None);
+ basetokens!(
+ extract_basetoks7,
+ "**/fo*o",
+ Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
+ SLASHLIT
+ );
+
+ // Expectations for `ext`.
+ ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
+ ext!(extract_ext2, "**/*.rs.bak", None);
+ ext!(extract_ext3, "*.rs", Some(s(".rs")));
+ ext!(extract_ext4, "a*.rs", None);
+ ext!(extract_ext5, "/*.c", None);
+ ext!(extract_ext6, "*.c", None, SLASHLIT);
+ ext!(extract_ext7, "*.c", Some(s(".c")));
+
+ // Expectations for `required_ext`. Note that `extract_req_ext2` and
+ // `extract_req_ext3` use the same pattern and expectation.
+ required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
+ required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
+ required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
+ required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
+ required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
+ required_ext!(extract_req_ext6, "./rs", None);
+ required_ext!(extract_req_ext7, "foo", None);
+ required_ext!(extract_req_ext8, ".foo/", None);
+ required_ext!(extract_req_ext9, "foo/", None);
+
+ // Expectations for `prefix`.
+ prefix!(extract_prefix1, "/foo", Some(s("/foo")));
+ prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
+ prefix!(extract_prefix3, "**/foo", None);
+ prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
+
+ // Expectations for `suffix`.
+ suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
+ suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
+ suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
+ suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
+ suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
+ suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
+ suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
+
+ // Expectations for `basename_literal`.
+ baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
+ baseliteral!(extract_baselit2, "foo", None);
+ baseliteral!(extract_baselit3, "*foo", None);
+ baseliteral!(extract_baselit4, "*/foo", None);
+}
diff --git a/vendor/globset/src/lib.rs b/vendor/globset/src/lib.rs
new file mode 100644
index 000000000..c8072b2db
--- /dev/null
+++ b/vendor/globset/src/lib.rs
@@ -0,0 +1,912 @@
+/*!
+The globset crate provides cross platform single glob and glob set matching.
+
+Glob set matching is the process of matching one or more glob patterns against
+a single candidate path simultaneously, and returning all of the globs that
+matched. For example, given this set of globs:
+
+```ignore
+*.rs
+src/lib.rs
+src/**/foo.rs
+```
+
+and a path `src/bar/baz/foo.rs`, then the set would report the first and third
+globs as matching.
+
+# Example: one glob
+
+This example shows how to match a single glob against a single file path.
+
+```
+# fn example() -> Result<(), globset::Error> {
+use globset::Glob;
+
+let glob = Glob::new("*.rs")?.compile_matcher();
+
+assert!(glob.is_match("foo.rs"));
+assert!(glob.is_match("foo/bar.rs"));
+assert!(!glob.is_match("Cargo.toml"));
+# Ok(()) } example().unwrap();
+```
+
+# Example: configuring a glob matcher
+
+This example shows how to use a `GlobBuilder` to configure aspects of match
+semantics. In this example, we prevent wildcards from matching path separators.
+
+```
+# fn example() -> Result<(), globset::Error> {
+use globset::GlobBuilder;
+
+let glob = GlobBuilder::new("*.rs")
+ .literal_separator(true).build()?.compile_matcher();
+
+assert!(glob.is_match("foo.rs"));
+assert!(!glob.is_match("foo/bar.rs")); // no longer matches
+assert!(!glob.is_match("Cargo.toml"));
+# Ok(()) } example().unwrap();
+```
+
+# Example: match multiple globs at once
+
+This example shows how to match multiple glob patterns at once.
+
+```
+# fn example() -> Result<(), globset::Error> {
+use globset::{Glob, GlobSetBuilder};
+
+let mut builder = GlobSetBuilder::new();
+// A GlobBuilder can be used to configure each glob's match semantics
+// independently.
+builder.add(Glob::new("*.rs")?);
+builder.add(Glob::new("src/lib.rs")?);
+builder.add(Glob::new("src/**/foo.rs")?);
+let set = builder.build()?;
+
+assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
+# Ok(()) } example().unwrap();
+```
+
+# Syntax
+
+Standard Unix-style glob syntax is supported:
+
+* `?` matches any single character. (If the `literal_separator` option is
+ enabled, then `?` can never match a path separator.)
+* `*` matches zero or more characters. (If the `literal_separator` option is
+ enabled, then `*` can never match a path separator.)
+* `**` recursively matches directories but is only legal in three situations.
+ First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
+ all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
+ and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
+ <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
+ <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
+ Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
+ the pattern, then it matches zero or more directories. Using `**` anywhere
+ else is illegal (N.B. the glob `**` is allowed and means "match everything").
+* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
+ (N.B. Nesting `{...}` is not currently allowed.)
+* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
+ `[!ab]` to match any character except for `a` and `b`.
+* Metacharacters such as `*` and `?` can be escaped with character class
+ notation. e.g., `[*]` matches `*`.
+* When backslash escapes are enabled, a backslash (`\`) will escape all meta
+ characters in a glob. If it precedes a non-meta character, then the slash is
+ ignored. A `\\` will match a literal `\\`. Note that this mode is only
+ enabled on Unix platforms by default, but can be enabled on any platform
+ via the `backslash_escape` setting on `Glob`.
+
+A `GlobBuilder` can be used to prevent wildcards from matching path separators,
+or to enable case insensitive matching.
+*/
+
+#![deny(missing_docs)]
+
+use std::borrow::Cow;
+use std::collections::{BTreeMap, HashMap};
+use std::error::Error as StdError;
+use std::fmt;
+use std::hash;
+use std::path::Path;
+use std::str;
+
+use aho_corasick::AhoCorasick;
+use bstr::{ByteSlice, ByteVec, B};
+use regex::bytes::{Regex, RegexBuilder, RegexSet};
+
+use crate::glob::MatchStrategy;
+pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
+use crate::pathutil::{file_name, file_name_ext, normalize_path};
+
+mod glob;
+mod pathutil;
+
+#[cfg(feature = "serde1")]
+mod serde_impl;
+
+// With the `log` feature enabled, `debug!` forwards all of its tokens to
+// `log::debug!`.
+#[cfg(feature = "log")]
+macro_rules! debug {
+    ($($token:tt)*) => (::log::debug!($($token)*);)
+}
+
+// Without the `log` feature, `debug!` accepts any tokens and expands to
+// nothing.
+#[cfg(not(feature = "log"))]
+macro_rules! debug {
+    ($($token:tt)*) => {};
+}
+
+/// Represents an error that can occur when parsing a glob pattern.
+///
+/// An error pairs the kind of failure with the glob that triggered it, when
+/// such a glob is known.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The original glob provided by the caller, if one is available.
+    glob: Option<String>,
+    /// The kind of error.
+    kind: ErrorKind,
+}
+
+/// The kind of error that can occur when parsing a glob pattern.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// **DEPRECATED**.
+    ///
+    /// This error used to occur for consistency with git's glob specification,
+    /// but the specification now accepts all uses of `**`. When `**` does not
+    /// appear adjacent to a path separator or at the beginning/end of a glob,
+    /// it is now treated as two consecutive `*` patterns. As such, this error
+    /// is no longer used.
+    InvalidRecursive,
+    /// Occurs when a character class (e.g., `[abc]`) is not closed.
+    UnclosedClass,
+    /// Occurs when a range in a character class (e.g., `[a-z]`) is invalid.
+    /// For example, if the range starts with a lexicographically larger
+    /// character than it ends with. The two fields are the start and the end
+    /// of the offending range, in that order.
+    InvalidRange(char, char),
+    /// Occurs when a `}` is found without a matching `{`.
+    UnopenedAlternates,
+    /// Occurs when a `{` is found without a matching `}`.
+    UnclosedAlternates,
+    /// Occurs when an alternating group is nested inside another alternating
+    /// group, e.g., `{{a,b},{c,d}}`.
+    NestedAlternates,
+    /// Occurs when an unescaped '\' is found at the end of a glob.
+    DanglingEscape,
+    /// An error associated with parsing or compiling a regex. The payload is
+    /// the error message rendered as a string.
+    Regex(String),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl StdError for Error {
+    /// Returns the description of this error's kind.
+    fn description(&self) -> &str {
+        ErrorKind::description(&self.kind)
+    }
+}
+
+impl Error {
+    /// Return the glob that caused this error, if one exists.
+    pub fn glob(&self) -> Option<&str> {
+        self.glob.as_deref()
+    }
+
+    /// Return the kind of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        let Error { kind, .. } = self;
+        kind
+    }
+}
+
+impl ErrorKind {
+    /// Returns a short, human readable message for this kind of error.
+    ///
+    /// For `Regex`, the message is the stored error text. Calling this on
+    /// the hidden `__Nonexhaustive` variant hits `unreachable!()`.
+    fn description(&self) -> &str {
+        match self {
+            ErrorKind::InvalidRecursive => {
+                "invalid use of **; must be one path component"
+            }
+            ErrorKind::UnclosedClass => {
+                "unclosed character class; missing ']'"
+            }
+            ErrorKind::InvalidRange(..) => "invalid character range",
+            ErrorKind::UnopenedAlternates => {
+                "unopened alternate group; missing '{' \
+                 (maybe escape '}' with '[}]'?)"
+            }
+            ErrorKind::UnclosedAlternates => {
+                "unclosed alternate group; missing '}' \
+                 (maybe escape '{' with '[{]'?)"
+            }
+            ErrorKind::NestedAlternates => {
+                "nested alternate groups are not allowed"
+            }
+            ErrorKind::DanglingEscape => "dangling '\\'",
+            ErrorKind::Regex(message) => message,
+            ErrorKind::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    /// Writes the error kind, prefixed with the offending glob when one is
+    /// recorded on this error.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Some(glob) = &self.glob {
+            write!(f, "error parsing glob '{}': {}", glob, self.kind)
+        } else {
+            self.kind.fmt(f)
+        }
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    /// Writes the message for this kind of error. `InvalidRange` gets a
+    /// dedicated message naming both endpoints; every other kind prints its
+    /// `description()`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ErrorKind::InvalidRange(start, end) => {
+                write!(f, "invalid range; '{}' > '{}'", start, end)
+            }
+            ErrorKind::__Nonexhaustive => unreachable!(),
+            ErrorKind::InvalidRecursive
+            | ErrorKind::UnclosedClass
+            | ErrorKind::UnopenedAlternates
+            | ErrorKind::UnclosedAlternates
+            | ErrorKind::NestedAlternates
+            | ErrorKind::DanglingEscape
+            | ErrorKind::Regex(_) => f.write_str(self.description()),
+        }
+    }
+}
+
+/// Compiles `pat` into a byte-oriented regex in which `.` also matches new
+/// lines, using a 10 MiB limit for both the compiled size and the DFA cache.
+///
+/// On failure, the returned error records `pat` as the offending glob and
+/// carries the regex error message.
+fn new_regex(pat: &str) -> Result<Regex, Error> {
+    let mut builder = RegexBuilder::new(pat);
+    builder
+        .dot_matches_new_line(true)
+        .size_limit(10 * (1 << 20))
+        .dfa_size_limit(10 * (1 << 20));
+    match builder.build() {
+        Ok(regex) => Ok(regex),
+        Err(err) => Err(Error {
+            glob: Some(pat.to_string()),
+            kind: ErrorKind::Regex(err.to_string()),
+        }),
+    }
+}
+
+/// Compiles all of the given patterns into a single `RegexSet`.
+///
+/// On failure, the returned error carries the regex error message and no
+/// glob, since the failure is not attributed to one particular pattern here.
+fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
+where
+    S: AsRef<str>,
+    I: IntoIterator<Item = S>,
+{
+    match RegexSet::new(pats) {
+        Ok(set) => Ok(set),
+        Err(err) => {
+            let kind = ErrorKind::Regex(err.to_string());
+            Err(Error { glob: None, kind })
+        }
+    }
+}
+
+// Hasher builder that plugs `fnv::FnvHasher` into the standard hash maps
+// used by the extension based strategies below.
+type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
+
+/// GlobSet represents a group of globs that can be matched together in a
+/// single pass.
+#[derive(Clone, Debug)]
+pub struct GlobSet {
+    // The number of globs this set was built from.
+    len: usize,
+    // The match strategies consulted, in order, when matching a candidate.
+    strats: Vec<GlobSetMatchStrategy>,
+}
+
+impl GlobSet {
+    /// Create an empty `GlobSet`. An empty set matches nothing.
+    #[inline]
+    pub fn empty() -> GlobSet {
+        GlobSet { len: 0, strats: Vec::new() }
+    }
+
+    /// Returns true if this set is empty, and therefore matches nothing.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Returns the number of globs in this set.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns true if any glob in this set matches the path given.
+    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
+        let candidate = Candidate::new(path.as_ref());
+        self.is_match_candidate(&candidate)
+    }
+
+    /// Returns true if any glob in this set matches the path given.
+    ///
+    /// This takes a Candidate as input, which can be used to amortize the
+    /// cost of preparing a path for matching.
+    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
+        // Strategies are tried in order and the first hit ends the search.
+        !self.is_empty() && self.strats.iter().any(|s| s.is_match(path))
+    }
+
+    /// Returns the sequence number of every glob pattern that matches the
+    /// given path.
+    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
+        let candidate = Candidate::new(path.as_ref());
+        self.matches_candidate(&candidate)
+    }
+
+    /// Returns the sequence number of every glob pattern that matches the
+    /// given path.
+    ///
+    /// This takes a Candidate as input, which can be used to amortize the
+    /// cost of preparing a path for matching.
+    pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
+        let mut found = Vec::new();
+        if !self.is_empty() {
+            self.matches_candidate_into(path, &mut found);
+        }
+        found
+    }
+
+    /// Adds the sequence number of every glob pattern that matches the given
+    /// path to the vec given.
+    ///
+    /// `into` is cleared before matching begins, and contains the set of
+    /// sequence numbers (in ascending order) after matching ends. If no globs
+    /// were matched, then `into` will be empty.
+    pub fn matches_into<P: AsRef<Path>>(
+        &self,
+        path: P,
+        into: &mut Vec<usize>,
+    ) {
+        let candidate = Candidate::new(path.as_ref());
+        self.matches_candidate_into(&candidate, into);
+    }
+
+    /// Adds the sequence number of every glob pattern that matches the given
+    /// path to the vec given.
+    ///
+    /// `into` is cleared before matching begins, and contains the set of
+    /// sequence numbers (in ascending order) after matching ends. If no globs
+    /// were matched, then `into` will be empty.
+    ///
+    /// This takes a Candidate as input, which can be used to amortize the
+    /// cost of preparing a path for matching.
+    pub fn matches_candidate_into(
+        &self,
+        path: &Candidate<'_>,
+        into: &mut Vec<usize>,
+    ) {
+        into.clear();
+        if !self.is_empty() {
+            for strategy in self.strats.iter() {
+                strategy.matches_into(path, into);
+            }
+            // A glob can be registered with more than one strategy (see the
+            // suffix handling in `new`), so duplicates are removed here.
+            into.sort();
+            into.dedup();
+        }
+    }
+
+    /// Builds a set from the given globs, bucketing each glob by the match
+    /// strategy that `MatchStrategy::new` selects for it. The position of a
+    /// glob in `pats` is the sequence number reported by the `matches*`
+    /// methods.
+    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
+        if pats.is_empty() {
+            return Ok(GlobSet { len: 0, strats: Vec::new() });
+        }
+        let mut lits = LiteralStrategy::new();
+        let mut base_lits = BasenameLiteralStrategy::new();
+        let mut exts = ExtensionStrategy::new();
+        let mut prefixes = MultiStrategyBuilder::new();
+        let mut suffixes = MultiStrategyBuilder::new();
+        let mut required_exts = RequiredExtensionStrategyBuilder::new();
+        let mut regexes = MultiStrategyBuilder::new();
+        for (index, glob) in pats.iter().enumerate() {
+            match MatchStrategy::new(glob) {
+                MatchStrategy::Literal(lit) => lits.add(index, lit),
+                MatchStrategy::BasenameLiteral(lit) => {
+                    base_lits.add(index, lit)
+                }
+                MatchStrategy::Extension(ext) => exts.add(index, ext),
+                MatchStrategy::Prefix(prefix) => prefixes.add(index, prefix),
+                MatchStrategy::Suffix { suffix, component } => {
+                    // A component suffix is additionally registered as a
+                    // whole-path literal with its first byte dropped
+                    // (presumably the leading `/`).
+                    if component {
+                        lits.add(index, suffix[1..].to_string());
+                    }
+                    suffixes.add(index, suffix);
+                }
+                MatchStrategy::RequiredExtension(ext) => {
+                    required_exts.add(index, ext, glob.regex().to_owned());
+                }
+                MatchStrategy::Regex => {
+                    debug!("glob converted to regex: {:?}", glob);
+                    regexes.add(index, glob.regex().to_owned());
+                }
+            }
+        }
+        debug!(
+            "built glob set; {} literals, {} basenames, {} extensions, \
+             {} prefixes, {} suffixes, {} required extensions, {} regexes",
+            lits.0.len(),
+            base_lits.0.len(),
+            exts.0.len(),
+            prefixes.literals.len(),
+            suffixes.literals.len(),
+            required_exts.0.len(),
+            regexes.literals.len()
+        );
+        let strats = vec![
+            GlobSetMatchStrategy::Extension(exts),
+            GlobSetMatchStrategy::BasenameLiteral(base_lits),
+            GlobSetMatchStrategy::Literal(lits),
+            GlobSetMatchStrategy::Suffix(suffixes.suffix()),
+            GlobSetMatchStrategy::Prefix(prefixes.prefix()),
+            GlobSetMatchStrategy::RequiredExtension(required_exts.build()?),
+            GlobSetMatchStrategy::Regex(regexes.regex_set()?),
+        ];
+        Ok(GlobSet { len: pats.len(), strats })
+    }
+}
+
+impl Default for GlobSet {
+    /// The default `GlobSet` is the empty set, which matches nothing.
+    fn default() -> Self {
+        Self::empty()
+    }
+}
+
+/// GlobSetBuilder collects glob patterns and compiles them into a `GlobSet`
+/// that matches all of them against a file path simultaneously.
+#[derive(Clone, Debug)]
+pub struct GlobSetBuilder {
+    // Patterns in the order they were added.
+    pats: Vec<Glob>,
+}
+
+impl GlobSetBuilder {
+    /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
+    /// patterns. Once all patterns have been added, `build` should be called
+    /// to produce a `GlobSet`, which can then be used for matching.
+    pub fn new() -> GlobSetBuilder {
+        GlobSetBuilder { pats: Vec::new() }
+    }
+
+    /// Builds a new matcher from all of the glob patterns added so far.
+    ///
+    /// Once a matcher is built, no new patterns can be added to it.
+    pub fn build(&self) -> Result<GlobSet, Error> {
+        GlobSet::new(self.pats.as_slice())
+    }
+
+    /// Add a new pattern to this set.
+    ///
+    /// Returns the builder itself so that calls can be chained.
+    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
+        let GlobSetBuilder { pats } = self;
+        pats.push(pat);
+        self
+    }
+}
+
+/// A candidate path for matching.
+///
+/// All glob matching in this crate operates on `Candidate` values.
+/// Constructing candidates has a very small cost associated with it, so
+/// callers may find it beneficial to amortize that cost when matching a single
+/// path against multiple globs or sets of globs.
+#[derive(Clone, Debug)]
+pub struct Candidate<'a> {
+    // The normalized path bytes.
+    path: Cow<'a, [u8]>,
+    // The file name portion of `path`, or empty if there is none.
+    basename: Cow<'a, [u8]>,
+    // The extension of `basename`, or empty if there is none.
+    ext: Cow<'a, [u8]>,
+}
+
+impl<'a> Candidate<'a> {
+    /// Create a new candidate for matching from the given path.
+    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
+        let raw = Vec::from_path_lossy(path.as_ref());
+        let path = normalize_path(raw);
+        let basename = match file_name(&path) {
+            Some(name) => name,
+            None => Cow::Borrowed(B("")),
+        };
+        let ext = match file_name_ext(&basename) {
+            Some(ext) => ext,
+            None => Cow::Borrowed(B("")),
+        };
+        Candidate { path, basename, ext }
+    }
+
+    /// Returns at most the first `max` bytes of the path.
+    fn path_prefix(&self, max: usize) -> &[u8] {
+        let end = self.path.len().min(max);
+        &self.path[..end]
+    }
+
+    /// Returns at most the last `max` bytes of the path.
+    fn path_suffix(&self, max: usize) -> &[u8] {
+        let start = self.path.len().saturating_sub(max);
+        &self.path[start..]
+    }
+}
+
+// One variant per matching strategy a `GlobSet` can hold. Every method simply
+// dispatches to the wrapped strategy.
+#[derive(Clone, Debug)]
+enum GlobSetMatchStrategy {
+    Literal(LiteralStrategy),
+    BasenameLiteral(BasenameLiteralStrategy),
+    Extension(ExtensionStrategy),
+    Prefix(PrefixStrategy),
+    Suffix(SuffixStrategy),
+    RequiredExtension(RequiredExtensionStrategy),
+    Regex(RegexSetStrategy),
+}
+
+impl GlobSetMatchStrategy {
+    // Returns true if the wrapped strategy matches the candidate.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        match self {
+            Self::Literal(strat) => strat.is_match(candidate),
+            Self::BasenameLiteral(strat) => strat.is_match(candidate),
+            Self::Extension(strat) => strat.is_match(candidate),
+            Self::Prefix(strat) => strat.is_match(candidate),
+            Self::Suffix(strat) => strat.is_match(candidate),
+            Self::RequiredExtension(strat) => strat.is_match(candidate),
+            Self::Regex(strat) => strat.is_match(candidate),
+        }
+    }
+
+    // Appends the sequence numbers of the globs matched by the wrapped
+    // strategy to `matches`.
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        match self {
+            Self::Literal(strat) => strat.matches_into(candidate, matches),
+            Self::BasenameLiteral(strat) => {
+                strat.matches_into(candidate, matches)
+            }
+            Self::Extension(strat) => strat.matches_into(candidate, matches),
+            Self::Prefix(strat) => strat.matches_into(candidate, matches),
+            Self::Suffix(strat) => strat.matches_into(candidate, matches),
+            Self::RequiredExtension(strat) => {
+                strat.matches_into(candidate, matches)
+            }
+            Self::Regex(strat) => strat.matches_into(candidate, matches),
+        }
+    }
+}
+
+// Maps a literal path to the sequence numbers of the globs that match
+// exactly that path.
+#[derive(Clone, Debug)]
+struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+
+impl LiteralStrategy {
+    fn new() -> LiteralStrategy {
+        LiteralStrategy(BTreeMap::new())
+    }
+
+    // Registers `lit` as matching the glob with the given sequence number.
+    fn add(&mut self, global_index: usize, lit: String) {
+        let key = lit.into_bytes();
+        self.0.entry(key).or_insert_with(Vec::new).push(global_index);
+    }
+
+    // Returns true if the whole candidate path is a registered literal.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        self.0.get(candidate.path.as_bytes()).is_some()
+    }
+
+    // Appends the sequence numbers registered for the whole candidate path.
+    #[inline(never)]
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        let hits = match self.0.get(candidate.path.as_bytes()) {
+            Some(hits) => hits,
+            None => return,
+        };
+        matches.extend(hits.iter().copied());
+    }
+}
+
+// Maps a literal file name to the sequence numbers of the globs that match
+// any path with exactly that basename.
+#[derive(Clone, Debug)]
+struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+
+impl BasenameLiteralStrategy {
+    fn new() -> BasenameLiteralStrategy {
+        BasenameLiteralStrategy(BTreeMap::new())
+    }
+
+    // Registers `lit` as matching the glob with the given sequence number.
+    fn add(&mut self, global_index: usize, lit: String) {
+        let key = lit.into_bytes();
+        self.0.entry(key).or_insert_with(Vec::new).push(global_index);
+    }
+
+    // Returns true if the candidate has a non-empty basename that is a
+    // registered literal.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        !candidate.basename.is_empty()
+            && self.0.get(candidate.basename.as_bytes()).is_some()
+    }
+
+    // Appends the sequence numbers registered for the candidate's basename.
+    // Candidates without a basename never match.
+    #[inline(never)]
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        if candidate.basename.is_empty() {
+            return;
+        }
+        let hits = match self.0.get(candidate.basename.as_bytes()) {
+            Some(hits) => hits,
+            None => return,
+        };
+        matches.extend(hits.iter().copied());
+    }
+}
+
+// Maps a file extension to the sequence numbers of the globs that match any
+// path with exactly that extension.
+#[derive(Clone, Debug)]
+struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
+
+impl ExtensionStrategy {
+    fn new() -> ExtensionStrategy {
+        ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
+    }
+
+    // Registers `ext` as matching the glob with the given sequence number.
+    fn add(&mut self, global_index: usize, ext: String) {
+        let key = ext.into_bytes();
+        self.0.entry(key).or_insert_with(Vec::new).push(global_index);
+    }
+
+    // Returns true if the candidate has a non-empty extension that is
+    // registered.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        !candidate.ext.is_empty()
+            && self.0.get(candidate.ext.as_bytes()).is_some()
+    }
+
+    // Appends the sequence numbers registered for the candidate's extension.
+    // Candidates without an extension never match.
+    #[inline(never)]
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        if candidate.ext.is_empty() {
+            return;
+        }
+        let hits = match self.0.get(candidate.ext.as_bytes()) {
+            Some(hits) => hits,
+            None => return,
+        };
+        matches.extend(hits.iter().copied());
+    }
+}
+
+// Matches globs that reduce to a literal prefix of the path.
+#[derive(Clone, Debug)]
+struct PrefixStrategy {
+    // Multi-pattern searcher over every prefix literal.
+    matcher: AhoCorasick,
+    // Maps a pattern index of `matcher` to a glob sequence number.
+    map: Vec<usize>,
+    // Length in bytes of the longest prefix literal.
+    longest: usize,
+}
+
+impl PrefixStrategy {
+    // Returns true if any literal occurs at the very start of the path. Only
+    // the first `longest` bytes of the path need to be searched.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        let haystack = candidate.path_prefix(self.longest);
+        self.matcher
+            .find_overlapping_iter(haystack)
+            .any(|found| found.start() == 0)
+    }
+
+    // Appends the sequence number of every glob whose literal occurs at the
+    // very start of the path.
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        let haystack = candidate.path_prefix(self.longest);
+        let hits = self
+            .matcher
+            .find_overlapping_iter(haystack)
+            .filter(|found| found.start() == 0)
+            .map(|found| self.map[found.pattern()]);
+        matches.extend(hits);
+    }
+}
+
+// Matches globs that reduce to a literal suffix of the path.
+#[derive(Clone, Debug)]
+struct SuffixStrategy {
+    // Multi-pattern searcher over every suffix literal.
+    matcher: AhoCorasick,
+    // Maps a pattern index of `matcher` to a glob sequence number.
+    map: Vec<usize>,
+    // Length in bytes of the longest suffix literal.
+    longest: usize,
+}
+
+impl SuffixStrategy {
+    // Returns true if any literal ends exactly at the end of the path. Only
+    // the last `longest` bytes of the path need to be searched.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        let haystack = candidate.path_suffix(self.longest);
+        self.matcher
+            .find_overlapping_iter(haystack)
+            .any(|found| found.end() == haystack.len())
+    }
+
+    // Appends the sequence number of every glob whose literal ends exactly
+    // at the end of the path.
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        let haystack = candidate.path_suffix(self.longest);
+        let hits = self
+            .matcher
+            .find_overlapping_iter(haystack)
+            .filter(|found| found.end() == haystack.len())
+            .map(|found| self.map[found.pattern()]);
+        matches.extend(hits);
+    }
+}
+
+// Maps a file extension to the globs that require it. Each entry pairs a
+// glob's sequence number with its compiled regex, which is only run against
+// candidates that carry the extension.
+#[derive(Clone, Debug)]
+struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
+
+impl RequiredExtensionStrategy {
+    // Returns true if the candidate has an extension and at least one regex
+    // registered for that extension matches the whole path.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        if candidate.ext.is_empty() {
+            return false;
+        }
+        self.0.get(candidate.ext.as_bytes()).map_or(false, |regexes| {
+            regexes
+                .iter()
+                .any(|(_, re)| re.is_match(candidate.path.as_bytes()))
+        })
+    }
+
+    // Appends the sequence number of every glob registered for the
+    // candidate's extension whose regex matches the whole path.
+    #[inline(never)]
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        if candidate.ext.is_empty() {
+            return;
+        }
+        let regexes = match self.0.get(candidate.ext.as_bytes()) {
+            Some(regexes) => regexes,
+            None => return,
+        };
+        let hits = regexes
+            .iter()
+            .filter(|(_, re)| re.is_match(candidate.path.as_bytes()))
+            .map(|&(global_index, _)| global_index);
+        matches.extend(hits);
+    }
+}
+
+// Fallback strategy: all remaining globs compiled into one regex set.
+#[derive(Clone, Debug)]
+struct RegexSetStrategy {
+    // The compiled set of glob regexes.
+    matcher: RegexSet,
+    // Maps a pattern index of `matcher` to a glob sequence number.
+    map: Vec<usize>,
+}
+
+impl RegexSetStrategy {
+    // Returns true if any regex in the set matches the candidate path.
+    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
+        let haystack = candidate.path.as_bytes();
+        self.matcher.is_match(haystack)
+    }
+
+    // Appends the sequence number of every glob whose regex matches the
+    // candidate path.
+    fn matches_into(
+        &self,
+        candidate: &Candidate<'_>,
+        matches: &mut Vec<usize>,
+    ) {
+        let haystack = candidate.path.as_bytes();
+        let hits = self.matcher.matches(haystack);
+        matches.extend(hits.into_iter().map(|index| self.map[index]));
+    }
+}
+
+// Accumulates string patterns together with the glob sequence number each
+// one belongs to, and turns them into a prefix, suffix or regex set
+// strategy.
+#[derive(Clone, Debug)]
+struct MultiStrategyBuilder {
+    // The patterns added so far.
+    literals: Vec<String>,
+    // `map[i]` is the glob sequence number of `literals[i]`.
+    map: Vec<usize>,
+    // Length in bytes of the longest pattern added so far.
+    longest: usize,
+}
+
+impl MultiStrategyBuilder {
+    fn new() -> MultiStrategyBuilder {
+        MultiStrategyBuilder {
+            literals: Vec::new(),
+            map: Vec::new(),
+            longest: 0,
+        }
+    }
+
+    // Records `literal` for the glob with the given sequence number.
+    fn add(&mut self, global_index: usize, literal: String) {
+        self.longest = self.longest.max(literal.len());
+        self.map.push(global_index);
+        self.literals.push(literal);
+    }
+
+    // Builds a strategy that treats the recorded patterns as path prefixes.
+    fn prefix(self) -> PrefixStrategy {
+        let MultiStrategyBuilder { literals, map, longest } = self;
+        let matcher = AhoCorasick::new_auto_configured(&literals);
+        PrefixStrategy { matcher, map, longest }
+    }
+
+    // Builds a strategy that treats the recorded patterns as path suffixes.
+    fn suffix(self) -> SuffixStrategy {
+        let MultiStrategyBuilder { literals, map, longest } = self;
+        let matcher = AhoCorasick::new_auto_configured(&literals);
+        SuffixStrategy { matcher, map, longest }
+    }
+
+    // Builds a strategy that compiles the recorded patterns as one regex
+    // set. Fails if the set cannot be compiled.
+    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
+        let MultiStrategyBuilder { literals, map, .. } = self;
+        let matcher = new_regex_set(literals)?;
+        Ok(RegexSetStrategy { matcher, map })
+    }
+}
+
+// Accumulates, per file extension, the globs that require that extension.
+// Each entry pairs a glob's sequence number with its regex source, which is
+// compiled by `build`.
+#[derive(Clone, Debug)]
+struct RequiredExtensionStrategyBuilder(
+    HashMap<Vec<u8>, Vec<(usize, String)>>,
+);
+
+impl RequiredExtensionStrategyBuilder {
+    fn new() -> RequiredExtensionStrategyBuilder {
+        RequiredExtensionStrategyBuilder(HashMap::new())
+    }
+
+    // Records that the glob with the given sequence number requires `ext`
+    // and is otherwise described by the regex source `regex`.
+    fn add(&mut self, global_index: usize, ext: String, regex: String) {
+        self.0
+            .entry(ext.into_bytes())
+            .or_default()
+            .push((global_index, regex));
+    }
+
+    // Compiles every recorded regex and returns the finished strategy.
+    //
+    // Returns the first compilation error encountered, if any.
+    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
+        let mut exts = HashMap::with_hasher(Fnv::default());
+        for (ext, regexes) in self.0 {
+            // Compile the whole bucket first and insert it once. This avoids
+            // cloning the key and looking the bucket up again per regex.
+            let compiled = regexes
+                .into_iter()
+                .map(|(global_index, regex)| {
+                    new_regex(&regex).map(|re| (global_index, re))
+                })
+                .collect::<Result<Vec<_>, Error>>()?;
+            exts.insert(ext, compiled);
+        }
+        Ok(RequiredExtensionStrategy(exts))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{GlobSet, GlobSetBuilder};
+    use crate::glob::Glob;
+
+    // A set with three globs reports a match for any of them and returns the
+    // matching sequence numbers in ascending order.
+    #[test]
+    fn set_works() {
+        let mut builder = GlobSetBuilder::new();
+        builder.add(Glob::new("src/**/*.rs").unwrap());
+        builder.add(Glob::new("*.c").unwrap());
+        builder.add(Glob::new("src/lib.rs").unwrap());
+        let set = builder.build().unwrap();
+
+        assert!(set.is_match("foo.c"));
+        assert!(set.is_match("src/foo.c"));
+        assert!(!set.is_match("foo.rs"));
+        assert!(!set.is_match("tests/foo.rs"));
+        assert!(set.is_match("src/foo.rs"));
+        assert!(set.is_match("src/grep/src/main.rs"));
+
+        // `src/lib.rs` is matched by the first and the third glob.
+        let matches = set.matches("src/lib.rs");
+        assert_eq!(2, matches.len());
+        assert_eq!(0, matches[0]);
+        assert_eq!(2, matches[1]);
+    }
+
+    // A set built from no globs matches nothing, not even the empty path.
+    #[test]
+    fn empty_set_works() {
+        let set = GlobSetBuilder::new().build().unwrap();
+        assert!(!set.is_match(""));
+        assert!(!set.is_match("a"));
+    }
+
+    // The `Default` set behaves like the empty set.
+    #[test]
+    fn default_set_is_empty_works() {
+        let set: GlobSet = Default::default();
+        assert!(!set.is_match(""));
+        assert!(!set.is_match("a"));
+    }
+}
diff --git a/vendor/globset/src/pathutil.rs b/vendor/globset/src/pathutil.rs
new file mode 100644
index 000000000..2bd34e1dd
--- /dev/null
+++ b/vendor/globset/src/pathutil.rs
@@ -0,0 +1,129 @@
+use std::borrow::Cow;
+
+use bstr::{ByteSlice, ByteVec};
+
+/// The final component of the path, if there is one.
+///
+/// Returns `None` if the path is empty or if its last byte is `.`. The
+/// latter covers paths terminating in `.` or `..`, but note that it also
+/// covers any other path that ends with a dot.
+///
+/// Otherwise, this returns everything after the last `/`, or the whole path
+/// if it contains no `/`. The result is empty when the path ends with `/`.
+///
+/// A borrowed input yields a borrowed file name. An owned input yields a
+/// freshly allocated copy of just the file name bytes.
+pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+    if path.is_empty() || path.last_byte() == Some(b'.') {
+        return None;
+    }
+    let start = path.rfind_byte(b'/').map_or(0, |i| i + 1);
+    Some(match *path {
+        Cow::Borrowed(path) => Cow::Borrowed(&path[start..]),
+        // Copy only the file name instead of cloning the whole path and
+        // then draining its prefix.
+        Cow::Owned(ref path) => Cow::Owned(Vec::from_slice(&path[start..])),
+    })
+}
+
+/// Return a file extension given a path's file name.
+///
+/// Note that this does NOT match the semantics of std::path::Path::extension.
+/// Namely, the extension includes the `.` and matching is otherwise more
+/// liberal. Specifically, the extension is:
+///
+/// * None, if the file name given is empty;
+/// * None, if there is no embedded `.`;
+/// * Otherwise, the portion of the file name starting with the final `.`.
+///
+/// e.g., A file name of `.rs` has an extension `.rs`.
+///
+/// N.B. This is done to make certain glob match optimizations easier. Namely,
+/// a pattern like `*.rs` is obviously trying to match files with a `rs`
+/// extension, but it also matches files like `.rs`, which doesn't have an
+/// extension according to std::path::Path::extension.
+///
+/// A borrowed input yields a borrowed extension. An owned input yields a
+/// freshly allocated copy of just the extension bytes.
+pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+    if name.is_empty() {
+        return None;
+    }
+    let last_dot_at = name.rfind_byte(b'.')?;
+    Some(match *name {
+        Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
+        // Copy only the extension instead of cloning the whole name and
+        // then draining its prefix.
+        Cow::Owned(ref name) => {
+            Cow::Owned(Vec::from_slice(&name[last_dot_at..]))
+        }
+    })
+}
+
+/// Normalizes a path to use `/` as a separator everywhere, even on platforms
+/// that recognize other characters as separators.
+///
+/// This is the Unix variant, which returns the path unchanged.
+#[cfg(unix)]
+pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
+    // UNIX only uses /, so we're good.
+    path
+}
+
+/// Normalizes a path to use `/` as a separator everywhere, even on platforms
+/// that recognize other characters as separators.
+///
+/// This is the non-Unix variant. Every byte other than `/` that
+/// `std::path::is_separator` accepts is rewritten to `/`. The path is only
+/// converted to an owned buffer if at least one byte has to change.
+#[cfg(not(unix))]
+pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
+    use std::path::is_separator;
+
+    for i in 0..path.len() {
+        let byte = path[i];
+        if byte != b'/' && is_separator(byte as char) {
+            path.to_mut()[i] = b'/';
+        }
+    }
+    path
+}
+
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use bstr::{ByteVec, B};
+
+    use super::{file_name_ext, normalize_path};
+
+    // Defines a test named `$name` asserting that `file_name_ext` applied to
+    // the owned file name `$file_name` returns `$ext`.
+    macro_rules! ext {
+        ($name:ident, $file_name:expr, $ext:expr) => {
+            #[test]
+            fn $name() {
+                let bs = Vec::from($file_name);
+                let got = file_name_ext(&Cow::Owned(bs));
+                assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
+            }
+        };
+    }
+
+    ext!(ext1, "foo.rs", Some(".rs"));
+    ext!(ext2, ".rs", Some(".rs"));
+    ext!(ext3, "..rs", Some(".rs"));
+    ext!(ext4, "", None::<&str>);
+    ext!(ext5, "foo", None::<&str>);
+
+    // Defines a test named `$name` asserting that `normalize_path` applied
+    // to the owned bytes `$path` returns `$expected`.
+    macro_rules! normalize {
+        ($name:ident, $path:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let bs = Vec::from_slice($path);
+                let got = normalize_path(Cow::Owned(bs));
+                assert_eq!($expected.to_vec(), got.into_owned());
+            }
+        };
+    }
+
+    normalize!(normal1, b"foo", b"foo");
+    normalize!(normal2, b"foo/bar", b"foo/bar");
+    // Backslashes are left alone on Unix and rewritten to `/` elsewhere.
+    #[cfg(unix)]
+    normalize!(normal3, b"foo\\bar", b"foo\\bar");
+    #[cfg(not(unix))]
+    normalize!(normal3, b"foo\\bar", b"foo/bar");
+    #[cfg(unix)]
+    normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
+    #[cfg(not(unix))]
+    normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
+}
diff --git a/vendor/globset/src/serde_impl.rs b/vendor/globset/src/serde_impl.rs
new file mode 100644
index 000000000..6affc5904
--- /dev/null
+++ b/vendor/globset/src/serde_impl.rs
@@ -0,0 +1,38 @@
+use serde::de::Error;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+use crate::Glob;
+
+impl Serialize for Glob {
+    /// Serializes a glob as its original pattern string.
+    fn serialize<S: Serializer>(
+        &self,
+        serializer: S,
+    ) -> Result<S::Ok, S::Error> {
+        let pattern: &str = self.glob();
+        serializer.serialize_str(pattern)
+    }
+}
+
+impl<'de> Deserialize<'de> for Glob {
+    /// Deserializes a glob from a pattern string and parses it with
+    /// `Glob::new`. A pattern that fails to parse is reported as a custom
+    /// deserialization error.
+    fn deserialize<D: Deserializer<'de>>(
+        deserializer: D,
+    ) -> Result<Self, D::Error> {
+        // Deserialize into an owned `String` rather than `&str`. A `&str`
+        // can only be produced when the deserializer is able to lend the
+        // string straight out of its input, which is not possible for,
+        // e.g., JSON strings containing escapes or reader based input.
+        let glob = String::deserialize(deserializer)?;
+        Glob::new(&glob).map_err(D::Error::custom)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // The crate uses 2018-style paths, so the crate root item has to be
+    // named through `crate::`; a bare `use Glob;` does not resolve from
+    // inside this module.
+    use crate::Glob;
+
+    // A glob serializes to its pattern as a JSON string and deserializes
+    // back to an equal glob.
+    #[test]
+    fn glob_json_works() {
+        let test_glob = Glob::new("src/**/*.rs").unwrap();
+
+        let ser = serde_json::to_string(&test_glob).unwrap();
+        assert_eq!(ser, "\"src/**/*.rs\"");
+
+        let de: Glob = serde_json::from_str(&ser).unwrap();
+        assert_eq!(test_glob, de);
+    }
+}