/*! The globset crate provides cross platform single glob and glob set matching. Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. For example, given this set of globs: * `*.rs` * `src/lib.rs` * `src/**/foo.rs` and a path `src/bar/baz/foo.rs`, then the set would report the first and third globs as matching. # Example: one glob This example shows how to match a single glob against a single file path. ``` use globset::Glob; let glob = Glob::new("*.rs")?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(glob.is_match("foo/bar.rs")); assert!(!glob.is_match("Cargo.toml")); # Ok::<(), Box>(()) ``` # Example: configuring a glob matcher This example shows how to use a `GlobBuilder` to configure aspects of match semantics. In this example, we prevent wildcards from matching path separators. ``` use globset::GlobBuilder; let glob = GlobBuilder::new("*.rs") .literal_separator(true).build()?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(!glob.is_match("foo/bar.rs")); // no longer matches assert!(!glob.is_match("Cargo.toml")); # Ok::<(), Box>(()) ``` # Example: match multiple globs at once This example shows how to match multiple glob patterns at once. ``` use globset::{Glob, GlobSetBuilder}; let mut builder = GlobSetBuilder::new(); // A GlobBuilder can be used to configure each glob's match semantics // independently. builder.add(Glob::new("*.rs")?); builder.add(Glob::new("src/lib.rs")?); builder.add(Glob::new("src/**/foo.rs")?); let set = builder.build()?; assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); # Ok::<(), Box>(()) ``` # Syntax Standard Unix-style glob syntax is supported: * `?` matches any single character. (If the `literal_separator` option is enabled, then `?` can never match a path separator.) * `*` matches zero or more characters. (If the `literal_separator` option is enabled, then `*` can never match a path separator.) * `**` recursively matches directories but are only legal in three situations. First, if the glob starts with \*\*/, then it matches all directories. For example, \*\*/foo matches `foo` and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with /\*\*, then it matches all sub-entries. For example, foo/\*\* matches `foo/a` and `foo/a/b`, but not `foo`. Thirdly, if the glob contains /\*\*/ anywhere within the pattern, then it matches zero or more directories. Using `**` anywhere else is illegal (N.B. the glob `**` is allowed and means "match everything"). * `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns. (N.B. Nesting `{...}` is not currently allowed.) * `[ab]` matches `a` or `b` where `a` and `b` are characters. Use `[!ab]` to match any character except for `a` and `b`. * Metacharacters such as `*` and `?` can be escaped with character class notation. e.g., `[*]` matches `*`. * When backslash escapes are enabled, a backslash (`\`) will escape all meta characters in a glob. If it precedes a non-meta character, then the slash is ignored. A `\\` will match a literal `\\`. Note that this mode is only enabled on Unix platforms by default, but can be enabled on any platform via the `backslash_escape` setting on `Glob`. A `GlobBuilder` can be used to prevent wildcards from matching path separators, or to enable case insensitive matching. */ #![deny(missing_docs)] use std::{ borrow::Cow, panic::{RefUnwindSafe, UnwindSafe}, path::Path, sync::Arc, }; use { aho_corasick::AhoCorasick, bstr::{ByteSlice, ByteVec, B}, regex_automata::{ meta::Regex, util::pool::{Pool, PoolGuard}, PatternSet, }, }; use crate::{ glob::MatchStrategy, pathutil::{file_name, file_name_ext, normalize_path}, }; pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; mod fnv; mod glob; mod pathutil; #[cfg(feature = "serde1")] mod serde_impl; #[cfg(feature = "log")] macro_rules! debug { ($($token:tt)*) => (::log::debug!($($token)*);) } #[cfg(not(feature = "log"))] macro_rules! debug { ($($token:tt)*) => {}; } /// Represents an error that can occur when parsing a glob pattern. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Error { /// The original glob provided by the caller. glob: Option, /// The kind of error. kind: ErrorKind, } /// The kind of error that can occur when parsing a glob pattern. #[derive(Clone, Debug, Eq, PartialEq)] pub enum ErrorKind { /// **DEPRECATED**. /// /// This error used to occur for consistency with git's glob specification, /// but the specification now accepts all uses of `**`. When `**` does not /// appear adjacent to a path separator or at the beginning/end of a glob, /// it is now treated as two consecutive `*` patterns. As such, this error /// is no longer used. InvalidRecursive, /// Occurs when a character class (e.g., `[abc]`) is not closed. UnclosedClass, /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For /// example, if the range starts with a lexicographically larger character /// than it ends with. InvalidRange(char, char), /// Occurs when a `}` is found without a matching `{`. UnopenedAlternates, /// Occurs when a `{` is found without a matching `}`. UnclosedAlternates, /// Occurs when an alternating group is nested inside another alternating /// group, e.g., `{{a,b},{c,d}}`. NestedAlternates, /// Occurs when an unescaped '\' is found at the end of a glob. DanglingEscape, /// An error associated with parsing or compiling a regex. Regex(String), /// Hints that destructuring should not be exhaustive. /// /// This enum may grow additional variants, so this makes sure clients /// don't count on exhaustive matching. (Otherwise, adding a new variant /// could break existing code.) #[doc(hidden)] __Nonexhaustive, } impl std::error::Error for Error { fn description(&self) -> &str { self.kind.description() } } impl Error { /// Return the glob that caused this error, if one exists. pub fn glob(&self) -> Option<&str> { self.glob.as_ref().map(|s| &**s) } /// Return the kind of this error. pub fn kind(&self) -> &ErrorKind { &self.kind } } impl ErrorKind { fn description(&self) -> &str { match *self { ErrorKind::InvalidRecursive => { "invalid use of **; must be one path component" } ErrorKind::UnclosedClass => { "unclosed character class; missing ']'" } ErrorKind::InvalidRange(_, _) => "invalid character range", ErrorKind::UnopenedAlternates => { "unopened alternate group; missing '{' \ (maybe escape '}' with '[}]'?)" } ErrorKind::UnclosedAlternates => { "unclosed alternate group; missing '}' \ (maybe escape '{' with '[{]'?)" } ErrorKind::NestedAlternates => { "nested alternate groups are not allowed" } ErrorKind::DanglingEscape => "dangling '\\'", ErrorKind::Regex(ref err) => err, ErrorKind::__Nonexhaustive => unreachable!(), } } } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.glob { None => self.kind.fmt(f), Some(ref glob) => { write!(f, "error parsing glob '{}': {}", glob, self.kind) } } } } impl std::fmt::Display for ErrorKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match *self { ErrorKind::InvalidRecursive | ErrorKind::UnclosedClass | ErrorKind::UnopenedAlternates | ErrorKind::UnclosedAlternates | ErrorKind::NestedAlternates | ErrorKind::DanglingEscape | ErrorKind::Regex(_) => write!(f, "{}", self.description()), ErrorKind::InvalidRange(s, e) => { write!(f, "invalid range; '{}' > '{}'", s, e) } ErrorKind::__Nonexhaustive => unreachable!(), } } } fn new_regex(pat: &str) -> Result { let syntax = regex_automata::util::syntax::Config::new() .utf8(false) .dot_matches_new_line(true); let config = Regex::config() .utf8_empty(false) .nfa_size_limit(Some(10 * (1 << 20))) .hybrid_cache_capacity(10 * (1 << 20)); Regex::builder().syntax(syntax).configure(config).build(pat).map_err( |err| Error { glob: Some(pat.to_string()), kind: ErrorKind::Regex(err.to_string()), }, ) } fn new_regex_set(pats: Vec) -> Result { let syntax = regex_automata::util::syntax::Config::new() .utf8(false) .dot_matches_new_line(true); let config = Regex::config() .match_kind(regex_automata::MatchKind::All) .utf8_empty(false) .nfa_size_limit(Some(10 * (1 << 20))) .hybrid_cache_capacity(10 * (1 << 20)); Regex::builder() .syntax(syntax) .configure(config) .build_many(&pats) .map_err(|err| Error { glob: None, kind: ErrorKind::Regex(err.to_string()), }) } /// GlobSet represents a group of globs that can be matched together in a /// single pass. #[derive(Clone, Debug)] pub struct GlobSet { len: usize, strats: Vec, } impl GlobSet { /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add /// new patterns. Once all patterns have been added, `build` should be /// called to produce a `GlobSet`, which can then be used for matching. #[inline] pub fn builder() -> GlobSetBuilder { GlobSetBuilder::new() } /// Create an empty `GlobSet`. An empty set matches nothing. #[inline] pub fn empty() -> GlobSet { GlobSet { len: 0, strats: vec![] } } /// Returns true if this set is empty, and therefore matches nothing. #[inline] pub fn is_empty(&self) -> bool { self.len == 0 } /// Returns the number of globs in this set. #[inline] pub fn len(&self) -> usize { self.len } /// Returns true if any glob in this set matches the path given. pub fn is_match>(&self, path: P) -> bool { self.is_match_candidate(&Candidate::new(path.as_ref())) } /// Returns true if any glob in this set matches the path given. /// /// This takes a Candidate as input, which can be used to amortize the /// cost of preparing a path for matching. pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool { if self.is_empty() { return false; } for strat in &self.strats { if strat.is_match(path) { return true; } } false } /// Returns the sequence number of every glob pattern that matches the /// given path. pub fn matches>(&self, path: P) -> Vec { self.matches_candidate(&Candidate::new(path.as_ref())) } /// Returns the sequence number of every glob pattern that matches the /// given path. /// /// This takes a Candidate as input, which can be used to amortize the /// cost of preparing a path for matching. pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec { let mut into = vec![]; if self.is_empty() { return into; } self.matches_candidate_into(path, &mut into); into } /// Adds the sequence number of every glob pattern that matches the given /// path to the vec given. /// /// `into` is cleared before matching begins, and contains the set of /// sequence numbers (in ascending order) after matching ends. If no globs /// were matched, then `into` will be empty. pub fn matches_into>( &self, path: P, into: &mut Vec, ) { self.matches_candidate_into(&Candidate::new(path.as_ref()), into); } /// Adds the sequence number of every glob pattern that matches the given /// path to the vec given. /// /// `into` is cleared before matching begins, and contains the set of /// sequence numbers (in ascending order) after matching ends. If no globs /// were matched, then `into` will be empty. /// /// This takes a Candidate as input, which can be used to amortize the /// cost of preparing a path for matching. pub fn matches_candidate_into( &self, path: &Candidate<'_>, into: &mut Vec, ) { into.clear(); if self.is_empty() { return; } for strat in &self.strats { strat.matches_into(path, into); } into.sort(); into.dedup(); } fn new(pats: &[Glob]) -> Result { if pats.is_empty() { return Ok(GlobSet { len: 0, strats: vec![] }); } let mut lits = LiteralStrategy::new(); let mut base_lits = BasenameLiteralStrategy::new(); let mut exts = ExtensionStrategy::new(); let mut prefixes = MultiStrategyBuilder::new(); let mut suffixes = MultiStrategyBuilder::new(); let mut required_exts = RequiredExtensionStrategyBuilder::new(); let mut regexes = MultiStrategyBuilder::new(); for (i, p) in pats.iter().enumerate() { match MatchStrategy::new(p) { MatchStrategy::Literal(lit) => { lits.add(i, lit); } MatchStrategy::BasenameLiteral(lit) => { base_lits.add(i, lit); } MatchStrategy::Extension(ext) => { exts.add(i, ext); } MatchStrategy::Prefix(prefix) => { prefixes.add(i, prefix); } MatchStrategy::Suffix { suffix, component } => { if component { lits.add(i, suffix[1..].to_string()); } suffixes.add(i, suffix); } MatchStrategy::RequiredExtension(ext) => { required_exts.add(i, ext, p.regex().to_owned()); } MatchStrategy::Regex => { debug!("glob converted to regex: {:?}", p); regexes.add(i, p.regex().to_owned()); } } } debug!( "built glob set; {} literals, {} basenames, {} extensions, \ {} prefixes, {} suffixes, {} required extensions, {} regexes", lits.0.len(), base_lits.0.len(), exts.0.len(), prefixes.literals.len(), suffixes.literals.len(), required_exts.0.len(), regexes.literals.len() ); Ok(GlobSet { len: pats.len(), strats: vec![ GlobSetMatchStrategy::Extension(exts), GlobSetMatchStrategy::BasenameLiteral(base_lits), GlobSetMatchStrategy::Literal(lits), GlobSetMatchStrategy::Suffix(suffixes.suffix()), GlobSetMatchStrategy::Prefix(prefixes.prefix()), GlobSetMatchStrategy::RequiredExtension( required_exts.build()?, ), GlobSetMatchStrategy::Regex(regexes.regex_set()?), ], }) } } impl Default for GlobSet { /// Create a default empty GlobSet. fn default() -> Self { GlobSet::empty() } } /// GlobSetBuilder builds a group of patterns that can be used to /// simultaneously match a file path. #[derive(Clone, Debug)] pub struct GlobSetBuilder { pats: Vec, } impl GlobSetBuilder { /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new /// patterns. Once all patterns have been added, `build` should be called /// to produce a [`GlobSet`], which can then be used for matching. pub fn new() -> GlobSetBuilder { GlobSetBuilder { pats: vec![] } } /// Builds a new matcher from all of the glob patterns added so far. /// /// Once a matcher is built, no new patterns can be added to it. pub fn build(&self) -> Result { GlobSet::new(&self.pats) } /// Add a new pattern to this set. pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { self.pats.push(pat); self } } /// A candidate path for matching. /// /// All glob matching in this crate operates on `Candidate` values. /// Constructing candidates has a very small cost associated with it, so /// callers may find it beneficial to amortize that cost when matching a single /// path against multiple globs or sets of globs. #[derive(Clone)] pub struct Candidate<'a> { path: Cow<'a, [u8]>, basename: Cow<'a, [u8]>, ext: Cow<'a, [u8]>, } impl<'a> std::fmt::Debug for Candidate<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("Candidate") .field("path", &self.path.as_bstr()) .field("basename", &self.basename.as_bstr()) .field("ext", &self.ext.as_bstr()) .finish() } } impl<'a> Candidate<'a> { /// Create a new candidate for matching from the given path. pub fn new + ?Sized>(path: &'a P) -> Candidate<'a> { let path = normalize_path(Vec::from_path_lossy(path.as_ref())); let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); Candidate { path, basename, ext } } fn path_prefix(&self, max: usize) -> &[u8] { if self.path.len() <= max { &*self.path } else { &self.path[..max] } } fn path_suffix(&self, max: usize) -> &[u8] { if self.path.len() <= max { &*self.path } else { &self.path[self.path.len() - max..] } } } #[derive(Clone, Debug)] enum GlobSetMatchStrategy { Literal(LiteralStrategy), BasenameLiteral(BasenameLiteralStrategy), Extension(ExtensionStrategy), Prefix(PrefixStrategy), Suffix(SuffixStrategy), RequiredExtension(RequiredExtensionStrategy), Regex(RegexSetStrategy), } impl GlobSetMatchStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { use self::GlobSetMatchStrategy::*; match *self { Literal(ref s) => s.is_match(candidate), BasenameLiteral(ref s) => s.is_match(candidate), Extension(ref s) => s.is_match(candidate), Prefix(ref s) => s.is_match(candidate), Suffix(ref s) => s.is_match(candidate), RequiredExtension(ref s) => s.is_match(candidate), Regex(ref s) => s.is_match(candidate), } } fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { use self::GlobSetMatchStrategy::*; match *self { Literal(ref s) => s.matches_into(candidate, matches), BasenameLiteral(ref s) => s.matches_into(candidate, matches), Extension(ref s) => s.matches_into(candidate, matches), Prefix(ref s) => s.matches_into(candidate, matches), Suffix(ref s) => s.matches_into(candidate, matches), RequiredExtension(ref s) => s.matches_into(candidate, matches), Regex(ref s) => s.matches_into(candidate, matches), } } } #[derive(Clone, Debug)] struct LiteralStrategy(fnv::HashMap, Vec>); impl LiteralStrategy { fn new() -> LiteralStrategy { LiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); } fn is_match(&self, candidate: &Candidate<'_>) -> bool { self.0.contains_key(candidate.path.as_bytes()) } #[inline(never)] fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { if let Some(hits) = self.0.get(candidate.path.as_bytes()) { matches.extend(hits); } } } #[derive(Clone, Debug)] struct BasenameLiteralStrategy(fnv::HashMap, Vec>); impl BasenameLiteralStrategy { fn new() -> BasenameLiteralStrategy { BasenameLiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); } fn is_match(&self, candidate: &Candidate<'_>) -> bool { if candidate.basename.is_empty() { return false; } self.0.contains_key(candidate.basename.as_bytes()) } #[inline(never)] fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { if candidate.basename.is_empty() { return; } if let Some(hits) = self.0.get(candidate.basename.as_bytes()) { matches.extend(hits); } } } #[derive(Clone, Debug)] struct ExtensionStrategy(fnv::HashMap, Vec>); impl ExtensionStrategy { fn new() -> ExtensionStrategy { ExtensionStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String) { self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); } fn is_match(&self, candidate: &Candidate<'_>) -> bool { if candidate.ext.is_empty() { return false; } self.0.contains_key(candidate.ext.as_bytes()) } #[inline(never)] fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { if candidate.ext.is_empty() { return; } if let Some(hits) = self.0.get(candidate.ext.as_bytes()) { matches.extend(hits); } } } #[derive(Clone, Debug)] struct PrefixStrategy { matcher: AhoCorasick, map: Vec, longest: usize, } impl PrefixStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { let path = candidate.path_prefix(self.longest); for m in self.matcher.find_overlapping_iter(path) { if m.start() == 0 { return true; } } false } fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { let path = candidate.path_prefix(self.longest); for m in self.matcher.find_overlapping_iter(path) { if m.start() == 0 { matches.push(self.map[m.pattern()]); } } } } #[derive(Clone, Debug)] struct SuffixStrategy { matcher: AhoCorasick, map: Vec, longest: usize, } impl SuffixStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { let path = candidate.path_suffix(self.longest); for m in self.matcher.find_overlapping_iter(path) { if m.end() == path.len() { return true; } } false } fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { let path = candidate.path_suffix(self.longest); for m in self.matcher.find_overlapping_iter(path) { if m.end() == path.len() { matches.push(self.map[m.pattern()]); } } } } #[derive(Clone, Debug)] struct RequiredExtensionStrategy(fnv::HashMap, Vec<(usize, Regex)>>); impl RequiredExtensionStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { if candidate.ext.is_empty() { return false; } match self.0.get(candidate.ext.as_bytes()) { None => false, Some(regexes) => { for &(_, ref re) in regexes { if re.is_match(candidate.path.as_bytes()) { return true; } } false } } } #[inline(never)] fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { if candidate.ext.is_empty() { return; } if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) { for &(global_index, ref re) in regexes { if re.is_match(candidate.path.as_bytes()) { matches.push(global_index); } } } } } #[derive(Clone, Debug)] struct RegexSetStrategy { matcher: Regex, map: Vec, // We use a pool of PatternSets to hopefully allocating a fresh one on each // call. // // TODO: In the next semver breaking release, we should drop this pool and // expose an opaque type that wraps PatternSet. Then callers can provide // it to `matches_into` directly. Callers might still want to use a pool // or similar to amortize allocation, but that matches the status quo and // absolves us of needing to do it here. patset: Arc>, } type PatternSetPoolFn = Box PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>; impl RegexSetStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { self.matcher.is_match(candidate.path.as_bytes()) } fn matches_into( &self, candidate: &Candidate<'_>, matches: &mut Vec, ) { let input = regex_automata::Input::new(candidate.path.as_bytes()); let mut patset = self.patset.get(); patset.clear(); self.matcher.which_overlapping_matches(&input, &mut patset); for i in patset.iter() { matches.push(self.map[i]); } PoolGuard::put(patset); } } #[derive(Clone, Debug)] struct MultiStrategyBuilder { literals: Vec, map: Vec, longest: usize, } impl MultiStrategyBuilder { fn new() -> MultiStrategyBuilder { MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 } } fn add(&mut self, global_index: usize, literal: String) { if literal.len() > self.longest { self.longest = literal.len(); } self.map.push(global_index); self.literals.push(literal); } fn prefix(self) -> PrefixStrategy { PrefixStrategy { matcher: AhoCorasick::new(&self.literals).unwrap(), map: self.map, longest: self.longest, } } fn suffix(self) -> SuffixStrategy { SuffixStrategy { matcher: AhoCorasick::new(&self.literals).unwrap(), map: self.map, longest: self.longest, } } fn regex_set(self) -> Result { let matcher = new_regex_set(self.literals)?; let pattern_len = matcher.pattern_len(); let create: PatternSetPoolFn = Box::new(move || PatternSet::new(pattern_len)); Ok(RegexSetStrategy { matcher, map: self.map, patset: Arc::new(Pool::new(create)), }) } } #[derive(Clone, Debug)] struct RequiredExtensionStrategyBuilder( fnv::HashMap, Vec<(usize, String)>>, ); impl RequiredExtensionStrategyBuilder { fn new() -> RequiredExtensionStrategyBuilder { RequiredExtensionStrategyBuilder(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String, regex: String) { self.0 .entry(ext.into_bytes()) .or_insert(vec![]) .push((global_index, regex)); } fn build(self) -> Result { let mut exts = fnv::HashMap::default(); for (ext, regexes) in self.0.into_iter() { exts.insert(ext.clone(), vec![]); for (global_index, regex) in regexes { let compiled = new_regex(®ex)?; exts.get_mut(&ext).unwrap().push((global_index, compiled)); } } Ok(RequiredExtensionStrategy(exts)) } } /// Escape meta-characters within the given glob pattern. /// /// The escaping works by surrounding meta-characters with brackets. For /// example, `*` becomes `[*]`. pub fn escape(s: &str) -> String { let mut escaped = String::with_capacity(s.len()); for c in s.chars() { match c { // note that ! does not need escaping because it is only special // inside brackets '?' | '*' | '[' | ']' => { escaped.push('['); escaped.push(c); escaped.push(']'); } c => { escaped.push(c); } } } escaped } #[cfg(test)] mod tests { use crate::glob::Glob; use super::{GlobSet, GlobSetBuilder}; #[test] fn set_works() { let mut builder = GlobSetBuilder::new(); builder.add(Glob::new("src/**/*.rs").unwrap()); builder.add(Glob::new("*.c").unwrap()); builder.add(Glob::new("src/lib.rs").unwrap()); let set = builder.build().unwrap(); assert!(set.is_match("foo.c")); assert!(set.is_match("src/foo.c")); assert!(!set.is_match("foo.rs")); assert!(!set.is_match("tests/foo.rs")); assert!(set.is_match("src/foo.rs")); assert!(set.is_match("src/grep/src/main.rs")); let matches = set.matches("src/lib.rs"); assert_eq!(2, matches.len()); assert_eq!(0, matches[0]); assert_eq!(2, matches[1]); } #[test] fn empty_set_works() { let set = GlobSetBuilder::new().build().unwrap(); assert!(!set.is_match("")); assert!(!set.is_match("a")); } #[test] fn default_set_is_empty_works() { let set: GlobSet = Default::default(); assert!(!set.is_match("")); assert!(!set.is_match("a")); } #[test] fn escape() { use super::escape; assert_eq!("foo", escape("foo")); assert_eq!("foo[*]", escape("foo*")); assert_eq!("[[][]]", escape("[]")); assert_eq!("[*][?]", escape("*?")); assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs")); assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz")); assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz")); } // This tests that regex matching doesn't "remember" the results of // previous searches. That is, if any memory is reused from a previous // search, then it should be cleared first. #[test] fn set_does_not_remember() { let mut builder = GlobSetBuilder::new(); builder.add(Glob::new("*foo*").unwrap()); builder.add(Glob::new("*bar*").unwrap()); builder.add(Glob::new("*quux*").unwrap()); let set = builder.build().unwrap(); let matches = set.matches("ZfooZquuxZ"); assert_eq!(2, matches.len()); assert_eq!(0, matches[0]); assert_eq!(2, matches[1]); let matches = set.matches("nada"); assert_eq!(0, matches.len()); } }