author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-17 12:02:58 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-17 12:02:58 +0000
commit     698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree       173a775858bd501c378080a10dca74132f05bc50 /vendor/globset/src
parent     Initial commit. (diff)
Adding upstream version 1.64.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/globset/src')
-rw-r--r--  vendor/globset/src/glob.rs        1528
-rw-r--r--  vendor/globset/src/lib.rs          912
-rw-r--r--  vendor/globset/src/pathutil.rs     129
-rw-r--r--  vendor/globset/src/serde_impl.rs    38
4 files changed, 2607 insertions(+), 0 deletions(-)
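For orientation: the crate added by this commit provides single-glob and glob-set matching. Its own module documentation (visible in the `lib.rs` hunk below) demonstrates the intended usage; the following minimal sketch is assembled from those doc examples and assumes nothing beyond the public API added in this diff (`Glob`, `GlobMatcher`, `GlobSetBuilder`, `GlobSet`):

```rust
use globset::{Glob, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    // Single-pattern matching: compile one glob into a matcher.
    let matcher = Glob::new("*.rs")?.compile_matcher();
    assert!(matcher.is_match("foo.rs"));
    assert!(!matcher.is_match("Cargo.toml"));

    // Set matching: every glob is matched against the path in a single pass.
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.rs")?);
    builder.add(Glob::new("src/lib.rs")?);
    builder.add(Glob::new("src/**/foo.rs")?);
    let set = builder.build()?;

    // Indices of the globs that matched, in ascending order.
    assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
    Ok(())
}
```

Note that the `GlobBuilder` options defined in `glob.rs` (e.g. `literal_separator`, `case_insensitive`, `backslash_escape`) change these semantics; by default `*` and `?` are allowed to match `/`.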
diff --git a/vendor/globset/src/glob.rs b/vendor/globset/src/glob.rs new file mode 100644 index 000000000..6e35aeec3 --- /dev/null +++ b/vendor/globset/src/glob.rs @@ -0,0 +1,1528 @@ +use std::fmt; +use std::hash; +use std::iter; +use std::ops::{Deref, DerefMut}; +use std::path::{is_separator, Path}; +use std::str; + +use regex; +use regex::bytes::Regex; + +use crate::{new_regex, Candidate, Error, ErrorKind}; + +/// Describes a matching strategy for a particular pattern. +/// +/// This provides a way to more quickly determine whether a pattern matches +/// a particular file path in a way that scales with a large number of +/// patterns. For example, if many patterns are of the form `*.ext`, then it's +/// possible to test whether any of those patterns matches by looking up a +/// file path's extension in a hash table. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MatchStrategy { + /// A pattern matches if and only if the entire file path matches this + /// literal string. + Literal(String), + /// A pattern matches if and only if the file path's basename matches this + /// literal string. + BasenameLiteral(String), + /// A pattern matches if and only if the file path's extension matches this + /// literal string. + Extension(String), + /// A pattern matches if and only if this prefix literal is a prefix of the + /// candidate file path. + Prefix(String), + /// A pattern matches if and only if this prefix literal is a prefix of the + /// candidate file path. + /// + /// An exception: if `component` is true, then `suffix` must appear at the + /// beginning of a file path or immediately following a `/`. + Suffix { + /// The actual suffix. + suffix: String, + /// Whether this must start at the beginning of a path component. + component: bool, + }, + /// A pattern matches only if the given extension matches the file path's + /// extension. Note that this is a necessary but NOT sufficient criterion. + /// Namely, if the extension matches, then a full regex search is still + /// required. + RequiredExtension(String), + /// A regex needs to be used for matching. + Regex, +} + +impl MatchStrategy { + /// Returns a matching strategy for the given pattern. + pub fn new(pat: &Glob) -> MatchStrategy { + if let Some(lit) = pat.basename_literal() { + MatchStrategy::BasenameLiteral(lit) + } else if let Some(lit) = pat.literal() { + MatchStrategy::Literal(lit) + } else if let Some(ext) = pat.ext() { + MatchStrategy::Extension(ext) + } else if let Some(prefix) = pat.prefix() { + MatchStrategy::Prefix(prefix) + } else if let Some((suffix, component)) = pat.suffix() { + MatchStrategy::Suffix { suffix: suffix, component: component } + } else if let Some(ext) = pat.required_ext() { + MatchStrategy::RequiredExtension(ext) + } else { + MatchStrategy::Regex + } + } +} + +/// Glob represents a successfully parsed shell glob pattern. +/// +/// It cannot be used directly to match file paths, but it can be converted +/// to a regular expression string or a matcher. 
+#[derive(Clone, Debug, Eq)] +pub struct Glob { + glob: String, + re: String, + opts: GlobOptions, + tokens: Tokens, +} + +impl PartialEq for Glob { + fn eq(&self, other: &Glob) -> bool { + self.glob == other.glob && self.opts == other.opts + } +} + +impl hash::Hash for Glob { + fn hash<H: hash::Hasher>(&self, state: &mut H) { + self.glob.hash(state); + self.opts.hash(state); + } +} + +impl fmt::Display for Glob { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.glob.fmt(f) + } +} + +impl str::FromStr for Glob { + type Err = Error; + + fn from_str(glob: &str) -> Result<Self, Self::Err> { + Self::new(glob) + } +} + +/// A matcher for a single pattern. +#[derive(Clone, Debug)] +pub struct GlobMatcher { + /// The underlying pattern. + pat: Glob, + /// The pattern, as a compiled regex. + re: Regex, +} + +impl GlobMatcher { + /// Tests whether the given path matches this pattern or not. + pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool { + self.is_match_candidate(&Candidate::new(path.as_ref())) + } + + /// Tests whether the given path matches this pattern or not. + pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool { + self.re.is_match(&path.path) + } + + /// Returns the `Glob` used to compile this matcher. + pub fn glob(&self) -> &Glob { + &self.pat + } +} + +/// A strategic matcher for a single pattern. +#[cfg(test)] +#[derive(Clone, Debug)] +struct GlobStrategic { + /// The match strategy to use. + strategy: MatchStrategy, + /// The pattern, as a compiled regex. + re: Regex, +} + +#[cfg(test)] +impl GlobStrategic { + /// Tests whether the given path matches this pattern or not. + fn is_match<P: AsRef<Path>>(&self, path: P) -> bool { + self.is_match_candidate(&Candidate::new(path.as_ref())) + } + + /// Tests whether the given path matches this pattern or not. + fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool { + let byte_path = &*candidate.path; + + match self.strategy { + MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path, + MatchStrategy::BasenameLiteral(ref lit) => { + lit.as_bytes() == &*candidate.basename + } + MatchStrategy::Extension(ref ext) => { + ext.as_bytes() == &*candidate.ext + } + MatchStrategy::Prefix(ref pre) => { + starts_with(pre.as_bytes(), byte_path) + } + MatchStrategy::Suffix { ref suffix, component } => { + if component && byte_path == &suffix.as_bytes()[1..] { + return true; + } + ends_with(suffix.as_bytes(), byte_path) + } + MatchStrategy::RequiredExtension(ref ext) => { + let ext = ext.as_bytes(); + &*candidate.ext == ext && self.re.is_match(byte_path) + } + MatchStrategy::Regex => self.re.is_match(byte_path), + } + } +} + +/// A builder for a pattern. +/// +/// This builder enables configuring the match semantics of a pattern. For +/// example, one can make matching case insensitive. +/// +/// The lifetime `'a` refers to the lifetime of the pattern string. +#[derive(Clone, Debug)] +pub struct GlobBuilder<'a> { + /// The glob pattern to compile. + glob: &'a str, + /// Options for the pattern. + opts: GlobOptions, +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +struct GlobOptions { + /// Whether to match case insensitively. + case_insensitive: bool, + /// Whether to require a literal separator to match a separator in a file + /// path. e.g., when enabled, `*` won't match `/`. + literal_separator: bool, + /// Whether or not to use `\` to escape special characters. + /// e.g., when enabled, `\*` will match a literal `*`. 
+ backslash_escape: bool, +} + +impl GlobOptions { + fn default() -> GlobOptions { + GlobOptions { + case_insensitive: false, + literal_separator: false, + backslash_escape: !is_separator('\\'), + } + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct Tokens(Vec<Token>); + +impl Deref for Tokens { + type Target = Vec<Token>; + fn deref(&self) -> &Vec<Token> { + &self.0 + } +} + +impl DerefMut for Tokens { + fn deref_mut(&mut self) -> &mut Vec<Token> { + &mut self.0 + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum Token { + Literal(char), + Any, + ZeroOrMore, + RecursivePrefix, + RecursiveSuffix, + RecursiveZeroOrMore, + Class { negated: bool, ranges: Vec<(char, char)> }, + Alternates(Vec<Tokens>), +} + +impl Glob { + /// Builds a new pattern with default options. + pub fn new(glob: &str) -> Result<Glob, Error> { + GlobBuilder::new(glob).build() + } + + /// Returns a matcher for this pattern. + pub fn compile_matcher(&self) -> GlobMatcher { + let re = + new_regex(&self.re).expect("regex compilation shouldn't fail"); + GlobMatcher { pat: self.clone(), re: re } + } + + /// Returns a strategic matcher. + /// + /// This isn't exposed because it's not clear whether it's actually + /// faster than just running a regex for a *single* pattern. If it + /// is faster, then GlobMatcher should do it automatically. + #[cfg(test)] + fn compile_strategic_matcher(&self) -> GlobStrategic { + let strategy = MatchStrategy::new(self); + let re = + new_regex(&self.re).expect("regex compilation shouldn't fail"); + GlobStrategic { strategy: strategy, re: re } + } + + /// Returns the original glob pattern used to build this pattern. + pub fn glob(&self) -> &str { + &self.glob + } + + /// Returns the regular expression string for this glob. + /// + /// Note that regular expressions for globs are intended to be matched on + /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In + /// particular, globs are frequently used on file paths, where there is no + /// general guarantee that file paths are themselves valid UTF-8. As a + /// result, callers will need to ensure that they are using a regex API + /// that can match on arbitrary bytes. For example, the + /// [`regex`](https://crates.io/regex) + /// crate's + /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html) + /// API is not suitable for this since it matches on `&str`, but its + /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html) + /// API is suitable for this. + pub fn regex(&self) -> &str { + &self.re + } + + /// Returns the pattern as a literal if and only if the pattern must match + /// an entire path exactly. + /// + /// The basic format of these patterns is `{literal}`. + fn literal(&self) -> Option<String> { + if self.opts.case_insensitive { + return None; + } + let mut lit = String::new(); + for t in &*self.tokens { + match *t { + Token::Literal(c) => lit.push(c), + _ => return None, + } + } + if lit.is_empty() { + None + } else { + Some(lit) + } + } + + /// Returns an extension if this pattern matches a file path if and only + /// if the file path has the extension returned. + /// + /// Note that this extension returned differs from the extension that + /// std::path::Path::extension returns. Namely, this extension includes + /// the '.'. Also, paths like `.rs` are considered to have an extension + /// of `.rs`. 
+ fn ext(&self) -> Option<String> { + if self.opts.case_insensitive { + return None; + } + let start = match self.tokens.get(0) { + Some(&Token::RecursivePrefix) => 1, + Some(_) => 0, + _ => return None, + }; + match self.tokens.get(start) { + Some(&Token::ZeroOrMore) => { + // If there was no recursive prefix, then we only permit + // `*` if `*` can match a `/`. For example, if `*` can't + // match `/`, then `*.c` doesn't match `foo/bar.c`. + if start == 0 && self.opts.literal_separator { + return None; + } + } + _ => return None, + } + match self.tokens.get(start + 1) { + Some(&Token::Literal('.')) => {} + _ => return None, + } + let mut lit = ".".to_string(); + for t in self.tokens[start + 2..].iter() { + match *t { + Token::Literal('.') | Token::Literal('/') => return None, + Token::Literal(c) => lit.push(c), + _ => return None, + } + } + if lit.is_empty() { + None + } else { + Some(lit) + } + } + + /// This is like `ext`, but returns an extension even if it isn't sufficient + /// to imply a match. Namely, if an extension is returned, then it is + /// necessary but not sufficient for a match. + fn required_ext(&self) -> Option<String> { + if self.opts.case_insensitive { + return None; + } + // We don't care at all about the beginning of this pattern. All we + // need to check for is if it ends with a literal of the form `.ext`. + let mut ext: Vec<char> = vec![]; // built in reverse + for t in self.tokens.iter().rev() { + match *t { + Token::Literal('/') => return None, + Token::Literal(c) => { + ext.push(c); + if c == '.' { + break; + } + } + _ => return None, + } + } + if ext.last() != Some(&'.') { + None + } else { + ext.reverse(); + Some(ext.into_iter().collect()) + } + } + + /// Returns a literal prefix of this pattern if the entire pattern matches + /// if the literal prefix matches. + fn prefix(&self) -> Option<String> { + if self.opts.case_insensitive { + return None; + } + let (end, need_sep) = match self.tokens.last() { + Some(&Token::ZeroOrMore) => { + if self.opts.literal_separator { + // If a trailing `*` can't match a `/`, then we can't + // assume a match of the prefix corresponds to a match + // of the overall pattern. e.g., `foo/*` with + // `literal_separator` enabled matches `foo/bar` but not + // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/` + // literal prefix. + return None; + } + (self.tokens.len() - 1, false) + } + Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true), + _ => (self.tokens.len(), false), + }; + let mut lit = String::new(); + for t in &self.tokens[0..end] { + match *t { + Token::Literal(c) => lit.push(c), + _ => return None, + } + } + if need_sep { + lit.push('/'); + } + if lit.is_empty() { + None + } else { + Some(lit) + } + } + + /// Returns a literal suffix of this pattern if the entire pattern matches + /// if the literal suffix matches. + /// + /// If a literal suffix is returned and it must match either the entire + /// file path or be preceded by a `/`, then also return true. This happens + /// with a pattern like `**/foo/bar`. Namely, this pattern matches + /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the + /// suffix returned is `/foo/bar` (but should match the entire path + /// `foo/bar`). + /// + /// When this returns true, the suffix literal is guaranteed to start with + /// a `/`. 
+ fn suffix(&self) -> Option<(String, bool)> { + if self.opts.case_insensitive { + return None; + } + let mut lit = String::new(); + let (start, entire) = match self.tokens.get(0) { + Some(&Token::RecursivePrefix) => { + // We only care if this follows a path component if the next + // token is a literal. + if let Some(&Token::Literal(_)) = self.tokens.get(1) { + lit.push('/'); + (1, true) + } else { + (1, false) + } + } + _ => (0, false), + }; + let start = match self.tokens.get(start) { + Some(&Token::ZeroOrMore) => { + // If literal_separator is enabled, then a `*` can't + // necessarily match everything, so reporting a suffix match + // as a match of the pattern would be a false positive. + if self.opts.literal_separator { + return None; + } + start + 1 + } + _ => start, + }; + for t in &self.tokens[start..] { + match *t { + Token::Literal(c) => lit.push(c), + _ => return None, + } + } + if lit.is_empty() || lit == "/" { + None + } else { + Some((lit, entire)) + } + } + + /// If this pattern only needs to inspect the basename of a file path, + /// then the tokens corresponding to only the basename match are returned. + /// + /// For example, given a pattern of `**/*.foo`, only the tokens + /// corresponding to `*.foo` are returned. + /// + /// Note that this will return None if any match of the basename tokens + /// doesn't correspond to a match of the entire pattern. For example, the + /// glob `foo` only matches when a file path has a basename of `foo`, but + /// doesn't *always* match when a file path has a basename of `foo`. e.g., + /// `foo` doesn't match `abc/foo`. + fn basename_tokens(&self) -> Option<&[Token]> { + if self.opts.case_insensitive { + return None; + } + let start = match self.tokens.get(0) { + Some(&Token::RecursivePrefix) => 1, + _ => { + // With nothing to gobble up the parent portion of a path, + // we can't assume that matching on only the basename is + // correct. + return None; + } + }; + if self.tokens[start..].is_empty() { + return None; + } + for t in &self.tokens[start..] { + match *t { + Token::Literal('/') => return None, + Token::Literal(_) => {} // OK + Token::Any | Token::ZeroOrMore => { + if !self.opts.literal_separator { + // In this case, `*` and `?` can match a path + // separator, which means this could reach outside + // the basename. + return None; + } + } + Token::RecursivePrefix + | Token::RecursiveSuffix + | Token::RecursiveZeroOrMore => { + return None; + } + Token::Class { .. } | Token::Alternates(..) => { + // We *could* be a little smarter here, but either one + // of these is going to prevent our literal optimizations + // anyway, so give up. + return None; + } + } + } + Some(&self.tokens[start..]) + } + + /// Returns the pattern as a literal if and only if the pattern exclusively + /// matches the basename of a file path *and* is a literal. + /// + /// The basic format of these patterns is `**/{literal}`, where `{literal}` + /// does not contain a path separator. + fn basename_literal(&self) -> Option<String> { + let tokens = match self.basename_tokens() { + None => return None, + Some(tokens) => tokens, + }; + let mut lit = String::new(); + for t in tokens { + match *t { + Token::Literal(c) => lit.push(c), + _ => return None, + } + } + Some(lit) + } +} + +impl<'a> GlobBuilder<'a> { + /// Create a new builder for the pattern given. + /// + /// The pattern is not compiled until `build` is called. 
+ pub fn new(glob: &'a str) -> GlobBuilder<'a> { + GlobBuilder { glob: glob, opts: GlobOptions::default() } + } + + /// Parses and builds the pattern. + pub fn build(&self) -> Result<Glob, Error> { + let mut p = Parser { + glob: &self.glob, + stack: vec![Tokens::default()], + chars: self.glob.chars().peekable(), + prev: None, + cur: None, + opts: &self.opts, + }; + p.parse()?; + if p.stack.is_empty() { + Err(Error { + glob: Some(self.glob.to_string()), + kind: ErrorKind::UnopenedAlternates, + }) + } else if p.stack.len() > 1 { + Err(Error { + glob: Some(self.glob.to_string()), + kind: ErrorKind::UnclosedAlternates, + }) + } else { + let tokens = p.stack.pop().unwrap(); + Ok(Glob { + glob: self.glob.to_string(), + re: tokens.to_regex_with(&self.opts), + opts: self.opts, + tokens: tokens, + }) + } + } + + /// Toggle whether the pattern matches case insensitively or not. + /// + /// This is disabled by default. + pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> { + self.opts.case_insensitive = yes; + self + } + + /// Toggle whether a literal `/` is required to match a path separator. + /// + /// By default this is false: `*` and `?` will match `/`. + pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> { + self.opts.literal_separator = yes; + self + } + + /// When enabled, a back slash (`\`) may be used to escape + /// special characters in a glob pattern. Additionally, this will + /// prevent `\` from being interpreted as a path separator on all + /// platforms. + /// + /// This is enabled by default on platforms where `\` is not a + /// path separator and disabled by default on platforms where `\` + /// is a path separator. + pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> { + self.opts.backslash_escape = yes; + self + } +} + +impl Tokens { + /// Convert this pattern to a string that is guaranteed to be a valid + /// regular expression and will represent the matching semantics of this + /// glob pattern and the options given. + fn to_regex_with(&self, options: &GlobOptions) -> String { + let mut re = String::new(); + re.push_str("(?-u)"); + if options.case_insensitive { + re.push_str("(?i)"); + } + re.push('^'); + // Special case. If the entire glob is just `**`, then it should match + // everything. + if self.len() == 1 && self[0] == Token::RecursivePrefix { + re.push_str(".*"); + re.push('$'); + return re; + } + self.tokens_to_regex(options, &self, &mut re); + re.push('$'); + re + } + + fn tokens_to_regex( + &self, + options: &GlobOptions, + tokens: &[Token], + re: &mut String, + ) { + for tok in tokens { + match *tok { + Token::Literal(c) => { + re.push_str(&char_to_escaped_literal(c)); + } + Token::Any => { + if options.literal_separator { + re.push_str("[^/]"); + } else { + re.push_str("."); + } + } + Token::ZeroOrMore => { + if options.literal_separator { + re.push_str("[^/]*"); + } else { + re.push_str(".*"); + } + } + Token::RecursivePrefix => { + re.push_str("(?:/?|.*/)"); + } + Token::RecursiveSuffix => { + re.push_str("/.*"); + } + Token::RecursiveZeroOrMore => { + re.push_str("(?:/|/.*/)"); + } + Token::Class { negated, ref ranges } => { + re.push('['); + if negated { + re.push('^'); + } + for r in ranges { + if r.0 == r.1 { + // Not strictly necessary, but nicer to look at. 
+                            re.push_str(&char_to_escaped_literal(r.0));
+                        } else {
+                            re.push_str(&char_to_escaped_literal(r.0));
+                            re.push('-');
+                            re.push_str(&char_to_escaped_literal(r.1));
+                        }
+                    }
+                    re.push(']');
+                }
+                Token::Alternates(ref patterns) => {
+                    let mut parts = vec![];
+                    for pat in patterns {
+                        let mut altre = String::new();
+                        self.tokens_to_regex(options, &pat, &mut altre);
+                        if !altre.is_empty() {
+                            parts.push(altre);
+                        }
+                    }
+
+                    // It is possible to have an empty set in which case the
+                    // resulting alternation '()' would be an error.
+                    if !parts.is_empty() {
+                        re.push('(');
+                        re.push_str(&parts.join("|"));
+                        re.push(')');
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Convert a Unicode scalar value to an escaped string suitable for use as
+/// a literal in a non-Unicode regex.
+fn char_to_escaped_literal(c: char) -> String {
+    bytes_to_escaped_literal(&c.to_string().into_bytes())
+}
+
+/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
+/// code units are converted to their escaped form.
+fn bytes_to_escaped_literal(bs: &[u8]) -> String {
+    let mut s = String::with_capacity(bs.len());
+    for &b in bs {
+        if b <= 0x7F {
+            s.push_str(&regex::escape(&(b as char).to_string()));
+        } else {
+            s.push_str(&format!("\\x{:02x}", b));
+        }
+    }
+    s
+}
+
+struct Parser<'a> {
+    glob: &'a str,
+    stack: Vec<Tokens>,
+    chars: iter::Peekable<str::Chars<'a>>,
+    prev: Option<char>,
+    cur: Option<char>,
+    opts: &'a GlobOptions,
+}
+
+impl<'a> Parser<'a> {
+    fn error(&self, kind: ErrorKind) -> Error {
+        Error { glob: Some(self.glob.to_string()), kind: kind }
+    }
+
+    fn parse(&mut self) -> Result<(), Error> {
+        while let Some(c) = self.bump() {
+            match c {
+                '?' => self.push_token(Token::Any)?,
+                '*' => self.parse_star()?,
+                '[' => self.parse_class()?,
+                '{' => self.push_alternate()?,
+                '}' => self.pop_alternate()?,
+                ',' => self.parse_comma()?,
+                '\\' => self.parse_backslash()?,
+                c => self.push_token(Token::Literal(c))?,
+            }
+        }
+        Ok(())
+    }
+
+    fn push_alternate(&mut self) -> Result<(), Error> {
+        if self.stack.len() > 1 {
+            return Err(self.error(ErrorKind::NestedAlternates));
+        }
+        Ok(self.stack.push(Tokens::default()))
+    }
+
+    fn pop_alternate(&mut self) -> Result<(), Error> {
+        let mut alts = vec![];
+        while self.stack.len() >= 2 {
+            alts.push(self.stack.pop().unwrap());
+        }
+        self.push_token(Token::Alternates(alts))
+    }
+
+    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
+        if let Some(ref mut pat) = self.stack.last_mut() {
+            return Ok(pat.push(tok));
+        }
+        Err(self.error(ErrorKind::UnopenedAlternates))
+    }
+
+    fn pop_token(&mut self) -> Result<Token, Error> {
+        if let Some(ref mut pat) = self.stack.last_mut() {
+            return Ok(pat.pop().unwrap());
+        }
+        Err(self.error(ErrorKind::UnopenedAlternates))
+    }
+
+    fn have_tokens(&self) -> Result<bool, Error> {
+        match self.stack.last() {
+            None => Err(self.error(ErrorKind::UnopenedAlternates)),
+            Some(ref pat) => Ok(!pat.is_empty()),
+        }
+    }
+
+    fn parse_comma(&mut self) -> Result<(), Error> {
+        // If we aren't inside a group alternation, then don't
+        // treat commas specially. Otherwise, we need to start
+        // a new alternate.
+        if self.stack.len() <= 1 {
+            self.push_token(Token::Literal(','))
+        } else {
+            Ok(self.stack.push(Tokens::default()))
+        }
+    }
+
+    fn parse_backslash(&mut self) -> Result<(), Error> {
+        if self.opts.backslash_escape {
+            match self.bump() {
+                None => Err(self.error(ErrorKind::DanglingEscape)),
+                Some(c) => self.push_token(Token::Literal(c)),
+            }
+        } else if is_separator('\\') {
+            // Normalize all patterns to use / as a separator.
+ self.push_token(Token::Literal('/')) + } else { + self.push_token(Token::Literal('\\')) + } + } + + fn parse_star(&mut self) -> Result<(), Error> { + let prev = self.prev; + if self.peek() != Some('*') { + self.push_token(Token::ZeroOrMore)?; + return Ok(()); + } + assert!(self.bump() == Some('*')); + if !self.have_tokens()? { + if !self.peek().map_or(true, is_separator) { + self.push_token(Token::ZeroOrMore)?; + self.push_token(Token::ZeroOrMore)?; + } else { + self.push_token(Token::RecursivePrefix)?; + assert!(self.bump().map_or(true, is_separator)); + } + return Ok(()); + } + + if !prev.map(is_separator).unwrap_or(false) { + if self.stack.len() <= 1 + || (prev != Some(',') && prev != Some('{')) + { + self.push_token(Token::ZeroOrMore)?; + self.push_token(Token::ZeroOrMore)?; + return Ok(()); + } + } + let is_suffix = match self.peek() { + None => { + assert!(self.bump().is_none()); + true + } + Some(',') | Some('}') if self.stack.len() >= 2 => true, + Some(c) if is_separator(c) => { + assert!(self.bump().map(is_separator).unwrap_or(false)); + false + } + _ => { + self.push_token(Token::ZeroOrMore)?; + self.push_token(Token::ZeroOrMore)?; + return Ok(()); + } + }; + match self.pop_token()? { + Token::RecursivePrefix => { + self.push_token(Token::RecursivePrefix)?; + } + Token::RecursiveSuffix => { + self.push_token(Token::RecursiveSuffix)?; + } + _ => { + if is_suffix { + self.push_token(Token::RecursiveSuffix)?; + } else { + self.push_token(Token::RecursiveZeroOrMore)?; + } + } + } + Ok(()) + } + + fn parse_class(&mut self) -> Result<(), Error> { + fn add_to_last_range( + glob: &str, + r: &mut (char, char), + add: char, + ) -> Result<(), Error> { + r.1 = add; + if r.1 < r.0 { + Err(Error { + glob: Some(glob.to_string()), + kind: ErrorKind::InvalidRange(r.0, r.1), + }) + } else { + Ok(()) + } + } + let mut ranges = vec![]; + let negated = match self.chars.peek() { + Some(&'!') | Some(&'^') => { + let bump = self.bump(); + assert!(bump == Some('!') || bump == Some('^')); + true + } + _ => false, + }; + let mut first = true; + let mut in_range = false; + loop { + let c = match self.bump() { + Some(c) => c, + // The only way to successfully break this loop is to observe + // a ']'. + None => return Err(self.error(ErrorKind::UnclosedClass)), + }; + match c { + ']' => { + if first { + ranges.push((']', ']')); + } else { + break; + } + } + '-' => { + if first { + ranges.push(('-', '-')); + } else if in_range { + // invariant: in_range is only set when there is + // already at least one character seen. + let r = ranges.last_mut().unwrap(); + add_to_last_range(&self.glob, r, '-')?; + in_range = false; + } else { + assert!(!ranges.is_empty()); + in_range = true; + } + } + c => { + if in_range { + // invariant: in_range is only set when there is + // already at least one character seen. + add_to_last_range( + &self.glob, + ranges.last_mut().unwrap(), + c, + )?; + } else { + ranges.push((c, c)); + } + in_range = false; + } + } + first = false; + } + if in_range { + // Means that the last character in the class was a '-', so add + // it as a literal. 
+ ranges.push(('-', '-')); + } + self.push_token(Token::Class { negated: negated, ranges: ranges }) + } + + fn bump(&mut self) -> Option<char> { + self.prev = self.cur; + self.cur = self.chars.next(); + self.cur + } + + fn peek(&mut self) -> Option<char> { + self.chars.peek().map(|&ch| ch) + } +} + +#[cfg(test)] +fn starts_with(needle: &[u8], haystack: &[u8]) -> bool { + needle.len() <= haystack.len() && needle == &haystack[..needle.len()] +} + +#[cfg(test)] +fn ends_with(needle: &[u8], haystack: &[u8]) -> bool { + if needle.len() > haystack.len() { + return false; + } + needle == &haystack[haystack.len() - needle.len()..] +} + +#[cfg(test)] +mod tests { + use super::Token::*; + use super::{Glob, GlobBuilder, Token}; + use crate::{ErrorKind, GlobSetBuilder}; + + #[derive(Clone, Copy, Debug, Default)] + struct Options { + casei: Option<bool>, + litsep: Option<bool>, + bsesc: Option<bool>, + } + + macro_rules! syntax { + ($name:ident, $pat:expr, $tokens:expr) => { + #[test] + fn $name() { + let pat = Glob::new($pat).unwrap(); + assert_eq!($tokens, pat.tokens.0); + } + }; + } + + macro_rules! syntaxerr { + ($name:ident, $pat:expr, $err:expr) => { + #[test] + fn $name() { + let err = Glob::new($pat).unwrap_err(); + assert_eq!(&$err, err.kind()); + } + }; + } + + macro_rules! toregex { + ($name:ident, $pat:expr, $re:expr) => { + toregex!($name, $pat, $re, Options::default()); + }; + ($name:ident, $pat:expr, $re:expr, $options:expr) => { + #[test] + fn $name() { + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); + assert_eq!(format!("(?-u){}", $re), pat.regex()); + } + }; + } + + macro_rules! matches { + ($name:ident, $pat:expr, $path:expr) => { + matches!($name, $pat, $path, Options::default()); + }; + ($name:ident, $pat:expr, $path:expr, $options:expr) => { + #[test] + fn $name() { + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); + let matcher = pat.compile_matcher(); + let strategic = pat.compile_strategic_matcher(); + let set = GlobSetBuilder::new().add(pat).build().unwrap(); + assert!(matcher.is_match($path)); + assert!(strategic.is_match($path)); + assert!(set.is_match($path)); + } + }; + } + + macro_rules! 
nmatches { + ($name:ident, $pat:expr, $path:expr) => { + nmatches!($name, $pat, $path, Options::default()); + }; + ($name:ident, $pat:expr, $path:expr, $options:expr) => { + #[test] + fn $name() { + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); + let matcher = pat.compile_matcher(); + let strategic = pat.compile_strategic_matcher(); + let set = GlobSetBuilder::new().add(pat).build().unwrap(); + assert!(!matcher.is_match($path)); + assert!(!strategic.is_match($path)); + assert!(!set.is_match($path)); + } + }; + } + + fn s(string: &str) -> String { + string.to_string() + } + + fn class(s: char, e: char) -> Token { + Class { negated: false, ranges: vec![(s, e)] } + } + + fn classn(s: char, e: char) -> Token { + Class { negated: true, ranges: vec![(s, e)] } + } + + fn rclass(ranges: &[(char, char)]) -> Token { + Class { negated: false, ranges: ranges.to_vec() } + } + + fn rclassn(ranges: &[(char, char)]) -> Token { + Class { negated: true, ranges: ranges.to_vec() } + } + + syntax!(literal1, "a", vec![Literal('a')]); + syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]); + syntax!(any1, "?", vec![Any]); + syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]); + syntax!(seq1, "*", vec![ZeroOrMore]); + syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]); + syntax!( + seq3, + "*a*b*", + vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,] + ); + syntax!(rseq1, "**", vec![RecursivePrefix]); + syntax!(rseq2, "**/", vec![RecursivePrefix]); + syntax!(rseq3, "/**", vec![RecursiveSuffix]); + syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]); + syntax!( + rseq5, + "a/**/b", + vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),] + ); + syntax!(cls1, "[a]", vec![class('a', 'a')]); + syntax!(cls2, "[!a]", vec![classn('a', 'a')]); + syntax!(cls3, "[a-z]", vec![class('a', 'z')]); + syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]); + syntax!(cls5, "[-]", vec![class('-', '-')]); + syntax!(cls6, "[]]", vec![class(']', ']')]); + syntax!(cls7, "[*]", vec![class('*', '*')]); + syntax!(cls8, "[!!]", vec![classn('!', '!')]); + syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]); + syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]); + syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]); + syntax!( + cls12, + "[-a-z-]", + vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),] + ); + syntax!(cls13, "[]-z]", vec![class(']', 'z')]); + syntax!(cls14, "[--z]", vec![class('-', 'z')]); + syntax!(cls15, "[ --]", vec![class(' ', '-')]); + syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]); + syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]); + syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]); + syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]); + syntax!(cls20, "[^a]", vec![classn('a', 'a')]); + syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]); + + syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass); + syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass); + syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass); + syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass); + syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a')); + syntaxerr!(err_range2, "[z--]", 
ErrorKind::InvalidRange('z', '-')); + + const CASEI: Options = + Options { casei: Some(true), litsep: None, bsesc: None }; + const SLASHLIT: Options = + Options { casei: None, litsep: Some(true), bsesc: None }; + const NOBSESC: Options = + Options { casei: None, litsep: None, bsesc: Some(false) }; + const BSESC: Options = + Options { casei: None, litsep: None, bsesc: Some(true) }; + + toregex!(re_casei, "a", "(?i)^a$", &CASEI); + + toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT); + toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT); + + toregex!(re1, "a", "^a$"); + toregex!(re2, "?", "^.$"); + toregex!(re3, "*", "^.*$"); + toregex!(re4, "a?", "^a.$"); + toregex!(re5, "?a", "^.a$"); + toregex!(re6, "a*", "^a.*$"); + toregex!(re7, "*a", "^.*a$"); + toregex!(re8, "[*]", r"^[\*]$"); + toregex!(re9, "[+]", r"^[\+]$"); + toregex!(re10, "+", r"^\+$"); + toregex!(re11, "☃", r"^\xe2\x98\x83$"); + toregex!(re12, "**", r"^.*$"); + toregex!(re13, "**/", r"^.*$"); + toregex!(re14, "**/*", r"^(?:/?|.*/).*$"); + toregex!(re15, "**/**", r"^.*$"); + toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$"); + toregex!(re17, "**/**/**", r"^.*$"); + toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$"); + toregex!(re19, "a/**", r"^a/.*$"); + toregex!(re20, "a/**/**", r"^a/.*$"); + toregex!(re21, "a/**/**/**", r"^a/.*$"); + toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$"); + toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$"); + toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$"); + toregex!(re25, "**/b", r"^(?:/?|.*/)b$"); + toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$"); + toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$"); + toregex!(re28, "a**", r"^a.*.*$"); + toregex!(re29, "**a", r"^.*.*a$"); + toregex!(re30, "a**b", r"^a.*.*b$"); + toregex!(re31, "***", r"^.*.*.*$"); + toregex!(re32, "/a**", r"^/a.*.*$"); + toregex!(re33, "/**a", r"^/.*.*a$"); + toregex!(re34, "/a**b", r"^/a.*.*b$"); + + matches!(match1, "a", "a"); + matches!(match2, "a*b", "a_b"); + matches!(match3, "a*b*c", "abc"); + matches!(match4, "a*b*c", "a_b_c"); + matches!(match5, "a*b*c", "a___b___c"); + matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc"); + matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd"); + matches!(match9, "*.rs", ".rs"); + matches!(match10, "☃", "☃"); + + matches!(matchrec1, "some/**/needle.txt", "some/needle.txt"); + matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt"); + matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt"); + matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt"); + matches!(matchrec5, "**", "abcde"); + matches!(matchrec6, "**", ""); + matches!(matchrec7, "**", ".asdf"); + matches!(matchrec8, "**", "/x/.asdf"); + matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt"); + matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt"); + matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt"); + matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt"); + matches!(matchrec13, "**/test", "one/two/test"); + matches!(matchrec14, "**/test", "one/test"); + matches!(matchrec15, "**/test", "test"); + matches!(matchrec16, "/**/test", "/one/two/test"); + matches!(matchrec17, "/**/test", "/one/test"); + matches!(matchrec18, "/**/test", "/test"); + matches!(matchrec19, "**/.*", ".abc"); + matches!(matchrec20, "**/.*", "abc/.abc"); + matches!(matchrec21, "**/foo/bar", "foo/bar"); + matches!(matchrec22, ".*/**", ".abc/abc"); + matches!(matchrec23, "test/**", "test/"); + matches!(matchrec24, "test/**", 
"test/one"); + matches!(matchrec25, "test/**", "test/one/two"); + matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt"); + + matches!(matchrange1, "a[0-9]b", "a0b"); + matches!(matchrange2, "a[0-9]b", "a9b"); + matches!(matchrange3, "a[!0-9]b", "a_b"); + matches!(matchrange4, "[a-z123]", "1"); + matches!(matchrange5, "[1a-z23]", "1"); + matches!(matchrange6, "[123a-z]", "1"); + matches!(matchrange7, "[abc-]", "-"); + matches!(matchrange8, "[-abc]", "-"); + matches!(matchrange9, "[-a-c]", "b"); + matches!(matchrange10, "[a-c-]", "b"); + matches!(matchrange11, "[-]", "-"); + matches!(matchrange12, "a[^0-9]b", "a_b"); + + matches!(matchpat1, "*hello.txt", "hello.txt"); + matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt"); + matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt"); + matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt"); + matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt"); + matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt"); + matches!( + matchpat7, + "*some/path/to/hello.txt", + "a/bigger/some/path/to/hello.txt" + ); + + matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_"); + + matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI); + matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI); + matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI); + matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI); + + matches!(matchalt1, "a,b", "a,b"); + matches!(matchalt2, ",", ","); + matches!(matchalt3, "{a,b}", "a"); + matches!(matchalt4, "{a,b}", "b"); + matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar"); + matches!(matchalt6, "{**/src/**,foo}", "foo"); + matches!(matchalt7, "{[}],foo}", "}"); + matches!(matchalt8, "{foo}", "foo"); + matches!(matchalt9, "{}", ""); + matches!(matchalt10, "{,}", ""); + matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo"); + matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar"); + matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat"); + + matches!(matchslash1, "abc/def", "abc/def", SLASHLIT); + #[cfg(unix)] + nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT); + #[cfg(not(unix))] + nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT); + nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT); + matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs + #[cfg(unix)] + nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT); + #[cfg(not(unix))] + matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT); + + matches!(matchbackslash1, "\\[", "[", BSESC); + matches!(matchbackslash2, "\\?", "?", BSESC); + matches!(matchbackslash3, "\\*", "*", BSESC); + matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC); + matches!(matchbackslash5, "\\?", "\\a", NOBSESC); + matches!(matchbackslash6, "\\*", "\\\\", NOBSESC); + #[cfg(unix)] + matches!(matchbackslash7, "\\a", "a"); + #[cfg(not(unix))] + matches!(matchbackslash8, "\\a", "/a"); + + nmatches!(matchnot1, "a*b*c", "abcd"); + nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca"); + nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt"); + nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt"); + nmatches!(matchnot5, "/**/test", "test"); + nmatches!(matchnot6, "/**/test", "/one/notthis"); + nmatches!(matchnot7, "/**/test", "/notthis"); + nmatches!(matchnot8, "**/.*", "ab.c"); + nmatches!(matchnot9, "**/.*", "abc/ab.c"); + nmatches!(matchnot10, ".*/**", "a.bc"); + nmatches!(matchnot11, ".*/**", "abc/a.bc"); + nmatches!(matchnot12, "a[0-9]b", "a_b"); + nmatches!(matchnot13, "a[!0-9]b", "a0b"); 
+ nmatches!(matchnot14, "a[!0-9]b", "a9b"); + nmatches!(matchnot15, "[!-]", "-"); + nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some"); + nmatches!(matchnot17, "*hello.txt", "goodbye.txt"); + nmatches!( + matchnot18, + "*some/path/to/hello.txt", + "some/path/to/hello.txt-and-then-some" + ); + nmatches!( + matchnot19, + "*some/path/to/hello.txt", + "some/other/path/to/hello.txt" + ); + nmatches!(matchnot20, "a", "foo/a"); + nmatches!(matchnot21, "./foo", "foo"); + nmatches!(matchnot22, "**/foo", "foofoo"); + nmatches!(matchnot23, "**/foo/bar", "foofoo/bar"); + nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c"); + nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT); + nmatches!( + matchnot26, + "**/m4/ltoptions.m4", + "csharp/src/packages/repositories.config", + SLASHLIT + ); + nmatches!(matchnot27, "a[^0-9]b", "a0b"); + nmatches!(matchnot28, "a[^0-9]b", "a9b"); + nmatches!(matchnot29, "[^-]", "-"); + nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt"); + nmatches!( + matchrec31, + "some/*/needle.txt", + "some/one/two/needle.txt", + SLASHLIT + ); + nmatches!( + matchrec32, + "some/*/needle.txt", + "some/one/two/three/needle.txt", + SLASHLIT + ); + nmatches!(matchrec33, ".*/**", ".abc"); + nmatches!(matchrec34, "foo/**", "foo"); + + macro_rules! extract { + ($which:ident, $name:ident, $pat:expr, $expect:expr) => { + extract!($which, $name, $pat, $expect, Options::default()); + }; + ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => { + #[test] + fn $name() { + let mut builder = GlobBuilder::new($pat); + if let Some(casei) = $options.casei { + builder.case_insensitive(casei); + } + if let Some(litsep) = $options.litsep { + builder.literal_separator(litsep); + } + if let Some(bsesc) = $options.bsesc { + builder.backslash_escape(bsesc); + } + let pat = builder.build().unwrap(); + assert_eq!($expect, pat.$which()); + } + }; + } + + macro_rules! literal { + ($($tt:tt)*) => { extract!(literal, $($tt)*); } + } + + macro_rules! basetokens { + ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); } + } + + macro_rules! ext { + ($($tt:tt)*) => { extract!(ext, $($tt)*); } + } + + macro_rules! required_ext { + ($($tt:tt)*) => { extract!(required_ext, $($tt)*); } + } + + macro_rules! prefix { + ($($tt:tt)*) => { extract!(prefix, $($tt)*); } + } + + macro_rules! suffix { + ($($tt:tt)*) => { extract!(suffix, $($tt)*); } + } + + macro_rules! 
baseliteral { + ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); } + } + + literal!(extract_lit1, "foo", Some(s("foo"))); + literal!(extract_lit2, "foo", None, CASEI); + literal!(extract_lit3, "/foo", Some(s("/foo"))); + literal!(extract_lit4, "/foo/", Some(s("/foo/"))); + literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar"))); + literal!(extract_lit6, "*.foo", None); + literal!(extract_lit7, "foo/bar", Some(s("foo/bar"))); + literal!(extract_lit8, "**/foo/bar", None); + + basetokens!( + extract_basetoks1, + "**/foo", + Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]) + ); + basetokens!(extract_basetoks2, "**/foo", None, CASEI); + basetokens!( + extract_basetoks3, + "**/foo", + Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]), + SLASHLIT + ); + basetokens!(extract_basetoks4, "*foo", None, SLASHLIT); + basetokens!(extract_basetoks5, "*foo", None); + basetokens!(extract_basetoks6, "**/fo*o", None); + basetokens!( + extract_basetoks7, + "**/fo*o", + Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]), + SLASHLIT + ); + + ext!(extract_ext1, "**/*.rs", Some(s(".rs"))); + ext!(extract_ext2, "**/*.rs.bak", None); + ext!(extract_ext3, "*.rs", Some(s(".rs"))); + ext!(extract_ext4, "a*.rs", None); + ext!(extract_ext5, "/*.c", None); + ext!(extract_ext6, "*.c", None, SLASHLIT); + ext!(extract_ext7, "*.c", Some(s(".c"))); + + required_ext!(extract_req_ext1, "*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs"))); + required_ext!(extract_req_ext5, ".rs", Some(s(".rs"))); + required_ext!(extract_req_ext6, "./rs", None); + required_ext!(extract_req_ext7, "foo", None); + required_ext!(extract_req_ext8, ".foo/", None); + required_ext!(extract_req_ext9, "foo/", None); + + prefix!(extract_prefix1, "/foo", Some(s("/foo"))); + prefix!(extract_prefix2, "/foo/*", Some(s("/foo/"))); + prefix!(extract_prefix3, "**/foo", None); + prefix!(extract_prefix4, "foo/**", Some(s("foo/"))); + + suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true))); + suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false))); + suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT); + suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false))); + suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false))); + suffix!(extract_suffix6, "*.foo", None, SLASHLIT); + suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false))); + + baseliteral!(extract_baselit1, "**/foo", Some(s("foo"))); + baseliteral!(extract_baselit2, "foo", None); + baseliteral!(extract_baselit3, "*foo", None); + baseliteral!(extract_baselit4, "*/foo", None); +} diff --git a/vendor/globset/src/lib.rs b/vendor/globset/src/lib.rs new file mode 100644 index 000000000..c8072b2db --- /dev/null +++ b/vendor/globset/src/lib.rs @@ -0,0 +1,912 @@ +/*! +The globset crate provides cross platform single glob and glob set matching. + +Glob set matching is the process of matching one or more glob patterns against +a single candidate path simultaneously, and returning all of the globs that +matched. For example, given this set of globs: + +```ignore +*.rs +src/lib.rs +src/**/foo.rs +``` + +and a path `src/bar/baz/foo.rs`, then the set would report the first and third +globs as matching. + +# Example: one glob + +This example shows how to match a single glob against a single file path. 
+ +``` +# fn example() -> Result<(), globset::Error> { +use globset::Glob; + +let glob = Glob::new("*.rs")?.compile_matcher(); + +assert!(glob.is_match("foo.rs")); +assert!(glob.is_match("foo/bar.rs")); +assert!(!glob.is_match("Cargo.toml")); +# Ok(()) } example().unwrap(); +``` + +# Example: configuring a glob matcher + +This example shows how to use a `GlobBuilder` to configure aspects of match +semantics. In this example, we prevent wildcards from matching path separators. + +``` +# fn example() -> Result<(), globset::Error> { +use globset::GlobBuilder; + +let glob = GlobBuilder::new("*.rs") + .literal_separator(true).build()?.compile_matcher(); + +assert!(glob.is_match("foo.rs")); +assert!(!glob.is_match("foo/bar.rs")); // no longer matches +assert!(!glob.is_match("Cargo.toml")); +# Ok(()) } example().unwrap(); +``` + +# Example: match multiple globs at once + +This example shows how to match multiple glob patterns at once. + +``` +# fn example() -> Result<(), globset::Error> { +use globset::{Glob, GlobSetBuilder}; + +let mut builder = GlobSetBuilder::new(); +// A GlobBuilder can be used to configure each glob's match semantics +// independently. +builder.add(Glob::new("*.rs")?); +builder.add(Glob::new("src/lib.rs")?); +builder.add(Glob::new("src/**/foo.rs")?); +let set = builder.build()?; + +assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); +# Ok(()) } example().unwrap(); +``` + +# Syntax + +Standard Unix-style glob syntax is supported: + +* `?` matches any single character. (If the `literal_separator` option is + enabled, then `?` can never match a path separator.) +* `*` matches zero or more characters. (If the `literal_separator` option is + enabled, then `*` can never match a path separator.) +* `**` recursively matches directories but are only legal in three situations. + First, if the glob starts with <code>\*\*/</code>, then it matches + all directories. For example, <code>\*\*/foo</code> matches `foo` + and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with + <code>/\*\*</code>, then it matches all sub-entries. For example, + <code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`. + Thirdly, if the glob contains <code>/\*\*/</code> anywhere within + the pattern, then it matches zero or more directories. Using `**` anywhere + else is illegal (N.B. the glob `**` is allowed and means "match everything"). +* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns. + (N.B. Nesting `{...}` is not currently allowed.) +* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use + `[!ab]` to match any character except for `a` and `b`. +* Metacharacters such as `*` and `?` can be escaped with character class + notation. e.g., `[*]` matches `*`. +* When backslash escapes are enabled, a backslash (`\`) will escape all meta + characters in a glob. If it precedes a non-meta character, then the slash is + ignored. A `\\` will match a literal `\\`. Note that this mode is only + enabled on Unix platforms by default, but can be enabled on any platform + via the `backslash_escape` setting on `Glob`. + +A `GlobBuilder` can be used to prevent wildcards from matching path separators, +or to enable case insensitive matching. 
+*/ + +#![deny(missing_docs)] + +use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap}; +use std::error::Error as StdError; +use std::fmt; +use std::hash; +use std::path::Path; +use std::str; + +use aho_corasick::AhoCorasick; +use bstr::{ByteSlice, ByteVec, B}; +use regex::bytes::{Regex, RegexBuilder, RegexSet}; + +use crate::glob::MatchStrategy; +pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; +use crate::pathutil::{file_name, file_name_ext, normalize_path}; + +mod glob; +mod pathutil; + +#[cfg(feature = "serde1")] +mod serde_impl; + +#[cfg(feature = "log")] +macro_rules! debug { + ($($token:tt)*) => (::log::debug!($($token)*);) +} + +#[cfg(not(feature = "log"))] +macro_rules! debug { + ($($token:tt)*) => {}; +} + +/// Represents an error that can occur when parsing a glob pattern. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Error { + /// The original glob provided by the caller. + glob: Option<String>, + /// The kind of error. + kind: ErrorKind, +} + +/// The kind of error that can occur when parsing a glob pattern. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ErrorKind { + /// **DEPRECATED**. + /// + /// This error used to occur for consistency with git's glob specification, + /// but the specification now accepts all uses of `**`. When `**` does not + /// appear adjacent to a path separator or at the beginning/end of a glob, + /// it is now treated as two consecutive `*` patterns. As such, this error + /// is no longer used. + InvalidRecursive, + /// Occurs when a character class (e.g., `[abc]`) is not closed. + UnclosedClass, + /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For + /// example, if the range starts with a lexicographically larger character + /// than it ends with. + InvalidRange(char, char), + /// Occurs when a `}` is found without a matching `{`. + UnopenedAlternates, + /// Occurs when a `{` is found without a matching `}`. + UnclosedAlternates, + /// Occurs when an alternating group is nested inside another alternating + /// group, e.g., `{{a,b},{c,d}}`. + NestedAlternates, + /// Occurs when an unescaped '\' is found at the end of a glob. + DanglingEscape, + /// An error associated with parsing or compiling a regex. + Regex(String), + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) + #[doc(hidden)] + __Nonexhaustive, +} + +impl StdError for Error { + fn description(&self) -> &str { + self.kind.description() + } +} + +impl Error { + /// Return the glob that caused this error, if one exists. + pub fn glob(&self) -> Option<&str> { + self.glob.as_ref().map(|s| &**s) + } + + /// Return the kind of this error. 
+ pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +impl ErrorKind { + fn description(&self) -> &str { + match *self { + ErrorKind::InvalidRecursive => { + "invalid use of **; must be one path component" + } + ErrorKind::UnclosedClass => { + "unclosed character class; missing ']'" + } + ErrorKind::InvalidRange(_, _) => "invalid character range", + ErrorKind::UnopenedAlternates => { + "unopened alternate group; missing '{' \ + (maybe escape '}' with '[}]'?)" + } + ErrorKind::UnclosedAlternates => { + "unclosed alternate group; missing '}' \ + (maybe escape '{' with '[{]'?)" + } + ErrorKind::NestedAlternates => { + "nested alternate groups are not allowed" + } + ErrorKind::DanglingEscape => "dangling '\\'", + ErrorKind::Regex(ref err) => err, + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.glob { + None => self.kind.fmt(f), + Some(ref glob) => { + write!(f, "error parsing glob '{}': {}", glob, self.kind) + } + } + } +} + +impl fmt::Display for ErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + ErrorKind::InvalidRecursive + | ErrorKind::UnclosedClass + | ErrorKind::UnopenedAlternates + | ErrorKind::UnclosedAlternates + | ErrorKind::NestedAlternates + | ErrorKind::DanglingEscape + | ErrorKind::Regex(_) => write!(f, "{}", self.description()), + ErrorKind::InvalidRange(s, e) => { + write!(f, "invalid range; '{}' > '{}'", s, e) + } + ErrorKind::__Nonexhaustive => unreachable!(), + } + } +} + +fn new_regex(pat: &str) -> Result<Regex, Error> { + RegexBuilder::new(pat) + .dot_matches_new_line(true) + .size_limit(10 * (1 << 20)) + .dfa_size_limit(10 * (1 << 20)) + .build() + .map_err(|err| Error { + glob: Some(pat.to_string()), + kind: ErrorKind::Regex(err.to_string()), + }) +} + +fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error> +where + S: AsRef<str>, + I: IntoIterator<Item = S>, +{ + RegexSet::new(pats).map_err(|err| Error { + glob: None, + kind: ErrorKind::Regex(err.to_string()), + }) +} + +type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; + +/// GlobSet represents a group of globs that can be matched together in a +/// single pass. +#[derive(Clone, Debug)] +pub struct GlobSet { + len: usize, + strats: Vec<GlobSetMatchStrategy>, +} + +impl GlobSet { + /// Create an empty `GlobSet`. An empty set matches nothing. + #[inline] + pub fn empty() -> GlobSet { + GlobSet { len: 0, strats: vec![] } + } + + /// Returns true if this set is empty, and therefore matches nothing. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the number of globs in this set. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns true if any glob in this set matches the path given. + pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool { + self.is_match_candidate(&Candidate::new(path.as_ref())) + } + + /// Returns true if any glob in this set matches the path given. + /// + /// This takes a Candidate as input, which can be used to amortize the + /// cost of preparing a path for matching. + pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool { + if self.is_empty() { + return false; + } + for strat in &self.strats { + if strat.is_match(path) { + return true; + } + } + false + } + + /// Returns the sequence number of every glob pattern that matches the + /// given path. 
+ pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> { + self.matches_candidate(&Candidate::new(path.as_ref())) + } + + /// Returns the sequence number of every glob pattern that matches the + /// given path. + /// + /// This takes a Candidate as input, which can be used to amortize the + /// cost of preparing a path for matching. + pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> { + let mut into = vec![]; + if self.is_empty() { + return into; + } + self.matches_candidate_into(path, &mut into); + into + } + + /// Adds the sequence number of every glob pattern that matches the given + /// path to the vec given. + /// + /// `into` is cleared before matching begins, and contains the set of + /// sequence numbers (in ascending order) after matching ends. If no globs + /// were matched, then `into` will be empty. + pub fn matches_into<P: AsRef<Path>>( + &self, + path: P, + into: &mut Vec<usize>, + ) { + self.matches_candidate_into(&Candidate::new(path.as_ref()), into); + } + + /// Adds the sequence number of every glob pattern that matches the given + /// path to the vec given. + /// + /// `into` is cleared before matching begins, and contains the set of + /// sequence numbers (in ascending order) after matching ends. If no globs + /// were matched, then `into` will be empty. + /// + /// This takes a Candidate as input, which can be used to amortize the + /// cost of preparing a path for matching. + pub fn matches_candidate_into( + &self, + path: &Candidate<'_>, + into: &mut Vec<usize>, + ) { + into.clear(); + if self.is_empty() { + return; + } + for strat in &self.strats { + strat.matches_into(path, into); + } + into.sort(); + into.dedup(); + } + + fn new(pats: &[Glob]) -> Result<GlobSet, Error> { + if pats.is_empty() { + return Ok(GlobSet { len: 0, strats: vec![] }); + } + let mut lits = LiteralStrategy::new(); + let mut base_lits = BasenameLiteralStrategy::new(); + let mut exts = ExtensionStrategy::new(); + let mut prefixes = MultiStrategyBuilder::new(); + let mut suffixes = MultiStrategyBuilder::new(); + let mut required_exts = RequiredExtensionStrategyBuilder::new(); + let mut regexes = MultiStrategyBuilder::new(); + for (i, p) in pats.iter().enumerate() { + match MatchStrategy::new(p) { + MatchStrategy::Literal(lit) => { + lits.add(i, lit); + } + MatchStrategy::BasenameLiteral(lit) => { + base_lits.add(i, lit); + } + MatchStrategy::Extension(ext) => { + exts.add(i, ext); + } + MatchStrategy::Prefix(prefix) => { + prefixes.add(i, prefix); + } + MatchStrategy::Suffix { suffix, component } => { + if component { + lits.add(i, suffix[1..].to_string()); + } + suffixes.add(i, suffix); + } + MatchStrategy::RequiredExtension(ext) => { + required_exts.add(i, ext, p.regex().to_owned()); + } + MatchStrategy::Regex => { + debug!("glob converted to regex: {:?}", p); + regexes.add(i, p.regex().to_owned()); + } + } + } + debug!( + "built glob set; {} literals, {} basenames, {} extensions, \ + {} prefixes, {} suffixes, {} required extensions, {} regexes", + lits.0.len(), + base_lits.0.len(), + exts.0.len(), + prefixes.literals.len(), + suffixes.literals.len(), + required_exts.0.len(), + regexes.literals.len() + ); + Ok(GlobSet { + len: pats.len(), + strats: vec![ + GlobSetMatchStrategy::Extension(exts), + GlobSetMatchStrategy::BasenameLiteral(base_lits), + GlobSetMatchStrategy::Literal(lits), + GlobSetMatchStrategy::Suffix(suffixes.suffix()), + GlobSetMatchStrategy::Prefix(prefixes.prefix()), + GlobSetMatchStrategy::RequiredExtension( + required_exts.build()?, + ), + 
GlobSetMatchStrategy::Regex(regexes.regex_set()?), + ], + }) + } +} + +impl Default for GlobSet { + /// Create a default empty GlobSet. + fn default() -> Self { + GlobSet::empty() + } +} + +/// GlobSetBuilder builds a group of patterns that can be used to +/// simultaneously match a file path. +#[derive(Clone, Debug)] +pub struct GlobSetBuilder { + pats: Vec<Glob>, +} + +impl GlobSetBuilder { + /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new + /// patterns. Once all patterns have been added, `build` should be called + /// to produce a `GlobSet`, which can then be used for matching. + pub fn new() -> GlobSetBuilder { + GlobSetBuilder { pats: vec![] } + } + + /// Builds a new matcher from all of the glob patterns added so far. + /// + /// Once a matcher is built, no new patterns can be added to it. + pub fn build(&self) -> Result<GlobSet, Error> { + GlobSet::new(&self.pats) + } + + /// Add a new pattern to this set. + pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { + self.pats.push(pat); + self + } +} + +/// A candidate path for matching. +/// +/// All glob matching in this crate operates on `Candidate` values. +/// Constructing candidates has a very small cost associated with it, so +/// callers may find it beneficial to amortize that cost when matching a single +/// path against multiple globs or sets of globs. +#[derive(Clone, Debug)] +pub struct Candidate<'a> { + path: Cow<'a, [u8]>, + basename: Cow<'a, [u8]>, + ext: Cow<'a, [u8]>, +} + +impl<'a> Candidate<'a> { + /// Create a new candidate for matching from the given path. + pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> { + let path = normalize_path(Vec::from_path_lossy(path.as_ref())); + let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); + let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); + Candidate { path: path, basename: basename, ext: ext } + } + + fn path_prefix(&self, max: usize) -> &[u8] { + if self.path.len() <= max { + &*self.path + } else { + &self.path[..max] + } + } + + fn path_suffix(&self, max: usize) -> &[u8] { + if self.path.len() <= max { + &*self.path + } else { + &self.path[self.path.len() - max..] 
+ } + } +} + +#[derive(Clone, Debug)] +enum GlobSetMatchStrategy { + Literal(LiteralStrategy), + BasenameLiteral(BasenameLiteralStrategy), + Extension(ExtensionStrategy), + Prefix(PrefixStrategy), + Suffix(SuffixStrategy), + RequiredExtension(RequiredExtensionStrategy), + Regex(RegexSetStrategy), +} + +impl GlobSetMatchStrategy { + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + use self::GlobSetMatchStrategy::*; + match *self { + Literal(ref s) => s.is_match(candidate), + BasenameLiteral(ref s) => s.is_match(candidate), + Extension(ref s) => s.is_match(candidate), + Prefix(ref s) => s.is_match(candidate), + Suffix(ref s) => s.is_match(candidate), + RequiredExtension(ref s) => s.is_match(candidate), + Regex(ref s) => s.is_match(candidate), + } + } + + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + use self::GlobSetMatchStrategy::*; + match *self { + Literal(ref s) => s.matches_into(candidate, matches), + BasenameLiteral(ref s) => s.matches_into(candidate, matches), + Extension(ref s) => s.matches_into(candidate, matches), + Prefix(ref s) => s.matches_into(candidate, matches), + Suffix(ref s) => s.matches_into(candidate, matches), + RequiredExtension(ref s) => s.matches_into(candidate, matches), + Regex(ref s) => s.matches_into(candidate, matches), + } + } +} + +#[derive(Clone, Debug)] +struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); + +impl LiteralStrategy { + fn new() -> LiteralStrategy { + LiteralStrategy(BTreeMap::new()) + } + + fn add(&mut self, global_index: usize, lit: String) { + self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); + } + + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + self.0.contains_key(candidate.path.as_bytes()) + } + + #[inline(never)] + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + if let Some(hits) = self.0.get(candidate.path.as_bytes()) { + matches.extend(hits); + } + } +} + +#[derive(Clone, Debug)] +struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); + +impl BasenameLiteralStrategy { + fn new() -> BasenameLiteralStrategy { + BasenameLiteralStrategy(BTreeMap::new()) + } + + fn add(&mut self, global_index: usize, lit: String) { + self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); + } + + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + if candidate.basename.is_empty() { + return false; + } + self.0.contains_key(candidate.basename.as_bytes()) + } + + #[inline(never)] + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + if candidate.basename.is_empty() { + return; + } + if let Some(hits) = self.0.get(candidate.basename.as_bytes()) { + matches.extend(hits); + } + } +} + +#[derive(Clone, Debug)] +struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>); + +impl ExtensionStrategy { + fn new() -> ExtensionStrategy { + ExtensionStrategy(HashMap::with_hasher(Fnv::default())) + } + + fn add(&mut self, global_index: usize, ext: String) { + self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); + } + + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + if candidate.ext.is_empty() { + return false; + } + self.0.contains_key(candidate.ext.as_bytes()) + } + + #[inline(never)] + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + if candidate.ext.is_empty() { + return; + } + if let Some(hits) = self.0.get(candidate.ext.as_bytes()) { + matches.extend(hits); + } + } +} + +#[derive(Clone, Debug)] +struct 
PrefixStrategy { + matcher: AhoCorasick, + map: Vec<usize>, + longest: usize, +} + +impl PrefixStrategy { + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + let path = candidate.path_prefix(self.longest); + for m in self.matcher.find_overlapping_iter(path) { + if m.start() == 0 { + return true; + } + } + false + } + + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + let path = candidate.path_prefix(self.longest); + for m in self.matcher.find_overlapping_iter(path) { + if m.start() == 0 { + matches.push(self.map[m.pattern()]); + } + } + } +} + +#[derive(Clone, Debug)] +struct SuffixStrategy { + matcher: AhoCorasick, + map: Vec<usize>, + longest: usize, +} + +impl SuffixStrategy { + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + let path = candidate.path_suffix(self.longest); + for m in self.matcher.find_overlapping_iter(path) { + if m.end() == path.len() { + return true; + } + } + false + } + + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + let path = candidate.path_suffix(self.longest); + for m in self.matcher.find_overlapping_iter(path) { + if m.end() == path.len() { + matches.push(self.map[m.pattern()]); + } + } + } +} + +#[derive(Clone, Debug)] +struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>); + +impl RequiredExtensionStrategy { + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + if candidate.ext.is_empty() { + return false; + } + match self.0.get(candidate.ext.as_bytes()) { + None => false, + Some(regexes) => { + for &(_, ref re) in regexes { + if re.is_match(candidate.path.as_bytes()) { + return true; + } + } + false + } + } + } + + #[inline(never)] + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + if candidate.ext.is_empty() { + return; + } + if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) { + for &(global_index, ref re) in regexes { + if re.is_match(candidate.path.as_bytes()) { + matches.push(global_index); + } + } + } + } +} + +#[derive(Clone, Debug)] +struct RegexSetStrategy { + matcher: RegexSet, + map: Vec<usize>, +} + +impl RegexSetStrategy { + fn is_match(&self, candidate: &Candidate<'_>) -> bool { + self.matcher.is_match(candidate.path.as_bytes()) + } + + fn matches_into( + &self, + candidate: &Candidate<'_>, + matches: &mut Vec<usize>, + ) { + for i in self.matcher.matches(candidate.path.as_bytes()) { + matches.push(self.map[i]); + } + } +} + +#[derive(Clone, Debug)] +struct MultiStrategyBuilder { + literals: Vec<String>, + map: Vec<usize>, + longest: usize, +} + +impl MultiStrategyBuilder { + fn new() -> MultiStrategyBuilder { + MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 } + } + + fn add(&mut self, global_index: usize, literal: String) { + if literal.len() > self.longest { + self.longest = literal.len(); + } + self.map.push(global_index); + self.literals.push(literal); + } + + fn prefix(self) -> PrefixStrategy { + PrefixStrategy { + matcher: AhoCorasick::new_auto_configured(&self.literals), + map: self.map, + longest: self.longest, + } + } + + fn suffix(self) -> SuffixStrategy { + SuffixStrategy { + matcher: AhoCorasick::new_auto_configured(&self.literals), + map: self.map, + longest: self.longest, + } + } + + fn regex_set(self) -> Result<RegexSetStrategy, Error> { + Ok(RegexSetStrategy { + matcher: new_regex_set(self.literals)?, + map: self.map, + }) + } +} + +#[derive(Clone, Debug)] +struct RequiredExtensionStrategyBuilder( + HashMap<Vec<u8>, Vec<(usize, 
String)>>,
+);
+
+impl RequiredExtensionStrategyBuilder {
+    fn new() -> RequiredExtensionStrategyBuilder {
+        RequiredExtensionStrategyBuilder(HashMap::new())
+    }
+
+    fn add(&mut self, global_index: usize, ext: String, regex: String) {
+        self.0
+            .entry(ext.into_bytes())
+            .or_insert(vec![])
+            .push((global_index, regex));
+    }
+
+    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
+        let mut exts = HashMap::with_hasher(Fnv::default());
+        for (ext, regexes) in self.0.into_iter() {
+            exts.insert(ext.clone(), vec![]);
+            for (global_index, regex) in regexes {
+                let compiled = new_regex(&regex)?;
+                exts.get_mut(&ext).unwrap().push((global_index, compiled));
+            }
+        }
+        Ok(RequiredExtensionStrategy(exts))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{GlobSet, GlobSetBuilder};
+    use crate::glob::Glob;
+
+    #[test]
+    fn set_works() {
+        let mut builder = GlobSetBuilder::new();
+        builder.add(Glob::new("src/**/*.rs").unwrap());
+        builder.add(Glob::new("*.c").unwrap());
+        builder.add(Glob::new("src/lib.rs").unwrap());
+        let set = builder.build().unwrap();
+
+        assert!(set.is_match("foo.c"));
+        assert!(set.is_match("src/foo.c"));
+        assert!(!set.is_match("foo.rs"));
+        assert!(!set.is_match("tests/foo.rs"));
+        assert!(set.is_match("src/foo.rs"));
+        assert!(set.is_match("src/grep/src/main.rs"));
+
+        let matches = set.matches("src/lib.rs");
+        assert_eq!(2, matches.len());
+        assert_eq!(0, matches[0]);
+        assert_eq!(2, matches[1]);
+    }
+
+    #[test]
+    fn empty_set_works() {
+        let set = GlobSetBuilder::new().build().unwrap();
+        assert!(!set.is_match(""));
+        assert!(!set.is_match("a"));
+    }
+
+    #[test]
+    fn default_set_is_empty_works() {
+        let set: GlobSet = Default::default();
+        assert!(!set.is_match(""));
+        assert!(!set.is_match("a"));
+    }
+}
diff --git a/vendor/globset/src/pathutil.rs b/vendor/globset/src/pathutil.rs
new file mode 100644
index 000000000..2bd34e1dd
--- /dev/null
+++ b/vendor/globset/src/pathutil.rs
@@ -0,0 +1,129 @@
+use std::borrow::Cow;
+
+use bstr::{ByteSlice, ByteVec};
+
+/// The final component of the path, if it is a normal file.
+///
+/// If the path terminates in ., .., or consists solely of a root or prefix,
+/// file_name will return None.
+pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+    if path.is_empty() {
+        return None;
+    } else if path.last_byte() == Some(b'.') {
+        return None;
+    }
+    let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
+    Some(match *path {
+        Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
+        Cow::Owned(ref path) => {
+            let mut path = path.clone();
+            path.drain_bytes(..last_slash);
+            Cow::Owned(path)
+        }
+    })
+}
+
+/// Return a file extension given a path's file name.
+///
+/// Note that this does NOT match the semantics of std::path::Path::extension.
+/// Namely, the extension includes the `.` and matching is otherwise more
+/// liberal. Specifically, the extension is:
+///
+/// * None, if the file name given is empty;
+/// * None, if there is no embedded `.`;
+/// * Otherwise, the portion of the file name starting with the final `.`.
+///
+/// e.g., a file name of `.rs` has an extension `.rs`.
+///
+/// N.B. This is done to make certain glob match optimizations easier. Namely,
+/// a pattern like `*.rs` is obviously trying to match files with a `rs`
+/// extension, but it also matches files like `.rs`, which doesn't have an
+/// extension according to std::path::Path::extension.
+pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { + if name.is_empty() { + return None; + } + let last_dot_at = match name.rfind_byte(b'.') { + None => return None, + Some(i) => i, + }; + Some(match *name { + Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]), + Cow::Owned(ref name) => { + let mut name = name.clone(); + name.drain_bytes(..last_dot_at); + Cow::Owned(name) + } + }) +} + +/// Normalizes a path to use `/` as a separator everywhere, even on platforms +/// that recognize other characters as separators. +#[cfg(unix)] +pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { + // UNIX only uses /, so we're good. + path +} + +/// Normalizes a path to use `/` as a separator everywhere, even on platforms +/// that recognize other characters as separators. +#[cfg(not(unix))] +pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { + use std::path::is_separator; + + for i in 0..path.len() { + if path[i] == b'/' || !is_separator(path[i] as char) { + continue; + } + path.to_mut()[i] = b'/'; + } + path +} + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + + use bstr::{ByteVec, B}; + + use super::{file_name_ext, normalize_path}; + + macro_rules! ext { + ($name:ident, $file_name:expr, $ext:expr) => { + #[test] + fn $name() { + let bs = Vec::from($file_name); + let got = file_name_ext(&Cow::Owned(bs)); + assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got); + } + }; + } + + ext!(ext1, "foo.rs", Some(".rs")); + ext!(ext2, ".rs", Some(".rs")); + ext!(ext3, "..rs", Some(".rs")); + ext!(ext4, "", None::<&str>); + ext!(ext5, "foo", None::<&str>); + + macro_rules! normalize { + ($name:ident, $path:expr, $expected:expr) => { + #[test] + fn $name() { + let bs = Vec::from_slice($path); + let got = normalize_path(Cow::Owned(bs)); + assert_eq!($expected.to_vec(), got.into_owned()); + } + }; + } + + normalize!(normal1, b"foo", b"foo"); + normalize!(normal2, b"foo/bar", b"foo/bar"); + #[cfg(unix)] + normalize!(normal3, b"foo\\bar", b"foo\\bar"); + #[cfg(not(unix))] + normalize!(normal3, b"foo\\bar", b"foo/bar"); + #[cfg(unix)] + normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz"); + #[cfg(not(unix))] + normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz"); +} diff --git a/vendor/globset/src/serde_impl.rs b/vendor/globset/src/serde_impl.rs new file mode 100644 index 000000000..6affc5904 --- /dev/null +++ b/vendor/globset/src/serde_impl.rs @@ -0,0 +1,38 @@ +use serde::de::Error; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use crate::Glob; + +impl Serialize for Glob { + fn serialize<S: Serializer>( + &self, + serializer: S, + ) -> Result<S::Ok, S::Error> { + serializer.serialize_str(self.glob()) + } +} + +impl<'de> Deserialize<'de> for Glob { + fn deserialize<D: Deserializer<'de>>( + deserializer: D, + ) -> Result<Self, D::Error> { + let glob = <&str as Deserialize>::deserialize(deserializer)?; + Glob::new(glob).map_err(D::Error::custom) + } +} + +#[cfg(test)] +mod tests { + use Glob; + + #[test] + fn glob_json_works() { + let test_glob = Glob::new("src/**/*.rs").unwrap(); + + let ser = serde_json::to_string(&test_glob).unwrap(); + assert_eq!(ser, "\"src/**/*.rs\""); + + let de: Glob = serde_json::from_str(&ser).unwrap(); + assert_eq!(test_glob, de); + } +} |
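Usage sketch (editorial note, not part of the vendored diff): the public API added by this commit -- GlobSetBuilder, GlobSet, Glob, and Candidate -- is typically used by building a set once and reusing a Candidate to amortize path preparation across several match calls. All types and methods below come from the sources above; the example patterns and path are arbitrary.

use globset::{Candidate, Glob, GlobSetBuilder};

fn main() {
    // Build the set once; each glob is compiled into the cheapest matching
    // strategy available (literal, basename, extension, prefix/suffix, or regex).
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("src/**/*.rs").unwrap());
    builder.add(Glob::new("*.c").unwrap());
    let set = builder.build().unwrap();

    // Reuse one Candidate to amortize path normalization when the same path
    // is matched more than once.
    let candidate = Candidate::new("src/main.rs");
    assert!(set.is_match_candidate(&candidate));

    // Collect the indices of every matching glob (ascending, deduplicated).
    let mut hits = Vec::new();
    set.matches_candidate_into(&candidate, &mut hits);
    assert_eq!(hits, vec![0]); // only "src/**/*.rs" matches
}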