diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:25:56 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:25:56 +0000 |
commit | 018c4950b9406055dec02ef0fb52f132e2bb1e2c (patch) | |
tree | a835ebdf2088ef88fa681f8fad45f09922c1ae9a /vendor/globset/src | |
parent | Adding debian version 1.75.0+dfsg1-5. (diff) | |
download | rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.tar.xz rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.zip |
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/globset/src')
-rw-r--r-- | vendor/globset/src/fnv.rs | 30 | ||||
-rw-r--r-- | vendor/globset/src/glob.rs | 115 | ||||
-rw-r--r-- | vendor/globset/src/lib.rs | 195 | ||||
-rw-r--r-- | vendor/globset/src/pathutil.rs | 20 |
4 files changed, 220 insertions, 140 deletions
diff --git a/vendor/globset/src/fnv.rs b/vendor/globset/src/fnv.rs new file mode 100644 index 000000000..91174e20b --- /dev/null +++ b/vendor/globset/src/fnv.rs @@ -0,0 +1,30 @@ +/// A convenience alias for creating a hash map with an FNV hasher. +pub(crate) type HashMap<K, V> = + std::collections::HashMap<K, V, std::hash::BuildHasherDefault<Hasher>>; + +/// A hasher that implements the Fowler–Noll–Vo (FNV) hash. +pub(crate) struct Hasher(u64); + +impl Hasher { + const OFFSET_BASIS: u64 = 0xcbf29ce484222325; + const PRIME: u64 = 0x100000001b3; +} + +impl Default for Hasher { + fn default() -> Hasher { + Hasher(Hasher::OFFSET_BASIS) + } +} + +impl std::hash::Hasher for Hasher { + fn finish(&self) -> u64 { + self.0 + } + + fn write(&mut self, bytes: &[u8]) { + for &byte in bytes.iter() { + self.0 = self.0 ^ u64::from(byte); + self.0 = self.0.wrapping_mul(Hasher::PRIME); + } + } +} diff --git a/vendor/globset/src/glob.rs b/vendor/globset/src/glob.rs index d19c70ed2..83c08344c 100644 --- a/vendor/globset/src/glob.rs +++ b/vendor/globset/src/glob.rs @@ -1,12 +1,6 @@ -use std::fmt; -use std::hash; -use std::iter; -use std::ops::{Deref, DerefMut}; use std::path::{is_separator, Path}; -use std::str; -use regex; -use regex::bytes::Regex; +use regex_automata::meta::Regex; use crate::{new_regex, Candidate, Error, ErrorKind}; @@ -18,7 +12,7 @@ use crate::{new_regex, Candidate, Error, ErrorKind}; /// possible to test whether any of those patterns matches by looking up a /// file path's extension in a hash table. #[derive(Clone, Debug, Eq, PartialEq)] -pub enum MatchStrategy { +pub(crate) enum MatchStrategy { /// A pattern matches if and only if the entire file path matches this /// literal string. Literal(String), @@ -53,7 +47,7 @@ pub enum MatchStrategy { impl MatchStrategy { /// Returns a matching strategy for the given pattern. 
- pub fn new(pat: &Glob) -> MatchStrategy { + pub(crate) fn new(pat: &Glob) -> MatchStrategy { if let Some(lit) = pat.basename_literal() { MatchStrategy::BasenameLiteral(lit) } else if let Some(lit) = pat.literal() { @@ -63,7 +57,7 @@ impl MatchStrategy { } else if let Some(prefix) = pat.prefix() { MatchStrategy::Prefix(prefix) } else if let Some((suffix, component)) = pat.suffix() { - MatchStrategy::Suffix { suffix: suffix, component: component } + MatchStrategy::Suffix { suffix, component } } else if let Some(ext) = pat.required_ext() { MatchStrategy::RequiredExtension(ext) } else { @@ -90,20 +84,20 @@ impl PartialEq for Glob { } } -impl hash::Hash for Glob { - fn hash<H: hash::Hasher>(&self, state: &mut H) { +impl std::hash::Hash for Glob { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { self.glob.hash(state); self.opts.hash(state); } } -impl fmt::Display for Glob { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for Glob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.glob.fmt(f) } } -impl str::FromStr for Glob { +impl std::str::FromStr for Glob { type Err = Error; fn from_str(glob: &str) -> Result<Self, Self::Err> { @@ -227,14 +221,14 @@ impl GlobOptions { #[derive(Clone, Debug, Default, Eq, PartialEq)] struct Tokens(Vec<Token>); -impl Deref for Tokens { +impl std::ops::Deref for Tokens { type Target = Vec<Token>; fn deref(&self) -> &Vec<Token> { &self.0 } } -impl DerefMut for Tokens { +impl std::ops::DerefMut for Tokens { fn deref_mut(&mut self) -> &mut Vec<Token> { &mut self.0 } @@ -262,7 +256,7 @@ impl Glob { pub fn compile_matcher(&self) -> GlobMatcher { let re = new_regex(&self.re).expect("regex compilation shouldn't fail"); - GlobMatcher { pat: self.clone(), re: re } + GlobMatcher { pat: self.clone(), re } } /// Returns a strategic matcher. 
@@ -275,7 +269,7 @@ impl Glob { let strategy = MatchStrategy::new(self); let re = new_regex(&self.re).expect("regex compilation shouldn't fail"); - GlobStrategic { strategy: strategy, re: re } + GlobStrategic { strategy, re } } /// Returns the original glob pattern used to build this pattern. @@ -311,10 +305,8 @@ impl Glob { } let mut lit = String::new(); for t in &*self.tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if lit.is_empty() { None @@ -334,13 +326,12 @@ impl Glob { if self.opts.case_insensitive { return None; } - let start = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => 1, - Some(_) => 0, - _ => return None, + let start = match *self.tokens.get(0)? { + Token::RecursivePrefix => 1, + _ => 0, }; - match self.tokens.get(start) { - Some(&Token::ZeroOrMore) => { + match *self.tokens.get(start)? { + Token::ZeroOrMore => { // If there was no recursive prefix, then we only permit // `*` if `*` can match a `/`. For example, if `*` can't // match `/`, then `*.c` doesn't match `foo/bar.c`. @@ -350,8 +341,8 @@ impl Glob { } _ => return None, } - match self.tokens.get(start + 1) { - Some(&Token::Literal('.')) => {} + match *self.tokens.get(start + 1)? { + Token::Literal('.') => {} _ => return None, } let mut lit = ".".to_string(); @@ -405,8 +396,8 @@ impl Glob { if self.opts.case_insensitive { return None; } - let (end, need_sep) = match self.tokens.last() { - Some(&Token::ZeroOrMore) => { + let (end, need_sep) = match *self.tokens.last()? 
{ + Token::ZeroOrMore => { if self.opts.literal_separator { // If a trailing `*` can't match a `/`, then we can't // assume a match of the prefix corresponds to a match @@ -418,15 +409,13 @@ impl Glob { } (self.tokens.len() - 1, false) } - Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true), + Token::RecursiveSuffix => (self.tokens.len() - 1, true), _ => (self.tokens.len(), false), }; let mut lit = String::new(); for t in &self.tokens[0..end] { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if need_sep { lit.push('/'); @@ -455,8 +444,8 @@ impl Glob { return None; } let mut lit = String::new(); - let (start, entire) = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => { + let (start, entire) = match *self.tokens.get(0)? { + Token::RecursivePrefix => { // We only care if this follows a path component if the next // token is a literal. if let Some(&Token::Literal(_)) = self.tokens.get(1) { @@ -468,8 +457,8 @@ impl Glob { } _ => (0, false), }; - let start = match self.tokens.get(start) { - Some(&Token::ZeroOrMore) => { + let start = match *self.tokens.get(start)? { + Token::ZeroOrMore => { // If literal_separator is enabled, then a `*` can't // necessarily match everything, so reporting a suffix match // as a match of the pattern would be a false positive. @@ -481,10 +470,8 @@ impl Glob { _ => start, }; for t in &self.tokens[start..] { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if lit.is_empty() || lit == "/" { None @@ -508,8 +495,8 @@ impl Glob { if self.opts.case_insensitive { return None; } - let start = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => 1, + let start = match *self.tokens.get(0)? 
{ + Token::RecursivePrefix => 1, _ => { // With nothing to gobble up the parent portion of a path, // we can't assume that matching on only the basename is @@ -520,7 +507,7 @@ impl Glob { if self.tokens[start..].is_empty() { return None; } - for t in &self.tokens[start..] { + for t in self.tokens[start..].iter() { match *t { Token::Literal('/') => return None, Token::Literal(_) => {} // OK @@ -554,16 +541,11 @@ impl Glob { /// The basic format of these patterns is `**/{literal}`, where `{literal}` /// does not contain a path separator. fn basename_literal(&self) -> Option<String> { - let tokens = match self.basename_tokens() { - None => return None, - Some(tokens) => tokens, - }; + let tokens = self.basename_tokens()?; let mut lit = String::new(); for t in tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } Some(lit) } @@ -574,7 +556,7 @@ impl<'a> GlobBuilder<'a> { /// /// The pattern is not compiled until `build` is called. pub fn new(glob: &'a str) -> GlobBuilder<'a> { - GlobBuilder { glob: glob, opts: GlobOptions::default() } + GlobBuilder { glob, opts: GlobOptions::default() } } /// Parses and builds the pattern. @@ -604,7 +586,7 @@ impl<'a> GlobBuilder<'a> { glob: self.glob.to_string(), re: tokens.to_regex_with(&self.opts), opts: self.opts, - tokens: tokens, + tokens, }) } } @@ -640,7 +622,8 @@ impl<'a> GlobBuilder<'a> { /// Toggle whether an empty pattern in a list of alternates is accepted. /// - /// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`. + /// For example, if this is set then the glob `foo{,.txt}` will match both + /// `foo` and `foo.txt`. /// /// By default this is false. 
pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> { @@ -678,7 +661,7 @@ impl Tokens { tokens: &[Token], re: &mut String, ) { - for tok in tokens { + for tok in tokens.iter() { match *tok { Token::Literal(c) => { re.push_str(&char_to_escaped_literal(c)); @@ -758,7 +741,9 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String { let mut s = String::with_capacity(bs.len()); for &b in bs { if b <= 0x7F { - s.push_str(&regex::escape(&(b as char).to_string())); + s.push_str(&regex_syntax::escape( + char::from(b).encode_utf8(&mut [0; 4]), + )); } else { s.push_str(&format!("\\x{:02x}", b)); } @@ -769,7 +754,7 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String { struct Parser<'a> { glob: &'a str, stack: Vec<Tokens>, - chars: iter::Peekable<str::Chars<'a>>, + chars: std::iter::Peekable<std::str::Chars<'a>>, prev: Option<char>, cur: Option<char>, opts: &'a GlobOptions, @@ -777,7 +762,7 @@ struct Parser<'a> { impl<'a> Parser<'a> { fn error(&self, kind: ErrorKind) -> Error { - Error { glob: Some(self.glob.to_string()), kind: kind } + Error { glob: Some(self.glob.to_string()), kind } } fn parse(&mut self) -> Result<(), Error> { @@ -996,7 +981,7 @@ impl<'a> Parser<'a> { // it as a literal. ranges.push(('-', '-')); } - self.push_token(Token::Class { negated: negated, ranges: ranges }) + self.push_token(Token::Class { negated, ranges }) } fn bump(&mut self) -> Option<char> { diff --git a/vendor/globset/src/lib.rs b/vendor/globset/src/lib.rs index 7a357489b..b1fc696af 100644 --- a/vendor/globset/src/lib.rs +++ b/vendor/globset/src/lib.rs @@ -5,11 +5,9 @@ Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. For example, given this set of globs: -```ignore -*.rs -src/lib.rs -src/**/foo.rs -``` +* `*.rs` +* `src/lib.rs` +* `src/**/foo.rs` and a path `src/bar/baz/foo.rs`, then the set would report the first and third globs as matching. 
@@ -19,7 +17,6 @@ globs as matching. This example shows how to match a single glob against a single file path. ``` -# fn example() -> Result<(), globset::Error> { use globset::Glob; let glob = Glob::new("*.rs")?.compile_matcher(); @@ -27,7 +24,7 @@ let glob = Glob::new("*.rs")?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(glob.is_match("foo/bar.rs")); assert!(!glob.is_match("Cargo.toml")); -# Ok(()) } example().unwrap(); +# Ok::<(), Box<dyn std::error::Error>>(()) ``` # Example: configuring a glob matcher @@ -36,7 +33,6 @@ This example shows how to use a `GlobBuilder` to configure aspects of match semantics. In this example, we prevent wildcards from matching path separators. ``` -# fn example() -> Result<(), globset::Error> { use globset::GlobBuilder; let glob = GlobBuilder::new("*.rs") @@ -45,7 +41,7 @@ let glob = GlobBuilder::new("*.rs") assert!(glob.is_match("foo.rs")); assert!(!glob.is_match("foo/bar.rs")); // no longer matches assert!(!glob.is_match("Cargo.toml")); -# Ok(()) } example().unwrap(); +# Ok::<(), Box<dyn std::error::Error>>(()) ``` # Example: match multiple globs at once @@ -53,7 +49,6 @@ assert!(!glob.is_match("Cargo.toml")); This example shows how to match multiple glob patterns at once. ``` -# fn example() -> Result<(), globset::Error> { use globset::{Glob, GlobSetBuilder}; let mut builder = GlobSetBuilder::new(); @@ -65,7 +60,7 @@ builder.add(Glob::new("src/**/foo.rs")?); let set = builder.build()?; assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); -# Ok(()) } example().unwrap(); +# Ok::<(), Box<dyn std::error::Error>>(()) ``` # Syntax @@ -103,22 +98,31 @@ or to enable case insensitive matching. 
#![deny(missing_docs)] -use std::borrow::Cow; -use std::collections::{BTreeMap, HashMap}; -use std::error::Error as StdError; -use std::fmt; -use std::hash; -use std::path::Path; -use std::str; - -use aho_corasick::AhoCorasick; -use bstr::{ByteSlice, ByteVec, B}; -use regex::bytes::{Regex, RegexBuilder, RegexSet}; +use std::{ + borrow::Cow, + panic::{RefUnwindSafe, UnwindSafe}, + path::Path, + sync::Arc, +}; + +use { + aho_corasick::AhoCorasick, + bstr::{ByteSlice, ByteVec, B}, + regex_automata::{ + meta::Regex, + util::pool::{Pool, PoolGuard}, + PatternSet, + }, +}; + +use crate::{ + glob::MatchStrategy, + pathutil::{file_name, file_name_ext, normalize_path}, +}; -use crate::glob::MatchStrategy; pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; -use crate::pathutil::{file_name, file_name_ext, normalize_path}; +mod fnv; mod glob; mod pathutil; @@ -181,7 +185,7 @@ pub enum ErrorKind { __Nonexhaustive, } -impl StdError for Error { +impl std::error::Error for Error { fn description(&self) -> &str { self.kind.description() } @@ -227,8 +231,8 @@ impl ErrorKind { } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.glob { None => self.kind.fmt(f), Some(ref glob) => { @@ -238,8 +242,8 @@ impl fmt::Display for Error { } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match *self { ErrorKind::InvalidRecursive | ErrorKind::UnclosedClass @@ -257,30 +261,40 @@ impl fmt::Display for ErrorKind { } fn new_regex(pat: &str) -> Result<Regex, Error> { - RegexBuilder::new(pat) - .dot_matches_new_line(true) - .size_limit(10 * (1 << 20)) - .dfa_size_limit(10 * (1 << 20)) - .build() - .map_err(|err| Error { + let syntax = regex_automata::util::syntax::Config::new() + 
.utf8(false) + .dot_matches_new_line(true); + let config = Regex::config() + .utf8_empty(false) + .nfa_size_limit(Some(10 * (1 << 20))) + .hybrid_cache_capacity(10 * (1 << 20)); + Regex::builder().syntax(syntax).configure(config).build(pat).map_err( + |err| Error { glob: Some(pat.to_string()), kind: ErrorKind::Regex(err.to_string()), + }, + ) +} + +fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> { + let syntax = regex_automata::util::syntax::Config::new() + .utf8(false) + .dot_matches_new_line(true); + let config = Regex::config() + .match_kind(regex_automata::MatchKind::All) + .utf8_empty(false) + .nfa_size_limit(Some(10 * (1 << 20))) + .hybrid_cache_capacity(10 * (1 << 20)); + Regex::builder() + .syntax(syntax) + .configure(config) + .build_many(&pats) + .map_err(|err| Error { + glob: None, + kind: ErrorKind::Regex(err.to_string()), }) } -fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error> -where - S: AsRef<str>, - I: IntoIterator<Item = S>, -{ - RegexSet::new(pats).map_err(|err| Error { - glob: None, - kind: ErrorKind::Regex(err.to_string()), - }) -} - -type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; - /// GlobSet represents a group of globs that can be matched together in a /// single pass. #[derive(Clone, Debug)] @@ -290,6 +304,14 @@ pub struct GlobSet { } impl GlobSet { + /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add + /// new patterns. Once all patterns have been added, `build` should be + /// called to produce a `GlobSet`, which can then be used for matching. + #[inline] + pub fn builder() -> GlobSetBuilder { + GlobSetBuilder::new() + } + /// Create an empty `GlobSet`. An empty set matches nothing. #[inline] pub fn empty() -> GlobSet { @@ -471,9 +493,9 @@ pub struct GlobSetBuilder { } impl GlobSetBuilder { - /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new + /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new /// patterns. 
Once all patterns have been added, `build` should be called - /// to produce a `GlobSet`, which can then be used for matching. + /// to produce a [`GlobSet`], which can then be used for matching. pub fn new() -> GlobSetBuilder { GlobSetBuilder { pats: vec![] } } @@ -521,7 +543,7 @@ impl<'a> Candidate<'a> { let path = normalize_path(Vec::from_path_lossy(path.as_ref())); let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); - Candidate { path: path, basename: basename, ext: ext } + Candidate { path, basename, ext } } fn path_prefix(&self, max: usize) -> &[u8] { @@ -585,11 +607,11 @@ impl GlobSetMatchStrategy { } #[derive(Clone, Debug)] -struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); +struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); impl LiteralStrategy { fn new() -> LiteralStrategy { - LiteralStrategy(BTreeMap::new()) + LiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { @@ -613,11 +635,11 @@ impl LiteralStrategy { } #[derive(Clone, Debug)] -struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); +struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); impl BasenameLiteralStrategy { fn new() -> BasenameLiteralStrategy { - BasenameLiteralStrategy(BTreeMap::new()) + BasenameLiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { @@ -647,11 +669,11 @@ impl BasenameLiteralStrategy { } #[derive(Clone, Debug)] -struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>); +struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); impl ExtensionStrategy { fn new() -> ExtensionStrategy { - ExtensionStrategy(HashMap::with_hasher(Fnv::default())) + ExtensionStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String) { @@ -745,7 +767,7 @@ impl SuffixStrategy { } #[derive(Clone, Debug)] -struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, 
Regex)>, Fnv>); +struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>); impl RequiredExtensionStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { @@ -786,10 +808,22 @@ impl RequiredExtensionStrategy { #[derive(Clone, Debug)] struct RegexSetStrategy { - matcher: RegexSet, + matcher: Regex, map: Vec<usize>, + // We use a pool of PatternSets to hopefully allocating a fresh one on each + // call. + // + // TODO: In the next semver breaking release, we should drop this pool and + // expose an opaque type that wraps PatternSet. Then callers can provide + // it to `matches_into` directly. Callers might still want to use a pool + // or similar to amortize allocation, but that matches the status quo and + // absolves us of needing to do it here. + patset: Arc<Pool<PatternSet, PatternSetPoolFn>>, } +type PatternSetPoolFn = + Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>; + impl RegexSetStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { self.matcher.is_match(candidate.path.as_bytes()) @@ -800,9 +834,14 @@ impl RegexSetStrategy { candidate: &Candidate<'_>, matches: &mut Vec<usize>, ) { - for i in self.matcher.matches(candidate.path.as_bytes()) { + let input = regex_automata::Input::new(candidate.path.as_bytes()); + let mut patset = self.patset.get(); + patset.clear(); + self.matcher.which_overlapping_matches(&input, &mut patset); + for i in patset.iter() { matches.push(self.map[i]); } + PoolGuard::put(patset); } } @@ -843,21 +882,26 @@ impl MultiStrategyBuilder { } fn regex_set(self) -> Result<RegexSetStrategy, Error> { + let matcher = new_regex_set(self.literals)?; + let pattern_len = matcher.pattern_len(); + let create: PatternSetPoolFn = + Box::new(move || PatternSet::new(pattern_len)); Ok(RegexSetStrategy { - matcher: new_regex_set(self.literals)?, + matcher, map: self.map, + patset: Arc::new(Pool::new(create)), }) } } #[derive(Clone, Debug)] struct RequiredExtensionStrategyBuilder( - 
HashMap<Vec<u8>, Vec<(usize, String)>>, + fnv::HashMap<Vec<u8>, Vec<(usize, String)>>, ); impl RequiredExtensionStrategyBuilder { fn new() -> RequiredExtensionStrategyBuilder { - RequiredExtensionStrategyBuilder(HashMap::new()) + RequiredExtensionStrategyBuilder(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String, regex: String) { @@ -868,7 +912,7 @@ impl RequiredExtensionStrategyBuilder { } fn build(self) -> Result<RequiredExtensionStrategy, Error> { - let mut exts = HashMap::with_hasher(Fnv::default()); + let mut exts = fnv::HashMap::default(); for (ext, regexes) in self.0.into_iter() { exts.insert(ext.clone(), vec![]); for (global_index, regex) in regexes { @@ -905,9 +949,10 @@ pub fn escape(s: &str) -> String { #[cfg(test)] mod tests { - use super::{GlobSet, GlobSetBuilder}; use crate::glob::Glob; + use super::{GlobSet, GlobSetBuilder}; + #[test] fn set_works() { let mut builder = GlobSetBuilder::new(); @@ -954,4 +999,24 @@ mod tests { assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz")); assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz")); } + + // This tests that regex matching doesn't "remember" the results of + // previous searches. That is, if any memory is reused from a previous + // search, then it should be cleared first. 
+ #[test] + fn set_does_not_remember() { + let mut builder = GlobSetBuilder::new(); + builder.add(Glob::new("*foo*").unwrap()); + builder.add(Glob::new("*bar*").unwrap()); + builder.add(Glob::new("*quux*").unwrap()); + let set = builder.build().unwrap(); + + let matches = set.matches("ZfooZquuxZ"); + assert_eq!(2, matches.len()); + assert_eq!(0, matches[0]); + assert_eq!(2, matches[1]); + + let matches = set.matches("nada"); + assert_eq!(0, matches.len()); + } } diff --git a/vendor/globset/src/pathutil.rs b/vendor/globset/src/pathutil.rs index 522df3401..8488e74f2 100644 --- a/vendor/globset/src/pathutil.rs +++ b/vendor/globset/src/pathutil.rs @@ -4,12 +4,10 @@ use bstr::{ByteSlice, ByteVec}; /// The final component of the path, if it is a normal file. /// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { - if path.is_empty() { - return None; - } else if path.last_byte() == Some(b'.') { +/// If the path terminates in `.`, `..`, or consists solely of a root of +/// prefix, file_name will return None. +pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { + if path.last_byte().map_or(true, |b| b == b'.') { return None; } let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0); @@ -39,7 +37,9 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { /// a pattern like `*.rs` is obviously trying to match files with a `rs` /// extension, but it also matches files like `.rs`, which doesn't have an /// extension according to std::path::Path::extension. 
-pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { +pub(crate) fn file_name_ext<'a>( + name: &Cow<'a, [u8]>, +) -> Option<Cow<'a, [u8]>> { if name.is_empty() { return None; } @@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> { /// Normalizes a path to use `/` as a separator everywhere, even on platforms /// that recognize other characters as separators. #[cfg(unix)] -pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { +pub(crate) fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { // UNIX only uses /, so we're good. path } @@ -68,11 +68,11 @@ pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { /// Normalizes a path to use `/` as a separator everywhere, even on platforms /// that recognize other characters as separators. #[cfg(not(unix))] -pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { +pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { use std::path::is_separator; for i in 0..path.len() { - if path[i] == b'/' || !is_separator(path[i] as char) { + if path[i] == b'/' || !is_separator(char::from(path[i])) { continue; } path.to_mut()[i] = b'/'; |