summary | refs | log | tree | commit | diff | stats
path: root/vendor/globset/src
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:56 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:56 +0000
commit: 018c4950b9406055dec02ef0fb52f132e2bb1e2c (patch)
tree: a835ebdf2088ef88fa681f8fad45f09922c1ae9a /vendor/globset/src
parent: Adding debian version 1.75.0+dfsg1-5. (diff)
download: rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.tar.xz
rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.zip
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/globset/src')
-rw-r--r-- vendor/globset/src/fnv.rs 30
-rw-r--r-- vendor/globset/src/glob.rs 115
-rw-r--r-- vendor/globset/src/lib.rs 195
-rw-r--r-- vendor/globset/src/pathutil.rs 20
4 files changed, 220 insertions, 140 deletions
diff --git a/vendor/globset/src/fnv.rs b/vendor/globset/src/fnv.rs
new file mode 100644
index 000000000..91174e20b
--- /dev/null
+++ b/vendor/globset/src/fnv.rs
@@ -0,0 +1,30 @@
+/// A convenience alias for creating a hash map with an FNV hasher.
+pub(crate) type HashMap<K, V> =
+ std::collections::HashMap<K, V, std::hash::BuildHasherDefault<Hasher>>;
+
+/// A hasher that implements the 64-bit Fowler–Noll–Vo (FNV-1a) hash.
+pub(crate) struct Hasher(u64);
+
+impl Hasher {
+ const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
+ const PRIME: u64 = 0x100000001b3;
+}
+
+impl Default for Hasher {
+ fn default() -> Hasher {
+ Hasher(Hasher::OFFSET_BASIS)
+ }
+}
+
+impl std::hash::Hasher for Hasher {
+ fn finish(&self) -> u64 {
+ self.0
+ }
+
+ fn write(&mut self, bytes: &[u8]) {
+ for &byte in bytes.iter() {
+ self.0 = self.0 ^ u64::from(byte);
+ self.0 = self.0.wrapping_mul(Hasher::PRIME);
+ }
+ }
+}
diff --git a/vendor/globset/src/glob.rs b/vendor/globset/src/glob.rs
index d19c70ed2..83c08344c 100644
--- a/vendor/globset/src/glob.rs
+++ b/vendor/globset/src/glob.rs
@@ -1,12 +1,6 @@
-use std::fmt;
-use std::hash;
-use std::iter;
-use std::ops::{Deref, DerefMut};
use std::path::{is_separator, Path};
-use std::str;
-use regex;
-use regex::bytes::Regex;
+use regex_automata::meta::Regex;
use crate::{new_regex, Candidate, Error, ErrorKind};
@@ -18,7 +12,7 @@ use crate::{new_regex, Candidate, Error, ErrorKind};
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum MatchStrategy {
+pub(crate) enum MatchStrategy {
/// A pattern matches if and only if the entire file path matches this
/// literal string.
Literal(String),
@@ -53,7 +47,7 @@ pub enum MatchStrategy {
impl MatchStrategy {
/// Returns a matching strategy for the given pattern.
- pub fn new(pat: &Glob) -> MatchStrategy {
+ pub(crate) fn new(pat: &Glob) -> MatchStrategy {
if let Some(lit) = pat.basename_literal() {
MatchStrategy::BasenameLiteral(lit)
} else if let Some(lit) = pat.literal() {
@@ -63,7 +57,7 @@ impl MatchStrategy {
} else if let Some(prefix) = pat.prefix() {
MatchStrategy::Prefix(prefix)
} else if let Some((suffix, component)) = pat.suffix() {
- MatchStrategy::Suffix { suffix: suffix, component: component }
+ MatchStrategy::Suffix { suffix, component }
} else if let Some(ext) = pat.required_ext() {
MatchStrategy::RequiredExtension(ext)
} else {
@@ -90,20 +84,20 @@ impl PartialEq for Glob {
}
}
-impl hash::Hash for Glob {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
+impl std::hash::Hash for Glob {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
}
-impl fmt::Display for Glob {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for Glob {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.glob.fmt(f)
}
}
-impl str::FromStr for Glob {
+impl std::str::FromStr for Glob {
type Err = Error;
fn from_str(glob: &str) -> Result<Self, Self::Err> {
@@ -227,14 +221,14 @@ impl GlobOptions {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);
-impl Deref for Tokens {
+impl std::ops::Deref for Tokens {
type Target = Vec<Token>;
fn deref(&self) -> &Vec<Token> {
&self.0
}
}
-impl DerefMut for Tokens {
+impl std::ops::DerefMut for Tokens {
fn deref_mut(&mut self) -> &mut Vec<Token> {
&mut self.0
}
@@ -262,7 +256,7 @@ impl Glob {
pub fn compile_matcher(&self) -> GlobMatcher {
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
- GlobMatcher { pat: self.clone(), re: re }
+ GlobMatcher { pat: self.clone(), re }
}
/// Returns a strategic matcher.
@@ -275,7 +269,7 @@ impl Glob {
let strategy = MatchStrategy::new(self);
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
- GlobStrategic { strategy: strategy, re: re }
+ GlobStrategic { strategy, re }
}
/// Returns the original glob pattern used to build this pattern.
@@ -311,10 +305,8 @@ impl Glob {
}
let mut lit = String::new();
for t in &*self.tokens {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if lit.is_empty() {
None
@@ -334,13 +326,12 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let start = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => 1,
- Some(_) => 0,
- _ => return None,
+ let start = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => 1,
+ _ => 0,
};
- match self.tokens.get(start) {
- Some(&Token::ZeroOrMore) => {
+ match *self.tokens.get(start)? {
+ Token::ZeroOrMore => {
// If there was no recursive prefix, then we only permit
// `*` if `*` can match a `/`. For example, if `*` can't
// match `/`, then `*.c` doesn't match `foo/bar.c`.
@@ -350,8 +341,8 @@ impl Glob {
}
_ => return None,
}
- match self.tokens.get(start + 1) {
- Some(&Token::Literal('.')) => {}
+ match *self.tokens.get(start + 1)? {
+ Token::Literal('.') => {}
_ => return None,
}
let mut lit = ".".to_string();
@@ -405,8 +396,8 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let (end, need_sep) = match self.tokens.last() {
- Some(&Token::ZeroOrMore) => {
+ let (end, need_sep) = match *self.tokens.last()? {
+ Token::ZeroOrMore => {
if self.opts.literal_separator {
// If a trailing `*` can't match a `/`, then we can't
// assume a match of the prefix corresponds to a match
@@ -418,15 +409,13 @@ impl Glob {
}
(self.tokens.len() - 1, false)
}
- Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
+ Token::RecursiveSuffix => (self.tokens.len() - 1, true),
_ => (self.tokens.len(), false),
};
let mut lit = String::new();
for t in &self.tokens[0..end] {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if need_sep {
lit.push('/');
@@ -455,8 +444,8 @@ impl Glob {
return None;
}
let mut lit = String::new();
- let (start, entire) = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {
+ let (start, entire) = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => {
// We only care if this follows a path component if the next
// token is a literal.
if let Some(&Token::Literal(_)) = self.tokens.get(1) {
@@ -468,8 +457,8 @@ impl Glob {
}
_ => (0, false),
};
- let start = match self.tokens.get(start) {
- Some(&Token::ZeroOrMore) => {
+ let start = match *self.tokens.get(start)? {
+ Token::ZeroOrMore => {
// If literal_separator is enabled, then a `*` can't
// necessarily match everything, so reporting a suffix match
// as a match of the pattern would be a false positive.
@@ -481,10 +470,8 @@ impl Glob {
_ => start,
};
for t in &self.tokens[start..] {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if lit.is_empty() || lit == "/" {
None
@@ -508,8 +495,8 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let start = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => 1,
+ let start = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => 1,
_ => {
// With nothing to gobble up the parent portion of a path,
// we can't assume that matching on only the basename is
@@ -520,7 +507,7 @@ impl Glob {
if self.tokens[start..].is_empty() {
return None;
}
- for t in &self.tokens[start..] {
+ for t in self.tokens[start..].iter() {
match *t {
Token::Literal('/') => return None,
Token::Literal(_) => {} // OK
@@ -554,16 +541,11 @@ impl Glob {
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
/// does not contain a path separator.
fn basename_literal(&self) -> Option<String> {
- let tokens = match self.basename_tokens() {
- None => return None,
- Some(tokens) => tokens,
- };
+ let tokens = self.basename_tokens()?;
let mut lit = String::new();
for t in tokens {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
Some(lit)
}
@@ -574,7 +556,7 @@ impl<'a> GlobBuilder<'a> {
///
/// The pattern is not compiled until `build` is called.
pub fn new(glob: &'a str) -> GlobBuilder<'a> {
- GlobBuilder { glob: glob, opts: GlobOptions::default() }
+ GlobBuilder { glob, opts: GlobOptions::default() }
}
/// Parses and builds the pattern.
@@ -604,7 +586,7 @@ impl<'a> GlobBuilder<'a> {
glob: self.glob.to_string(),
re: tokens.to_regex_with(&self.opts),
opts: self.opts,
- tokens: tokens,
+ tokens,
})
}
}
@@ -640,7 +622,8 @@ impl<'a> GlobBuilder<'a> {
/// Toggle whether an empty pattern in a list of alternates is accepted.
///
- /// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`.
+ /// For example, if this is set then the glob `foo{,.txt}` will match both
+ /// `foo` and `foo.txt`.
///
/// By default this is false.
pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
@@ -678,7 +661,7 @@ impl Tokens {
tokens: &[Token],
re: &mut String,
) {
- for tok in tokens {
+ for tok in tokens.iter() {
match *tok {
Token::Literal(c) => {
re.push_str(&char_to_escaped_literal(c));
@@ -758,7 +741,9 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
let mut s = String::with_capacity(bs.len());
for &b in bs {
if b <= 0x7F {
- s.push_str(&regex::escape(&(b as char).to_string()));
+ s.push_str(&regex_syntax::escape(
+ char::from(b).encode_utf8(&mut [0; 4]),
+ ));
} else {
s.push_str(&format!("\\x{:02x}", b));
}
@@ -769,7 +754,7 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
struct Parser<'a> {
glob: &'a str,
stack: Vec<Tokens>,
- chars: iter::Peekable<str::Chars<'a>>,
+ chars: std::iter::Peekable<std::str::Chars<'a>>,
prev: Option<char>,
cur: Option<char>,
opts: &'a GlobOptions,
@@ -777,7 +762,7 @@ struct Parser<'a> {
impl<'a> Parser<'a> {
fn error(&self, kind: ErrorKind) -> Error {
- Error { glob: Some(self.glob.to_string()), kind: kind }
+ Error { glob: Some(self.glob.to_string()), kind }
}
fn parse(&mut self) -> Result<(), Error> {
@@ -996,7 +981,7 @@ impl<'a> Parser<'a> {
// it as a literal.
ranges.push(('-', '-'));
}
- self.push_token(Token::Class { negated: negated, ranges: ranges })
+ self.push_token(Token::Class { negated, ranges })
}
fn bump(&mut self) -> Option<char> {
diff --git a/vendor/globset/src/lib.rs b/vendor/globset/src/lib.rs
index 7a357489b..b1fc696af 100644
--- a/vendor/globset/src/lib.rs
+++ b/vendor/globset/src/lib.rs
@@ -5,11 +5,9 @@ Glob set matching is the process of matching one or more glob patterns against
a single candidate path simultaneously, and returning all of the globs that
matched. For example, given this set of globs:
-```ignore
-*.rs
-src/lib.rs
-src/**/foo.rs
-```
+* `*.rs`
+* `src/lib.rs`
+* `src/**/foo.rs`
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
globs as matching.
@@ -19,7 +17,6 @@ globs as matching.
This example shows how to match a single glob against a single file path.
```
-# fn example() -> Result<(), globset::Error> {
use globset::Glob;
let glob = Glob::new("*.rs")?.compile_matcher();
@@ -27,7 +24,7 @@ let glob = Glob::new("*.rs")?.compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(glob.is_match("foo/bar.rs"));
assert!(!glob.is_match("Cargo.toml"));
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Example: configuring a glob matcher
@@ -36,7 +33,6 @@ This example shows how to use a `GlobBuilder` to configure aspects of match
semantics. In this example, we prevent wildcards from matching path separators.
```
-# fn example() -> Result<(), globset::Error> {
use globset::GlobBuilder;
let glob = GlobBuilder::new("*.rs")
@@ -45,7 +41,7 @@ let glob = GlobBuilder::new("*.rs")
assert!(glob.is_match("foo.rs"));
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
assert!(!glob.is_match("Cargo.toml"));
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Example: match multiple globs at once
@@ -53,7 +49,6 @@ assert!(!glob.is_match("Cargo.toml"));
This example shows how to match multiple glob patterns at once.
```
-# fn example() -> Result<(), globset::Error> {
use globset::{Glob, GlobSetBuilder};
let mut builder = GlobSetBuilder::new();
@@ -65,7 +60,7 @@ builder.add(Glob::new("src/**/foo.rs")?);
let set = builder.build()?;
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Syntax
@@ -103,22 +98,31 @@ or to enable case insensitive matching.
#![deny(missing_docs)]
-use std::borrow::Cow;
-use std::collections::{BTreeMap, HashMap};
-use std::error::Error as StdError;
-use std::fmt;
-use std::hash;
-use std::path::Path;
-use std::str;
-
-use aho_corasick::AhoCorasick;
-use bstr::{ByteSlice, ByteVec, B};
-use regex::bytes::{Regex, RegexBuilder, RegexSet};
+use std::{
+ borrow::Cow,
+ panic::{RefUnwindSafe, UnwindSafe},
+ path::Path,
+ sync::Arc,
+};
+
+use {
+ aho_corasick::AhoCorasick,
+ bstr::{ByteSlice, ByteVec, B},
+ regex_automata::{
+ meta::Regex,
+ util::pool::{Pool, PoolGuard},
+ PatternSet,
+ },
+};
+
+use crate::{
+ glob::MatchStrategy,
+ pathutil::{file_name, file_name_ext, normalize_path},
+};
-use crate::glob::MatchStrategy;
pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
-use crate::pathutil::{file_name, file_name_ext, normalize_path};
+mod fnv;
mod glob;
mod pathutil;
@@ -181,7 +185,7 @@ pub enum ErrorKind {
__Nonexhaustive,
}
-impl StdError for Error {
+impl std::error::Error for Error {
fn description(&self) -> &str {
self.kind.description()
}
@@ -227,8 +231,8 @@ impl ErrorKind {
}
}
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.glob {
None => self.kind.fmt(f),
Some(ref glob) => {
@@ -238,8 +242,8 @@ impl fmt::Display for Error {
}
}
-impl fmt::Display for ErrorKind {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for ErrorKind {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
ErrorKind::InvalidRecursive
| ErrorKind::UnclosedClass
@@ -257,30 +261,40 @@ impl fmt::Display for ErrorKind {
}
fn new_regex(pat: &str) -> Result<Regex, Error> {
- RegexBuilder::new(pat)
- .dot_matches_new_line(true)
- .size_limit(10 * (1 << 20))
- .dfa_size_limit(10 * (1 << 20))
- .build()
- .map_err(|err| Error {
+ let syntax = regex_automata::util::syntax::Config::new()
+ .utf8(false)
+ .dot_matches_new_line(true);
+ let config = Regex::config()
+ .utf8_empty(false)
+ .nfa_size_limit(Some(10 * (1 << 20)))
+ .hybrid_cache_capacity(10 * (1 << 20));
+ Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
+ |err| Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
+ },
+ )
+}
+
+fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
+ let syntax = regex_automata::util::syntax::Config::new()
+ .utf8(false)
+ .dot_matches_new_line(true);
+ let config = Regex::config()
+ .match_kind(regex_automata::MatchKind::All)
+ .utf8_empty(false)
+ .nfa_size_limit(Some(10 * (1 << 20)))
+ .hybrid_cache_capacity(10 * (1 << 20));
+ Regex::builder()
+ .syntax(syntax)
+ .configure(config)
+ .build_many(&pats)
+ .map_err(|err| Error {
+ glob: None,
+ kind: ErrorKind::Regex(err.to_string()),
})
}
-fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
-where
- S: AsRef<str>,
- I: IntoIterator<Item = S>,
-{
- RegexSet::new(pats).map_err(|err| Error {
- glob: None,
- kind: ErrorKind::Regex(err.to_string()),
- })
-}
-
-type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
-
/// GlobSet represents a group of globs that can be matched together in a
/// single pass.
#[derive(Clone, Debug)]
@@ -290,6 +304,14 @@ pub struct GlobSet {
}
impl GlobSet {
+ /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
+ /// new patterns. Once all patterns have been added, `build` should be
+ /// called to produce a `GlobSet`, which can then be used for matching.
+ #[inline]
+ pub fn builder() -> GlobSetBuilder {
+ GlobSetBuilder::new()
+ }
+
/// Create an empty `GlobSet`. An empty set matches nothing.
#[inline]
pub fn empty() -> GlobSet {
@@ -471,9 +493,9 @@ pub struct GlobSetBuilder {
}
impl GlobSetBuilder {
- /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
+ /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
/// patterns. Once all patterns have been added, `build` should be called
- /// to produce a `GlobSet`, which can then be used for matching.
+ /// to produce a [`GlobSet`], which can then be used for matching.
pub fn new() -> GlobSetBuilder {
GlobSetBuilder { pats: vec![] }
}
@@ -521,7 +543,7 @@ impl<'a> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
- Candidate { path: path, basename: basename, ext: ext }
+ Candidate { path, basename, ext }
}
fn path_prefix(&self, max: usize) -> &[u8] {
@@ -585,11 +607,11 @@ impl GlobSetMatchStrategy {
}
#[derive(Clone, Debug)]
-struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl LiteralStrategy {
fn new() -> LiteralStrategy {
- LiteralStrategy(BTreeMap::new())
+ LiteralStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, lit: String) {
@@ -613,11 +635,11 @@ impl LiteralStrategy {
}
#[derive(Clone, Debug)]
-struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl BasenameLiteralStrategy {
fn new() -> BasenameLiteralStrategy {
- BasenameLiteralStrategy(BTreeMap::new())
+ BasenameLiteralStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, lit: String) {
@@ -647,11 +669,11 @@ impl BasenameLiteralStrategy {
}
#[derive(Clone, Debug)]
-struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
+struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl ExtensionStrategy {
fn new() -> ExtensionStrategy {
- ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
+ ExtensionStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, ext: String) {
@@ -745,7 +767,7 @@ impl SuffixStrategy {
}
#[derive(Clone, Debug)]
-struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
+struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
impl RequiredExtensionStrategy {
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
@@ -786,10 +808,22 @@ impl RequiredExtensionStrategy {
#[derive(Clone, Debug)]
struct RegexSetStrategy {
- matcher: RegexSet,
+ matcher: Regex,
map: Vec<usize>,
+ // We use a pool of PatternSets to hopefully avoid allocating a fresh one
+ // on each call.
+ //
+ // TODO: In the next semver breaking release, we should drop this pool and
+ // expose an opaque type that wraps PatternSet. Then callers can provide
+ // it to `matches_into` directly. Callers might still want to use a pool
+ // or similar to amortize allocation, but that matches the status quo and
+ // absolves us of needing to do it here.
+ patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
}
+type PatternSetPoolFn =
+ Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
+
impl RegexSetStrategy {
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
self.matcher.is_match(candidate.path.as_bytes())
@@ -800,9 +834,14 @@ impl RegexSetStrategy {
candidate: &Candidate<'_>,
matches: &mut Vec<usize>,
) {
- for i in self.matcher.matches(candidate.path.as_bytes()) {
+ let input = regex_automata::Input::new(candidate.path.as_bytes());
+ let mut patset = self.patset.get();
+ patset.clear();
+ self.matcher.which_overlapping_matches(&input, &mut patset);
+ for i in patset.iter() {
matches.push(self.map[i]);
}
+ PoolGuard::put(patset);
}
}
@@ -843,21 +882,26 @@ impl MultiStrategyBuilder {
}
fn regex_set(self) -> Result<RegexSetStrategy, Error> {
+ let matcher = new_regex_set(self.literals)?;
+ let pattern_len = matcher.pattern_len();
+ let create: PatternSetPoolFn =
+ Box::new(move || PatternSet::new(pattern_len));
Ok(RegexSetStrategy {
- matcher: new_regex_set(self.literals)?,
+ matcher,
map: self.map,
+ patset: Arc::new(Pool::new(create)),
})
}
}
#[derive(Clone, Debug)]
struct RequiredExtensionStrategyBuilder(
- HashMap<Vec<u8>, Vec<(usize, String)>>,
+ fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
);
impl RequiredExtensionStrategyBuilder {
fn new() -> RequiredExtensionStrategyBuilder {
- RequiredExtensionStrategyBuilder(HashMap::new())
+ RequiredExtensionStrategyBuilder(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, ext: String, regex: String) {
@@ -868,7 +912,7 @@ impl RequiredExtensionStrategyBuilder {
}
fn build(self) -> Result<RequiredExtensionStrategy, Error> {
- let mut exts = HashMap::with_hasher(Fnv::default());
+ let mut exts = fnv::HashMap::default();
for (ext, regexes) in self.0.into_iter() {
exts.insert(ext.clone(), vec![]);
for (global_index, regex) in regexes {
@@ -905,9 +949,10 @@ pub fn escape(s: &str) -> String {
#[cfg(test)]
mod tests {
- use super::{GlobSet, GlobSetBuilder};
use crate::glob::Glob;
+ use super::{GlobSet, GlobSetBuilder};
+
#[test]
fn set_works() {
let mut builder = GlobSetBuilder::new();
@@ -954,4 +999,24 @@ mod tests {
assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
}
+
+ // This tests that regex matching doesn't "remember" the results of
+ // previous searches. That is, if any memory is reused from a previous
+ // search, then it should be cleared first.
+ #[test]
+ fn set_does_not_remember() {
+ let mut builder = GlobSetBuilder::new();
+ builder.add(Glob::new("*foo*").unwrap());
+ builder.add(Glob::new("*bar*").unwrap());
+ builder.add(Glob::new("*quux*").unwrap());
+ let set = builder.build().unwrap();
+
+ let matches = set.matches("ZfooZquuxZ");
+ assert_eq!(2, matches.len());
+ assert_eq!(0, matches[0]);
+ assert_eq!(2, matches[1]);
+
+ let matches = set.matches("nada");
+ assert_eq!(0, matches.len());
+ }
}
diff --git a/vendor/globset/src/pathutil.rs b/vendor/globset/src/pathutil.rs
index 522df3401..8488e74f2 100644
--- a/vendor/globset/src/pathutil.rs
+++ b/vendor/globset/src/pathutil.rs
@@ -4,12 +4,10 @@ use bstr::{ByteSlice, ByteVec};
/// The final component of the path, if it is a normal file.
///
-/// If the path terminates in ., .., or consists solely of a root of prefix,
-/// file_name will return None.
-pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
- if path.is_empty() {
- return None;
- } else if path.last_byte() == Some(b'.') {
+/// If the path terminates in `.`, `..`, or consists solely of a root or
+/// prefix, `file_name` will return `None`.
+pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+ if path.last_byte().map_or(true, |b| b == b'.') {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
@@ -39,7 +37,9 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
/// extension, but it also matches files like `.rs`, which doesn't have an
/// extension according to std::path::Path::extension.
-pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+pub(crate) fn file_name_ext<'a>(
+ name: &Cow<'a, [u8]>,
+) -> Option<Cow<'a, [u8]>> {
if name.is_empty() {
return None;
}
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(unix)]
-pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
+pub(crate) fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
// UNIX only uses /, so we're good.
path
}
@@ -68,11 +68,11 @@ pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(not(unix))]
-pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
+pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
use std::path::is_separator;
for i in 0..path.len() {
- if path[i] == b'/' || !is_separator(path[i] as char) {
+ if path[i] == b'/' || !is_separator(char::from(path[i])) {
continue;
}
path.to_mut()[i] = b'/';