diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:35 +0000 |
commit | 7e5d7eea9c580ef4b41a765bde624af431942b96 (patch) | |
tree | 2c0d9ca12878fc4525650aa4e54d77a81a07cc09 /vendor/gix-glob/src | |
parent | Adding debian version 1.70.0+dfsg1-9. (diff) | |
download | rustc-7e5d7eea9c580ef4b41a765bde624af431942b96.tar.xz rustc-7e5d7eea9c580ef4b41a765bde624af431942b96.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-glob/src')
-rw-r--r-- | vendor/gix-glob/src/lib.rs | 41 | ||||
-rw-r--r-- | vendor/gix-glob/src/parse.rs | 86 | ||||
-rw-r--r-- | vendor/gix-glob/src/pattern.rs | 162 | ||||
-rw-r--r-- | vendor/gix-glob/src/wildmatch.rs | 354 |
4 files changed, 643 insertions, 0 deletions
diff --git a/vendor/gix-glob/src/lib.rs b/vendor/gix-glob/src/lib.rs new file mode 100644 index 000000000..48d011a52 --- /dev/null +++ b/vendor/gix-glob/src/lib.rs @@ -0,0 +1,41 @@ +//! Provide glob [`Patterns`][Pattern] for matching against paths or anything else. +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +use bstr::BString; + +/// A glob pattern optimized for matching paths relative to a root directory. +/// +/// For normal globbing, use [`wildmatch()`] instead. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Pattern { + /// the actual pattern bytes + pub text: BString, + /// Additional information to help accelerate pattern matching. + pub mode: pattern::Mode, + /// The position in `text` with the first wildcard character, or `None` if there is no wildcard at all. + pub first_wildcard_pos: Option<usize>, +} + +/// +pub mod pattern; + +/// +pub mod wildmatch; +pub use wildmatch::function::wildmatch; + +mod parse; + +/// Create a [`Pattern`] by parsing `text` or return `None` if `text` is empty. +/// +/// Note that +pub fn parse(text: impl AsRef<[u8]>) -> Option<Pattern> { + Pattern::from_bytes(text.as_ref()) +} diff --git a/vendor/gix-glob/src/parse.rs b/vendor/gix-glob/src/parse.rs new file mode 100644 index 000000000..3693f88ef --- /dev/null +++ b/vendor/gix-glob/src/parse.rs @@ -0,0 +1,86 @@ +use bstr::{BString, ByteSlice}; + +use crate::{pattern, pattern::Mode}; + +#[inline] +/// A sloppy parser that performs only the most basic checks, providing additional information +/// using `pattern::Mode` flags. +/// +/// Returns `(pattern, mode, no_wildcard_len)` +pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)> { + let mut mode = Mode::empty(); + if pat.is_empty() { + return None; + }; + if pat.first() == Some(&b'!') { + mode |= Mode::NEGATIVE; + pat = &pat[1..]; + } else if pat.first() == Some(&b'\\') { + let second = pat.get(1); + if second == Some(&b'!') || second == Some(&b'#') { + pat = &pat[1..]; + } + } + if pat.iter().all(|b| b.is_ascii_whitespace()) { + return None; + } + if pat.first() == Some(&b'/') { + mode |= Mode::ABSOLUTE; + pat = &pat[1..]; + } + let mut pat = truncate_non_escaped_trailing_spaces(pat); + if pat.last() == Some(&b'/') { + mode |= Mode::MUST_BE_DIR; + pat.pop(); + } + + if !pat.contains(&b'/') { + mode |= Mode::NO_SUB_DIR; + } + if pat.first() == Some(&b'*') && first_wildcard_pos(&pat[1..]).is_none() { + mode |= Mode::ENDS_WITH; + } + + let pos_of_first_wildcard = first_wildcard_pos(&pat); + Some((pat, mode, pos_of_first_wildcard)) +} + +fn first_wildcard_pos(pat: &[u8]) -> Option<usize> { + pat.find_byteset(GLOB_CHARACTERS) +} + +pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\"; + +/// We always copy just because that's ultimately needed anyway, not because we always have to. +fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { + match buf.rfind_not_byteset(br"\ ") { + Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace + None => buf.into(), + Some(start_of_non_space) => { + // This seems a bit strange but attempts to recreate the git implementation while + // actually removing the escape characters before spaces. We leave other backslashes + // for escapes to be handled by `glob/globset`. + let mut res: BString = buf[..start_of_non_space + 1].into(); + + let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); + let mut bare_spaces = 0; + while let Some(b) = trailing_bytes.next() { + match b { + b' ' => { + bare_spaces += 1; + } + b'\\' => { + res.extend(std::iter::repeat(b' ').take(bare_spaces)); + bare_spaces = 0; + // Skip what follows, like git does, but keep spaces if possible. + if trailing_bytes.next() == Some(&b' ') { + res.push(b' '); + } + } + _ => unreachable!("BUG: this must be either backslash or space"), + } + } + res + } + } +} diff --git a/vendor/gix-glob/src/pattern.rs b/vendor/gix-glob/src/pattern.rs new file mode 100644 index 000000000..fa874b226 --- /dev/null +++ b/vendor/gix-glob/src/pattern.rs @@ -0,0 +1,162 @@ +use std::fmt; + +use bitflags::bitflags; +use bstr::{BStr, ByteSlice}; + +use crate::{pattern, wildmatch, Pattern}; + +bitflags! { + /// Information about a [`Pattern`]. + /// + /// Its main purpose is to accelerate pattern matching, or to negate the match result or to + /// keep special rules only applicable when matching paths. + /// + /// The mode is typically created when parsing the pattern by inspecting it and isn't typically handled by the user. + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Mode: u32 { + /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. + const NO_SUB_DIR = 1 << 0; + /// A pattern that is '*literal', meaning that it ends with what's given here + const ENDS_WITH = 1 << 1; + /// The pattern must match a directory, and not a file. + const MUST_BE_DIR = 1 << 2; + /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller. + const NEGATIVE = 1 << 3; + /// The pattern starts with a slash and thus matches only from the beginning. + const ABSOLUTE = 1 << 4; + } +} + +/// Describes whether to match a path case sensitively or not. +/// +/// Used in [Pattern::matches_repo_relative_path()]. +#[derive(Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)] +pub enum Case { + /// The case affects the match + Sensitive, + /// Ignore the case of ascii characters. + Fold, +} + +impl Default for Case { + fn default() -> Self { + Case::Sensitive + } +} + +impl Pattern { + /// Parse the given `text` as pattern, or return `None` if `text` was empty. + pub fn from_bytes(text: &[u8]) -> Option<Self> { + crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { + text, + mode, + first_wildcard_pos, + }) + } + + /// Return true if a match is negated. + pub fn is_negative(&self) -> bool { + self.mode.contains(Mode::NEGATIVE) + } + + /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root. + /// Note that `path` is assumed to be relative to the repository. + /// + /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. + /// `basename_start_pos` is the index at which the `path`'s basename starts. + /// + /// Lastly, `case` folding can be configured as well. + pub fn matches_repo_relative_path<'a>( + &self, + path: impl Into<&'a BStr>, + basename_start_pos: Option<usize>, + is_dir: Option<bool>, + case: Case, + ) -> bool { + let is_dir = is_dir.unwrap_or(false); + if !is_dir && self.mode.contains(pattern::Mode::MUST_BE_DIR) { + return false; + } + + let flags = wildmatch::Mode::NO_MATCH_SLASH_LITERAL + | match case { + Case::Fold => wildmatch::Mode::IGNORE_CASE, + Case::Sensitive => wildmatch::Mode::empty(), + }; + let path = path.into(); + debug_assert_eq!( + basename_start_pos, + path.rfind_byte(b'/').map(|p| p + 1), + "BUG: invalid cached basename_start_pos provided" + ); + debug_assert!(!path.starts_with(b"/"), "input path must be relative"); + + if self.mode.contains(pattern::Mode::NO_SUB_DIR) && !self.mode.contains(pattern::Mode::ABSOLUTE) { + let basename = &path[basename_start_pos.unwrap_or_default()..]; + self.matches(basename, flags) + } else { + self.matches(path, flags) + } + } + + /// See if `value` matches this pattern in the given `mode`. + /// + /// `mode` can identify `value` as path which won't match the slash character, and can match + /// strings with cases ignored as well. Note that the case folding performed here is ASCII only. + /// + /// Note that this method uses some shortcuts to accelerate simple patterns. + fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { + let value = value.into(); + match self.first_wildcard_pos { + // "*literal" case, overrides starts-with + Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) && !value.contains(&b'/') => { + let text = &self.text[pos + 1..]; + if mode.contains(wildmatch::Mode::IGNORE_CASE) { + value + .len() + .checked_sub(text.len()) + .map(|start| text.eq_ignore_ascii_case(&value[start..])) + .unwrap_or(false) + } else { + value.ends_with(text.as_ref()) + } + } + Some(pos) => { + if mode.contains(wildmatch::Mode::IGNORE_CASE) { + if !value + .get(..pos) + .map_or(false, |value| value.eq_ignore_ascii_case(&self.text[..pos])) + { + return false; + } + } else if !value.starts_with(&self.text[..pos]) { + return false; + } + crate::wildmatch(self.text.as_bstr(), value, mode) + } + None => { + if mode.contains(wildmatch::Mode::IGNORE_CASE) { + self.text.eq_ignore_ascii_case(value) + } else { + self.text == value + } + } + } + } +} + +impl fmt::Display for Pattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.mode.contains(Mode::NEGATIVE) { + "!".fmt(f)?; + } + if self.mode.contains(Mode::ABSOLUTE) { + "/".fmt(f)?; + } + self.text.fmt(f)?; + if self.mode.contains(Mode::MUST_BE_DIR) { + "/".fmt(f)?; + } + Ok(()) + } +} diff --git a/vendor/gix-glob/src/wildmatch.rs b/vendor/gix-glob/src/wildmatch.rs new file mode 100644 index 000000000..4b2e33948 --- /dev/null +++ b/vendor/gix-glob/src/wildmatch.rs @@ -0,0 +1,354 @@ +use bitflags::bitflags; +bitflags! { + /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()]. + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Mode: u8 { + /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths. + const NO_MATCH_SLASH_LITERAL = 1 << 0; + /// Match case insensitively for ascii characters only. + const IGNORE_CASE = 1 << 1; + } +} + +pub(crate) mod function { + use bstr::{BStr, ByteSlice}; + + use crate::wildmatch::Mode; + + #[derive(Eq, PartialEq)] + enum Result { + Match, + NoMatch, + AbortAll, + AbortToStarStar, + } + + const STAR: u8 = b'*'; + const BACKSLASH: u8 = b'\\'; + const SLASH: u8 = b'/'; + const BRACKET_OPEN: u8 = b'['; + const BRACKET_CLOSE: u8 = b']'; + const COLON: u8 = b':'; + + const NEGATE_CLASS: u8 = b'!'; + + fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result { + use self::Result::*; + let possibly_lowercase = |c: &u8| { + if mode.contains(Mode::IGNORE_CASE) { + c.to_ascii_lowercase() + } else { + *c + } + }; + let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable(); + let mut t = text.iter().map(possibly_lowercase).enumerate(); + + while let Some((mut p_idx, mut p_ch)) = p.next() { + let (mut t_idx, mut t_ch) = match t.next() { + Some(c) => c, + None if p_ch != STAR => return AbortAll, + None => (text.len(), 0), + }; + + if p_ch == BACKSLASH { + match p.next() { + Some((_p_idx, p_ch)) => { + if p_ch != t_ch { + return NoMatch; + } else { + continue; + } + } + None => return NoMatch, + }; + } + match p_ch { + b'?' => { + if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH { + return NoMatch; + } else { + continue; + } + } + STAR => { + let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL); + match p.next() { + Some((next_p_idx, next_p_ch)) => { + let next; + if next_p_ch == STAR { + let leading_slash_idx = p_idx.checked_sub(1); + while p.next_if(|(_, c)| *c == STAR).is_some() {} + next = p.next(); + if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) { + match_slash = true; + } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH) + && next.map_or(true, |(_, c)| { + c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) + }) + { + if next.map_or(NoMatch, |(idx, _)| { + match_recursive(pattern[idx + 1..].as_bstr(), text[t_idx..].as_bstr(), mode) + }) == Match + { + return Match; + } + match_slash = true; + } else { + match_slash = false; + } + } else { + next = Some((next_p_idx, next_p_ch)); + } + + match next { + None => { + return if !match_slash && text[t_idx..].contains(&SLASH) { + NoMatch + } else { + Match + }; + } + Some((next_p_idx, next_p_ch)) => { + p_idx = next_p_idx; + p_ch = next_p_ch; + if !match_slash && p_ch == SLASH { + match text[t_idx..].find_byte(SLASH) { + Some(distance_to_slash) => { + for _ in t.by_ref().take(distance_to_slash) {} + continue; + } + None => return NoMatch, + } + } + } + } + } + None => { + return if !match_slash && text[t_idx..].contains(&SLASH) { + NoMatch + } else { + Match + } + } + } + + return loop { + if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) { + loop { + if (!match_slash && t_ch == SLASH) || t_ch == p_ch { + break; + } + match t.next() { + Some(t) => { + t_idx = t.0; + t_ch = t.1; + } + None => break, + }; + } + if t_ch != p_ch { + return NoMatch; + } + } + let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode); + if res != NoMatch { + if !match_slash || res != AbortToStarStar { + return res; + } + } else if !match_slash && t_ch == SLASH { + return AbortToStarStar; + } + match t.next() { + Some(t) => { + t_idx = t.0; + t_ch = t.1; + } + None => break AbortAll, + }; + }; + } + BRACKET_OPEN => { + match p.next() { + Some(t) => { + p_idx = t.0; + p_ch = t.1; + } + None => return AbortAll, + }; + + if p_ch == b'^' { + p_ch = NEGATE_CLASS; + } + let negated = p_ch == NEGATE_CLASS; + let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) }; + let mut prev_p_ch = 0; + let mut matched = false; + loop { + match next { + None => return AbortAll, + Some((p_idx, mut p_ch)) => match p_ch { + BACKSLASH => match p.next() { + Some((_, p_ch)) => { + if p_ch == t_ch { + matched = true + } else { + prev_p_ch = p_ch; + } + } + None => return AbortAll, + }, + b'-' if prev_p_ch != 0 + && p.peek().is_some() + && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) => + { + p_ch = p.next().expect("peeked").1; + if p_ch == BACKSLASH { + p_ch = match p.next() { + Some(t) => t.1, + None => return AbortAll, + }; + } + if t_ch <= p_ch && t_ch >= prev_p_ch { + matched = true; + } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() { + let t_ch_upper = t_ch.to_ascii_uppercase(); + if (t_ch_upper <= p_ch.to_ascii_uppercase() + && t_ch_upper >= prev_p_ch.to_ascii_uppercase()) + || (t_ch_upper <= prev_p_ch.to_ascii_uppercase() + && t_ch_upper >= p_ch.to_ascii_uppercase()) + { + matched = true; + } + } + prev_p_ch = 0; + } + BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => { + p.next(); + while p.peek().map_or(false, |t| t.1 != BRACKET_CLOSE) { + p.next(); + } + let closing_bracket_idx = match p.next() { + Some((idx, _)) => idx, + None => return AbortAll, + }; + const BRACKET__COLON__BRACKET: usize = 3; + if closing_bracket_idx - p_idx < BRACKET__COLON__BRACKET + || pattern[closing_bracket_idx - 1] != COLON + { + if t_ch == BRACKET_OPEN { + matched = true + } + p = pattern[p_idx + 1..] + .iter() + .map(possibly_lowercase) + .enumerate() + .peekable(); + } else { + let class = &pattern.as_bytes()[p_idx + 2..closing_bracket_idx - 1]; + match class { + b"alnum" => { + if t_ch.is_ascii_alphanumeric() { + matched = true; + } + } + b"alpha" => { + if t_ch.is_ascii_alphabetic() { + matched = true; + } + } + b"blank" => { + if t_ch.is_ascii_whitespace() { + matched = true; + } + } + b"cntrl" => { + if t_ch.is_ascii_control() { + matched = true; + } + } + b"digit" => { + if t_ch.is_ascii_digit() { + matched = true; + } + } + + b"graph" => { + if t_ch.is_ascii_graphic() { + matched = true; + } + } + b"lower" => { + if t_ch.is_ascii_lowercase() { + matched = true; + } + } + b"print" => { + if (0x20u8..=0x7e).contains(&t_ch) { + matched = true; + } + } + b"punct" => { + if t_ch.is_ascii_punctuation() { + matched = true; + } + } + b"space" => { + if t_ch == b' ' { + matched = true; + } + } + b"upper" => { + if t_ch.is_ascii_uppercase() + || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() + { + matched = true; + } + } + b"xdigit" => { + if t_ch.is_ascii_hexdigit() { + matched = true; + } + } + _ => return AbortAll, + }; + prev_p_ch = 0; + } + } + _ => { + prev_p_ch = p_ch; + if p_ch == t_ch { + matched = true; + } + } + }, + }; + next = p.next(); + if let Some((_, BRACKET_CLOSE)) = next { + break; + } + } + if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH { + return NoMatch; + } + continue; + } + non_glob_ch => { + if non_glob_ch != t_ch { + return NoMatch; + } else { + continue; + } + } + } + } + t.next().map(|_| NoMatch).unwrap_or(Match) + } + + /// Employ pattern matching to see if `value` matches `pattern`. + /// + /// `mode` can be used to adjust the way the matching is performed. + pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool { + match_recursive(pattern, value, mode) == Result::Match + } +} |