summaryrefslogtreecommitdiffstats
path: root/vendor/gix-glob/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
commit10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
treebdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-glob/src
parentReleasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
downloadrustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz
rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-glob/src')
-rw-r--r--vendor/gix-glob/src/lib.rs41
-rw-r--r--vendor/gix-glob/src/parse.rs86
-rw-r--r--vendor/gix-glob/src/pattern.rs162
-rw-r--r--vendor/gix-glob/src/wildmatch.rs354
4 files changed, 643 insertions, 0 deletions
diff --git a/vendor/gix-glob/src/lib.rs b/vendor/gix-glob/src/lib.rs
new file mode 100644
index 000000000..48d011a52
--- /dev/null
+++ b/vendor/gix-glob/src/lib.rs
@@ -0,0 +1,41 @@
+//! Provide glob [`Patterns`][Pattern] for matching against paths or anything else.
+//! ## Feature Flags
+#![cfg_attr(
+ feature = "document-features",
+ cfg_attr(doc, doc = ::document_features::document_features!())
+)]
+#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
+#![deny(missing_docs, rust_2018_idioms)]
+#![forbid(unsafe_code)]
+
+use bstr::BString;
+
+/// A glob pattern optimized for matching paths relative to a root directory.
+///
+/// For normal globbing, use [`wildmatch()`] instead.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Pattern {
+ /// the actual pattern bytes
+ pub text: BString,
+ /// Additional information to help accelerate pattern matching.
+ pub mode: pattern::Mode,
+ /// The position in `text` with the first wildcard character, or `None` if there is no wildcard at all.
+ pub first_wildcard_pos: Option<usize>,
+}
+
+///
+pub mod pattern;
+
+///
+pub mod wildmatch;
+pub use wildmatch::function::wildmatch;
+
+mod parse;
+
+/// Create a [`Pattern`] by parsing `text` or return `None` if `text` is empty.
+///
+/// Note that
+pub fn parse(text: impl AsRef<[u8]>) -> Option<Pattern> {
+ Pattern::from_bytes(text.as_ref())
+}
diff --git a/vendor/gix-glob/src/parse.rs b/vendor/gix-glob/src/parse.rs
new file mode 100644
index 000000000..3693f88ef
--- /dev/null
+++ b/vendor/gix-glob/src/parse.rs
@@ -0,0 +1,86 @@
+use bstr::{BString, ByteSlice};
+
+use crate::{pattern, pattern::Mode};
+
+#[inline]
+/// A sloppy parser that performs only the most basic checks, providing additional information
+/// using `pattern::Mode` flags.
+///
+/// Returns `(pattern, mode, no_wildcard_len)`
+pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)> {
+ let mut mode = Mode::empty();
+ if pat.is_empty() {
+ return None;
+ };
+ if pat.first() == Some(&b'!') {
+ mode |= Mode::NEGATIVE;
+ pat = &pat[1..];
+ } else if pat.first() == Some(&b'\\') {
+ let second = pat.get(1);
+ if second == Some(&b'!') || second == Some(&b'#') {
+ pat = &pat[1..];
+ }
+ }
+ if pat.iter().all(|b| b.is_ascii_whitespace()) {
+ return None;
+ }
+ if pat.first() == Some(&b'/') {
+ mode |= Mode::ABSOLUTE;
+ pat = &pat[1..];
+ }
+ let mut pat = truncate_non_escaped_trailing_spaces(pat);
+ if pat.last() == Some(&b'/') {
+ mode |= Mode::MUST_BE_DIR;
+ pat.pop();
+ }
+
+ if !pat.contains(&b'/') {
+ mode |= Mode::NO_SUB_DIR;
+ }
+ if pat.first() == Some(&b'*') && first_wildcard_pos(&pat[1..]).is_none() {
+ mode |= Mode::ENDS_WITH;
+ }
+
+ let pos_of_first_wildcard = first_wildcard_pos(&pat);
+ Some((pat, mode, pos_of_first_wildcard))
+}
+
+fn first_wildcard_pos(pat: &[u8]) -> Option<usize> {
+ pat.find_byteset(GLOB_CHARACTERS)
+}
+
+pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\";
+
+/// We always copy just because that's ultimately needed anyway, not because we always have to.
+fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
+ match buf.rfind_not_byteset(br"\ ") {
+ Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace
+ None => buf.into(),
+ Some(start_of_non_space) => {
+ // This seems a bit strange but attempts to recreate the git implementation while
+ // actually removing the escape characters before spaces. We leave other backslashes
+ // for escapes to be handled by `glob/globset`.
+ let mut res: BString = buf[..start_of_non_space + 1].into();
+
+ let mut trailing_bytes = buf[start_of_non_space + 1..].iter();
+ let mut bare_spaces = 0;
+ while let Some(b) = trailing_bytes.next() {
+ match b {
+ b' ' => {
+ bare_spaces += 1;
+ }
+ b'\\' => {
+ res.extend(std::iter::repeat(b' ').take(bare_spaces));
+ bare_spaces = 0;
+ // Skip what follows, like git does, but keep spaces if possible.
+ if trailing_bytes.next() == Some(&b' ') {
+ res.push(b' ');
+ }
+ }
+ _ => unreachable!("BUG: this must be either backslash or space"),
+ }
+ }
+ res
+ }
+ }
+}
diff --git a/vendor/gix-glob/src/pattern.rs b/vendor/gix-glob/src/pattern.rs
new file mode 100644
index 000000000..fa874b226
--- /dev/null
+++ b/vendor/gix-glob/src/pattern.rs
@@ -0,0 +1,162 @@
+use std::fmt;
+
+use bitflags::bitflags;
+use bstr::{BStr, ByteSlice};
+
+use crate::{pattern, wildmatch, Pattern};
+
+bitflags! {
+ /// Information about a [`Pattern`].
+ ///
+ /// Its main purpose is to accelerate pattern matching, or to negate the match result or to
+ /// keep special rules only applicable when matching paths.
+ ///
+ /// The mode is typically created when parsing the pattern by inspecting it and isn't typically handled by the user.
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Mode: u32 {
+ /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one.
+ const NO_SUB_DIR = 1 << 0;
+ /// A pattern that is '*literal', meaning that it ends with what's given here
+ const ENDS_WITH = 1 << 1;
+ /// The pattern must match a directory, and not a file.
+ const MUST_BE_DIR = 1 << 2;
+ /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller.
+ const NEGATIVE = 1 << 3;
+ /// The pattern starts with a slash and thus matches only from the beginning.
+ const ABSOLUTE = 1 << 4;
+ }
+}
+
+/// Describes whether to match a path case sensitively or not.
+///
+/// Used in [Pattern::matches_repo_relative_path()].
+#[derive(Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)]
+pub enum Case {
+ /// The case affects the match
+ Sensitive,
+ /// Ignore the case of ascii characters.
+ Fold,
+}
+
+impl Default for Case {
+ fn default() -> Self {
+ Case::Sensitive
+ }
+}
+
+impl Pattern {
+ /// Parse the given `text` as pattern, or return `None` if `text` was empty.
+ pub fn from_bytes(text: &[u8]) -> Option<Self> {
+ crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern {
+ text,
+ mode,
+ first_wildcard_pos,
+ })
+ }
+
+ /// Return true if a match is negated.
+ pub fn is_negative(&self) -> bool {
+ self.mode.contains(Mode::NEGATIVE)
+ }
+
+ /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root.
+ /// Note that `path` is assumed to be relative to the repository.
+ ///
+ /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play.
+ /// `basename_start_pos` is the index at which the `path`'s basename starts.
+ ///
+ /// Lastly, `case` folding can be configured as well.
+ pub fn matches_repo_relative_path<'a>(
+ &self,
+ path: impl Into<&'a BStr>,
+ basename_start_pos: Option<usize>,
+ is_dir: Option<bool>,
+ case: Case,
+ ) -> bool {
+ let is_dir = is_dir.unwrap_or(false);
+ if !is_dir && self.mode.contains(pattern::Mode::MUST_BE_DIR) {
+ return false;
+ }
+
+ let flags = wildmatch::Mode::NO_MATCH_SLASH_LITERAL
+ | match case {
+ Case::Fold => wildmatch::Mode::IGNORE_CASE,
+ Case::Sensitive => wildmatch::Mode::empty(),
+ };
+ let path = path.into();
+ debug_assert_eq!(
+ basename_start_pos,
+ path.rfind_byte(b'/').map(|p| p + 1),
+ "BUG: invalid cached basename_start_pos provided"
+ );
+ debug_assert!(!path.starts_with(b"/"), "input path must be relative");
+
+ if self.mode.contains(pattern::Mode::NO_SUB_DIR) && !self.mode.contains(pattern::Mode::ABSOLUTE) {
+ let basename = &path[basename_start_pos.unwrap_or_default()..];
+ self.matches(basename, flags)
+ } else {
+ self.matches(path, flags)
+ }
+ }
+
+ /// See if `value` matches this pattern in the given `mode`.
+ ///
+ /// `mode` can identify `value` as path which won't match the slash character, and can match
+ /// strings with cases ignored as well. Note that the case folding performed here is ASCII only.
+ ///
+ /// Note that this method uses some shortcuts to accelerate simple patterns.
+ fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool {
+ let value = value.into();
+ match self.first_wildcard_pos {
+ // "*literal" case, overrides starts-with
+ Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) && !value.contains(&b'/') => {
+ let text = &self.text[pos + 1..];
+ if mode.contains(wildmatch::Mode::IGNORE_CASE) {
+ value
+ .len()
+ .checked_sub(text.len())
+ .map(|start| text.eq_ignore_ascii_case(&value[start..]))
+ .unwrap_or(false)
+ } else {
+ value.ends_with(text.as_ref())
+ }
+ }
+ Some(pos) => {
+ if mode.contains(wildmatch::Mode::IGNORE_CASE) {
+ if !value
+ .get(..pos)
+ .map_or(false, |value| value.eq_ignore_ascii_case(&self.text[..pos]))
+ {
+ return false;
+ }
+ } else if !value.starts_with(&self.text[..pos]) {
+ return false;
+ }
+ crate::wildmatch(self.text.as_bstr(), value, mode)
+ }
+ None => {
+ if mode.contains(wildmatch::Mode::IGNORE_CASE) {
+ self.text.eq_ignore_ascii_case(value)
+ } else {
+ self.text == value
+ }
+ }
+ }
+ }
+}
+
+impl fmt::Display for Pattern {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ if self.mode.contains(Mode::NEGATIVE) {
+ "!".fmt(f)?;
+ }
+ if self.mode.contains(Mode::ABSOLUTE) {
+ "/".fmt(f)?;
+ }
+ self.text.fmt(f)?;
+ if self.mode.contains(Mode::MUST_BE_DIR) {
+ "/".fmt(f)?;
+ }
+ Ok(())
+ }
+}
diff --git a/vendor/gix-glob/src/wildmatch.rs b/vendor/gix-glob/src/wildmatch.rs
new file mode 100644
index 000000000..4b2e33948
--- /dev/null
+++ b/vendor/gix-glob/src/wildmatch.rs
@@ -0,0 +1,354 @@
+use bitflags::bitflags;
+bitflags! {
+ /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Mode: u8 {
+ /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
+ const NO_MATCH_SLASH_LITERAL = 1 << 0;
+ /// Match case insensitively for ascii characters only.
+ const IGNORE_CASE = 1 << 1;
+ }
+}
+
+pub(crate) mod function {
+ use bstr::{BStr, ByteSlice};
+
+ use crate::wildmatch::Mode;
+
+ #[derive(Eq, PartialEq)]
+ enum Result {
+ Match,
+ NoMatch,
+ AbortAll,
+ AbortToStarStar,
+ }
+
+ const STAR: u8 = b'*';
+ const BACKSLASH: u8 = b'\\';
+ const SLASH: u8 = b'/';
+ const BRACKET_OPEN: u8 = b'[';
+ const BRACKET_CLOSE: u8 = b']';
+ const COLON: u8 = b':';
+
+ const NEGATE_CLASS: u8 = b'!';
+
+ fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result {
+ use self::Result::*;
+ let possibly_lowercase = |c: &u8| {
+ if mode.contains(Mode::IGNORE_CASE) {
+ c.to_ascii_lowercase()
+ } else {
+ *c
+ }
+ };
+ let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable();
+ let mut t = text.iter().map(possibly_lowercase).enumerate();
+
+ while let Some((mut p_idx, mut p_ch)) = p.next() {
+ let (mut t_idx, mut t_ch) = match t.next() {
+ Some(c) => c,
+ None if p_ch != STAR => return AbortAll,
+ None => (text.len(), 0),
+ };
+
+ if p_ch == BACKSLASH {
+ match p.next() {
+ Some((_p_idx, p_ch)) => {
+ if p_ch != t_ch {
+ return NoMatch;
+ } else {
+ continue;
+ }
+ }
+ None => return NoMatch,
+ };
+ }
+ match p_ch {
+ b'?' => {
+ if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
+ return NoMatch;
+ } else {
+ continue;
+ }
+ }
+ STAR => {
+ let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL);
+ match p.next() {
+ Some((next_p_idx, next_p_ch)) => {
+ let next;
+ if next_p_ch == STAR {
+ let leading_slash_idx = p_idx.checked_sub(1);
+ while p.next_if(|(_, c)| *c == STAR).is_some() {}
+ next = p.next();
+ if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) {
+ match_slash = true;
+ } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH)
+ && next.map_or(true, |(_, c)| {
+ c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH))
+ })
+ {
+ if next.map_or(NoMatch, |(idx, _)| {
+ match_recursive(pattern[idx + 1..].as_bstr(), text[t_idx..].as_bstr(), mode)
+ }) == Match
+ {
+ return Match;
+ }
+ match_slash = true;
+ } else {
+ match_slash = false;
+ }
+ } else {
+ next = Some((next_p_idx, next_p_ch));
+ }
+
+ match next {
+ None => {
+ return if !match_slash && text[t_idx..].contains(&SLASH) {
+ NoMatch
+ } else {
+ Match
+ };
+ }
+ Some((next_p_idx, next_p_ch)) => {
+ p_idx = next_p_idx;
+ p_ch = next_p_ch;
+ if !match_slash && p_ch == SLASH {
+ match text[t_idx..].find_byte(SLASH) {
+ Some(distance_to_slash) => {
+ for _ in t.by_ref().take(distance_to_slash) {}
+ continue;
+ }
+ None => return NoMatch,
+ }
+ }
+ }
+ }
+ }
+ None => {
+ return if !match_slash && text[t_idx..].contains(&SLASH) {
+ NoMatch
+ } else {
+ Match
+ }
+ }
+ }
+
+ return loop {
+ if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) {
+ loop {
+ if (!match_slash && t_ch == SLASH) || t_ch == p_ch {
+ break;
+ }
+ match t.next() {
+ Some(t) => {
+ t_idx = t.0;
+ t_ch = t.1;
+ }
+ None => break,
+ };
+ }
+ if t_ch != p_ch {
+ return NoMatch;
+ }
+ }
+ let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode);
+ if res != NoMatch {
+ if !match_slash || res != AbortToStarStar {
+ return res;
+ }
+ } else if !match_slash && t_ch == SLASH {
+ return AbortToStarStar;
+ }
+ match t.next() {
+ Some(t) => {
+ t_idx = t.0;
+ t_ch = t.1;
+ }
+ None => break AbortAll,
+ };
+ };
+ }
+ BRACKET_OPEN => {
+ match p.next() {
+ Some(t) => {
+ p_idx = t.0;
+ p_ch = t.1;
+ }
+ None => return AbortAll,
+ };
+
+ if p_ch == b'^' {
+ p_ch = NEGATE_CLASS;
+ }
+ let negated = p_ch == NEGATE_CLASS;
+ let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) };
+ let mut prev_p_ch = 0;
+ let mut matched = false;
+ loop {
+ match next {
+ None => return AbortAll,
+ Some((p_idx, mut p_ch)) => match p_ch {
+ BACKSLASH => match p.next() {
+ Some((_, p_ch)) => {
+ if p_ch == t_ch {
+ matched = true
+ } else {
+ prev_p_ch = p_ch;
+ }
+ }
+ None => return AbortAll,
+ },
+ b'-' if prev_p_ch != 0
+ && p.peek().is_some()
+ && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) =>
+ {
+ p_ch = p.next().expect("peeked").1;
+ if p_ch == BACKSLASH {
+ p_ch = match p.next() {
+ Some(t) => t.1,
+ None => return AbortAll,
+ };
+ }
+ if t_ch <= p_ch && t_ch >= prev_p_ch {
+ matched = true;
+ } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() {
+ let t_ch_upper = t_ch.to_ascii_uppercase();
+ if (t_ch_upper <= p_ch.to_ascii_uppercase()
+ && t_ch_upper >= prev_p_ch.to_ascii_uppercase())
+ || (t_ch_upper <= prev_p_ch.to_ascii_uppercase()
+ && t_ch_upper >= p_ch.to_ascii_uppercase())
+ {
+ matched = true;
+ }
+ }
+ prev_p_ch = 0;
+ }
+ BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => {
+ p.next();
+ while p.peek().map_or(false, |t| t.1 != BRACKET_CLOSE) {
+ p.next();
+ }
+ let closing_bracket_idx = match p.next() {
+ Some((idx, _)) => idx,
+ None => return AbortAll,
+ };
+ const BRACKET__COLON__BRACKET: usize = 3;
+ if closing_bracket_idx - p_idx < BRACKET__COLON__BRACKET
+ || pattern[closing_bracket_idx - 1] != COLON
+ {
+ if t_ch == BRACKET_OPEN {
+ matched = true
+ }
+ p = pattern[p_idx + 1..]
+ .iter()
+ .map(possibly_lowercase)
+ .enumerate()
+ .peekable();
+ } else {
+ let class = &pattern.as_bytes()[p_idx + 2..closing_bracket_idx - 1];
+ match class {
+ b"alnum" => {
+ if t_ch.is_ascii_alphanumeric() {
+ matched = true;
+ }
+ }
+ b"alpha" => {
+ if t_ch.is_ascii_alphabetic() {
+ matched = true;
+ }
+ }
+ b"blank" => {
+ if t_ch.is_ascii_whitespace() {
+ matched = true;
+ }
+ }
+ b"cntrl" => {
+ if t_ch.is_ascii_control() {
+ matched = true;
+ }
+ }
+ b"digit" => {
+ if t_ch.is_ascii_digit() {
+ matched = true;
+ }
+ }
+
+ b"graph" => {
+ if t_ch.is_ascii_graphic() {
+ matched = true;
+ }
+ }
+ b"lower" => {
+ if t_ch.is_ascii_lowercase() {
+ matched = true;
+ }
+ }
+ b"print" => {
+ if (0x20u8..=0x7e).contains(&t_ch) {
+ matched = true;
+ }
+ }
+ b"punct" => {
+ if t_ch.is_ascii_punctuation() {
+ matched = true;
+ }
+ }
+ b"space" => {
+ if t_ch == b' ' {
+ matched = true;
+ }
+ }
+ b"upper" => {
+ if t_ch.is_ascii_uppercase()
+ || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase()
+ {
+ matched = true;
+ }
+ }
+ b"xdigit" => {
+ if t_ch.is_ascii_hexdigit() {
+ matched = true;
+ }
+ }
+ _ => return AbortAll,
+ };
+ prev_p_ch = 0;
+ }
+ }
+ _ => {
+ prev_p_ch = p_ch;
+ if p_ch == t_ch {
+ matched = true;
+ }
+ }
+ },
+ };
+ next = p.next();
+ if let Some((_, BRACKET_CLOSE)) = next {
+ break;
+ }
+ }
+ if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
+ return NoMatch;
+ }
+ continue;
+ }
+ non_glob_ch => {
+ if non_glob_ch != t_ch {
+ return NoMatch;
+ } else {
+ continue;
+ }
+ }
+ }
+ }
+ t.next().map(|_| NoMatch).unwrap_or(Match)
+ }
+
+ /// Employ pattern matching to see if `value` matches `pattern`.
+ ///
+ /// `mode` can be used to adjust the way the matching is performed.
+ pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool {
+ match_recursive(pattern, value, mode) == Result::Match
+ }
+}