summary | refs | log | tree | commit | diff | stats
path: root/vendor/gix-glob/src
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
commit 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/gix-glob/src
parent Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
download rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-glob/src')
-rw-r--r-- vendor/gix-glob/src/lib.rs 4
-rw-r--r-- vendor/gix-glob/src/parse.rs 43
-rw-r--r-- vendor/gix-glob/src/pattern.rs 23
-rw-r--r-- vendor/gix-glob/src/search/mod.rs 42
-rw-r--r-- vendor/gix-glob/src/search/pattern.rs 152
-rw-r--r-- vendor/gix-glob/src/wildmatch.rs 3
6 files changed, 215 insertions, 52 deletions
diff --git a/vendor/gix-glob/src/lib.rs b/vendor/gix-glob/src/lib.rs
index 48d011a52..a753e671a 100644
--- a/vendor/gix-glob/src/lib.rs
+++ b/vendor/gix-glob/src/lib.rs
@@ -14,7 +14,7 @@ use bstr::BString;
///
/// For normal globbing, use [`wildmatch()`] instead.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
-#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Pattern {
/// the actual pattern bytes
pub text: BString,
@@ -27,6 +27,8 @@ pub struct Pattern {
///
pub mod pattern;
+pub mod search;
+
///
pub mod wildmatch;
pub use wildmatch::function::wildmatch;
diff --git a/vendor/gix-glob/src/parse.rs b/vendor/gix-glob/src/parse.rs
index 3693f88ef..665f459b9 100644
--- a/vendor/gix-glob/src/parse.rs
+++ b/vendor/gix-glob/src/parse.rs
@@ -1,4 +1,4 @@
-use bstr::{BString, ByteSlice};
+use bstr::ByteSlice;
use crate::{pattern, pattern::Mode};
@@ -7,7 +7,7 @@ use crate::{pattern, pattern::Mode};
/// using `pattern::Mode` flags.
///
/// Returns `(pattern, mode, no_wildcard_len)`
-pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)> {
+pub fn pattern(mut pat: &[u8]) -> Option<(&[u8], pattern::Mode, Option<usize>)> {
let mut mode = Mode::empty();
if pat.is_empty() {
return None;
@@ -28,10 +28,9 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)
mode |= Mode::ABSOLUTE;
pat = &pat[1..];
}
- let mut pat = truncate_non_escaped_trailing_spaces(pat);
if pat.last() == Some(&b'/') {
mode |= Mode::MUST_BE_DIR;
- pat.pop();
+ pat = &pat[..pat.len() - 1];
}
if !pat.contains(&b'/') {
@@ -41,7 +40,7 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)
mode |= Mode::ENDS_WITH;
}
- let pos_of_first_wildcard = first_wildcard_pos(&pat);
+ let pos_of_first_wildcard = first_wildcard_pos(pat);
Some((pat, mode, pos_of_first_wildcard))
}
@@ -50,37 +49,3 @@ fn first_wildcard_pos(pat: &[u8]) -> Option<usize> {
}
pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\";
-
-/// We always copy just because that's ultimately needed anyway, not because we always have to.
-fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
- match buf.rfind_not_byteset(br"\ ") {
- Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace
- None => buf.into(),
- Some(start_of_non_space) => {
- // This seems a bit strange but attempts to recreate the git implementation while
- // actually removing the escape characters before spaces. We leave other backslashes
- // for escapes to be handled by `glob/globset`.
- let mut res: BString = buf[..start_of_non_space + 1].into();
-
- let mut trailing_bytes = buf[start_of_non_space + 1..].iter();
- let mut bare_spaces = 0;
- while let Some(b) = trailing_bytes.next() {
- match b {
- b' ' => {
- bare_spaces += 1;
- }
- b'\\' => {
- res.extend(std::iter::repeat(b' ').take(bare_spaces));
- bare_spaces = 0;
- // Skip what follows, like git does, but keep spaces if possible.
- if trailing_bytes.next() == Some(&b' ') {
- res.push(b' ');
- }
- }
- _ => unreachable!("BUG: this must be either backslash or space"),
- }
- }
- res
- }
- }
-}
diff --git a/vendor/gix-glob/src/pattern.rs b/vendor/gix-glob/src/pattern.rs
index fa874b226..2aefaa9a0 100644
--- a/vendor/gix-glob/src/pattern.rs
+++ b/vendor/gix-glob/src/pattern.rs
@@ -12,7 +12,8 @@ bitflags! {
/// keep special rules only applicable when matching paths.
///
/// The mode is typically created when parsing the pattern by inspecting it and isn't typically handled by the user.
- #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Ord, PartialOrd)]
pub struct Mode: u32 {
/// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one.
const NO_SUB_DIR = 1 << 0;
@@ -30,30 +31,29 @@ bitflags! {
/// Describes whether to match a path case sensitively or not.
///
/// Used in [Pattern::matches_repo_relative_path()].
-#[derive(Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)]
+#[derive(Default, Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)]
pub enum Case {
/// The case affects the match
+ #[default]
Sensitive,
/// Ignore the case of ascii characters.
Fold,
}
-impl Default for Case {
- fn default() -> Self {
- Case::Sensitive
- }
-}
-
+/// Instantiation
impl Pattern {
/// Parse the given `text` as pattern, or return `None` if `text` was empty.
pub fn from_bytes(text: &[u8]) -> Option<Self> {
crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern {
- text,
+ text: text.into(),
mode,
first_wildcard_pos,
})
}
+}
+/// Access
+impl Pattern {
/// Return true if a match is negated.
pub fn is_negative(&self) -> bool {
self.mode.contains(Mode::NEGATIVE)
@@ -104,8 +104,9 @@ impl Pattern {
/// `mode` can identify `value` as path which won't match the slash character, and can match
/// strings with cases ignored as well. Note that the case folding performed here is ASCII only.
///
- /// Note that this method uses some shortcuts to accelerate simple patterns.
- fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool {
+ /// Note that this method uses some shortcuts to accelerate simple patterns, but falls back to
+ /// [wildmatch()][crate::wildmatch()] if these fail.
+ pub fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool {
let value = value.into();
match self.first_wildcard_pos {
// "*literal" case, overrides starts-with
diff --git a/vendor/gix-glob/src/search/mod.rs b/vendor/gix-glob/src/search/mod.rs
new file mode 100644
index 000000000..a31e2af37
--- /dev/null
+++ b/vendor/gix-glob/src/search/mod.rs
@@ -0,0 +1,42 @@
+//! Utilities for searching matches of paths to patterns.
+//!
+//! Please note that these are specific to how both excludes and attributes are searched, and this is
+//! merely a way to share code among them.
+use std::path::{Path, PathBuf};
+
+///
+pub mod pattern;
+
+/// A trait to convert bytes into patterns and their associated value.
+///
+/// This is used for `gitattributes` which have a value, and `gitignore` which don't.
+pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default {
+ /// The value associated with a pattern.
+ type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone;
+
+ /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them.
+ fn bytes_to_patterns(bytes: &[u8], source: &Path) -> Vec<pattern::Mapping<Self::Value>>;
+
+ /// Returns true if the given pattern may be used for matching.
+ fn may_use_glob_pattern(pattern: &crate::Pattern) -> bool;
+}
+
+/// Add the given file at `source` if it exists, otherwise do nothing.
+/// If a `root` is provided, it's not considered a global file anymore.
+/// Returns `true` if the file was added, or `false` if it didn't exist.
+pub fn add_patterns_file<T: Pattern>(
+ patterns: &mut Vec<pattern::List<T>>,
+ source: impl Into<PathBuf>,
+ follow_symlinks: bool,
+ root: Option<&Path>,
+ buf: &mut Vec<u8>,
+) -> std::io::Result<bool> {
+ let previous_len = patterns.len();
+ patterns.extend(pattern::List::<T>::from_file(
+ source.into(),
+ root,
+ follow_symlinks,
+ buf,
+ )?);
+ Ok(patterns.len() != previous_len)
+}
diff --git a/vendor/gix-glob/src/search/pattern.rs b/vendor/gix-glob/src/search/pattern.rs
new file mode 100644
index 000000000..8bb195757
--- /dev/null
+++ b/vendor/gix-glob/src/search/pattern.rs
@@ -0,0 +1,152 @@
+use std::{
+ io::Read,
+ path::{Path, PathBuf},
+};
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+
+use crate::{pattern::Case, search::Pattern};
+
+/// A list of patterns which optionally know where they were loaded from and what their base is.
+///
+/// Knowing their base, which is relative to a source directory, it will ignore all paths to match against
+/// that don't also start with said base.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
+pub struct List<T: Pattern> {
+ /// Patterns and their associated data in the order they were loaded in or specified,
+ /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_).
+ ///
+ /// During matching, this order is reversed.
+ pub patterns: Vec<Mapping<T::Value>>,
+
+ /// The path from which the patterns were read, or `None` if the patterns
+ /// don't originate in a file on disk.
+ pub source: Option<PathBuf>,
+
+ /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root.
+ /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root.
+ pub base: Option<BString>,
+}
+
+/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+pub struct Mapping<T> {
+ /// The pattern itself, like `/target/*`
+ pub pattern: crate::Pattern,
+ /// The value associated with the pattern.
+ pub value: T,
+ /// Typically the line number in the file the pattern was parsed from.
+ pub sequence_number: usize,
+}
+
+fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> {
+ buf.clear();
+ let file = if follow_symlinks {
+ std::fs::File::open(path)
+ } else {
+ gix_features::fs::open_options_no_follow().read(true).open(path)
+ };
+ Ok(match file {
+ Ok(mut file) => {
+ file.read_to_end(buf)?;
+ true
+ }
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => false,
+ Err(err) => return Err(err),
+ })
+}
+
+/// Instantiation
+impl<T> List<T>
+where
+ T: Pattern,
+{
+ /// `source_file` is the location of the `bytes` which represents a list of patterns, one pattern per line.
+ /// If `root` is `Some(…)` it's used to see `source_file` as relative to itself, if `source_file` is absolute.
+ /// If source is relative and should be treated as base, set `root` to `Some("")`.
+ pub fn from_bytes(bytes: &[u8], source_file: impl Into<PathBuf>, root: Option<&Path>) -> Self {
+ let source = source_file.into();
+ let patterns = T::bytes_to_patterns(bytes, source.as_path());
+
+ let base = root
+ .and_then(|root| source.parent().expect("file").strip_prefix(root).ok())
+ .and_then(|base| {
+ (!base.as_os_str().is_empty()).then(|| {
+ let mut base: BString =
+ gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned();
+
+ base.push_byte(b'/');
+ base
+ })
+ });
+ List {
+ patterns,
+ source: Some(source),
+ base,
+ }
+ }
+
+ /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally
+ /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file.
+ pub fn from_file(
+ source: impl Into<PathBuf>,
+ root: Option<&Path>,
+ follow_symlinks: bool,
+ buf: &mut Vec<u8>,
+ ) -> std::io::Result<Option<Self>> {
+ let source = source.into();
+ Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root)))
+ }
+}
+
+/// Utilities
+impl<T> List<T>
+where
+ T: Pattern,
+{
+ /// If this list is anchored to a base path, return `relative_path` as being relative to our base and return
+ /// an updated `basename_pos` as well if it was set.
+ /// `case` is respected for the comparison.
+ ///
+ /// This is useful to turn repository-relative paths into paths relative to a particular search base.
+ pub fn strip_base_handle_recompute_basename_pos<'a>(
+ &self,
+ relative_path: &'a BStr,
+ basename_pos: Option<usize>,
+ case: Case,
+ ) -> Option<(&'a BStr, Option<usize>)> {
+ match self.base.as_deref() {
+ Some(base) => strip_base_handle_recompute_basename_pos(base.as_bstr(), relative_path, basename_pos, case)?,
+ None => (relative_path, basename_pos),
+ }
+ .into()
+ }
+}
+
+/// Return `relative_path` as being relative to `base` along with an updated `basename_pos` if it was set.
+/// `case` is respected for the comparison.
+///
+/// This is useful to turn repository-relative paths into paths relative to a particular search base.
+pub fn strip_base_handle_recompute_basename_pos<'a>(
+ base: &BStr,
+ relative_path: &'a BStr,
+ basename_pos: Option<usize>,
+ case: Case,
+) -> Option<(&'a BStr, Option<usize>)> {
+ Some((
+ match case {
+ Case::Sensitive => relative_path.strip_prefix(base.as_bytes())?.as_bstr(),
+ Case::Fold => {
+ let rela_dir = relative_path.get(..base.len())?;
+ if !rela_dir.eq_ignore_ascii_case(base) {
+ return None;
+ }
+ &relative_path[base.len()..]
+ }
+ },
+ basename_pos.and_then(|pos| {
+ let pos = pos - base.len();
+ (pos != 0).then_some(pos)
+ }),
+ ))
+}
diff --git a/vendor/gix-glob/src/wildmatch.rs b/vendor/gix-glob/src/wildmatch.rs
index 4b2e33948..5144a9b8b 100644
--- a/vendor/gix-glob/src/wildmatch.rs
+++ b/vendor/gix-glob/src/wildmatch.rs
@@ -1,7 +1,8 @@
use bitflags::bitflags;
bitflags! {
/// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
- #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub struct Mode: u8 {
/// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
const NO_MATCH_SLASH_LITERAL = 1 << 0;