diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:42 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:42 +0000 |
commit | 837b550238aa671a591ccf282dddeab29cadb206 (patch) | |
tree | 914b6b8862bace72bd3245ca184d374b08d8a672 /vendor/gix-glob/src | |
parent | Adding debian version 1.70.0+dfsg2-1. (diff) | |
download | rustc-837b550238aa671a591ccf282dddeab29cadb206.tar.xz rustc-837b550238aa671a591ccf282dddeab29cadb206.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-glob/src')
-rw-r--r-- | vendor/gix-glob/src/lib.rs | 4 | ||||
-rw-r--r-- | vendor/gix-glob/src/parse.rs | 43 | ||||
-rw-r--r-- | vendor/gix-glob/src/pattern.rs | 23 | ||||
-rw-r--r-- | vendor/gix-glob/src/search/mod.rs | 42 | ||||
-rw-r--r-- | vendor/gix-glob/src/search/pattern.rs | 152 | ||||
-rw-r--r-- | vendor/gix-glob/src/wildmatch.rs | 3 |
6 files changed, 215 insertions, 52 deletions
diff --git a/vendor/gix-glob/src/lib.rs b/vendor/gix-glob/src/lib.rs index 48d011a52..a753e671a 100644 --- a/vendor/gix-glob/src/lib.rs +++ b/vendor/gix-glob/src/lib.rs @@ -14,7 +14,7 @@ use bstr::BString; /// /// For normal globbing, use [`wildmatch()`] instead. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Pattern { /// the actual pattern bytes pub text: BString, @@ -27,6 +27,8 @@ pub struct Pattern { /// pub mod pattern; +pub mod search; + /// pub mod wildmatch; pub use wildmatch::function::wildmatch; diff --git a/vendor/gix-glob/src/parse.rs b/vendor/gix-glob/src/parse.rs index 3693f88ef..665f459b9 100644 --- a/vendor/gix-glob/src/parse.rs +++ b/vendor/gix-glob/src/parse.rs @@ -1,4 +1,4 @@ -use bstr::{BString, ByteSlice}; +use bstr::ByteSlice; use crate::{pattern, pattern::Mode}; @@ -7,7 +7,7 @@ use crate::{pattern, pattern::Mode}; /// using `pattern::Mode` flags. /// /// Returns `(pattern, mode, no_wildcard_len)` -pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)> { +pub fn pattern(mut pat: &[u8]) -> Option<(&[u8], pattern::Mode, Option<usize>)> { let mut mode = Mode::empty(); if pat.is_empty() { return None; @@ -28,10 +28,9 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>) mode |= Mode::ABSOLUTE; pat = &pat[1..]; } - let mut pat = truncate_non_escaped_trailing_spaces(pat); if pat.last() == Some(&b'/') { mode |= Mode::MUST_BE_DIR; - pat.pop(); + pat = &pat[..pat.len() - 1]; } if !pat.contains(&b'/') { @@ -41,7 +40,7 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>) mode |= Mode::ENDS_WITH; } - let pos_of_first_wildcard = first_wildcard_pos(&pat); + let pos_of_first_wildcard = first_wildcard_pos(pat); Some((pat, mode, pos_of_first_wildcard)) } @@ -50,37 +49,3 @@ fn first_wildcard_pos(pat: &[u8]) -> Option<usize> { } pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\"; - -/// We always copy just because that's ultimately needed anyway, not because we always have to. -fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { - match buf.rfind_not_byteset(br"\ ") { - Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace - None => buf.into(), - Some(start_of_non_space) => { - // This seems a bit strange but attempts to recreate the git implementation while - // actually removing the escape characters before spaces. We leave other backslashes - // for escapes to be handled by `glob/globset`. - let mut res: BString = buf[..start_of_non_space + 1].into(); - - let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); - let mut bare_spaces = 0; - while let Some(b) = trailing_bytes.next() { - match b { - b' ' => { - bare_spaces += 1; - } - b'\\' => { - res.extend(std::iter::repeat(b' ').take(bare_spaces)); - bare_spaces = 0; - // Skip what follows, like git does, but keep spaces if possible. - if trailing_bytes.next() == Some(&b' ') { - res.push(b' '); - } - } - _ => unreachable!("BUG: this must be either backslash or space"), - } - } - res - } - } -} diff --git a/vendor/gix-glob/src/pattern.rs b/vendor/gix-glob/src/pattern.rs index fa874b226..2aefaa9a0 100644 --- a/vendor/gix-glob/src/pattern.rs +++ b/vendor/gix-glob/src/pattern.rs @@ -12,7 +12,8 @@ bitflags! { /// keep special rules only applicable when matching paths. /// /// The mode is typically created when parsing the pattern by inspecting it and isn't typically handled by the user. - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Ord, PartialOrd)] pub struct Mode: u32 { /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. const NO_SUB_DIR = 1 << 0; @@ -30,30 +31,29 @@ bitflags! { /// Describes whether to match a path case sensitively or not. /// /// Used in [Pattern::matches_repo_relative_path()]. -#[derive(Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)] +#[derive(Default, Debug, PartialOrd, PartialEq, Copy, Clone, Hash, Ord, Eq)] pub enum Case { /// The case affects the match + #[default] Sensitive, /// Ignore the case of ascii characters. Fold, } -impl Default for Case { - fn default() -> Self { - Case::Sensitive - } -} - +/// Instantiation impl Pattern { /// Parse the given `text` as pattern, or return `None` if `text` was empty. pub fn from_bytes(text: &[u8]) -> Option<Self> { crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { - text, + text: text.into(), mode, first_wildcard_pos, }) } +} +/// Access +impl Pattern { /// Return true if a match is negated. pub fn is_negative(&self) -> bool { self.mode.contains(Mode::NEGATIVE) @@ -104,8 +104,9 @@ impl Pattern { /// `mode` can identify `value` as path which won't match the slash character, and can match /// strings with cases ignored as well. Note that the case folding performed here is ASCII only. /// - /// Note that this method uses some shortcuts to accelerate simple patterns. - fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { + /// Note that this method uses some shortcuts to accelerate simple patterns, but falls back to + /// [wildmatch()][crate::wildmatch()] if these fail. + pub fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { let value = value.into(); match self.first_wildcard_pos { // "*literal" case, overrides starts-with diff --git a/vendor/gix-glob/src/search/mod.rs b/vendor/gix-glob/src/search/mod.rs new file mode 100644 index 000000000..a31e2af37 --- /dev/null +++ b/vendor/gix-glob/src/search/mod.rs @@ -0,0 +1,42 @@ +//! Utilities for searching matches of paths to patterns. +//! +//! Please note that these are specific to how both excludes and attributes are searched, and this is +//! merely a way to share code among them. +use std::path::{Path, PathBuf}; + +/// +pub mod pattern; + +/// A trait to convert bytes into patterns and their associated value. +/// +/// This is used for `gitattributes` which have a value, and `gitignore` which don't. +pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default { + /// The value associated with a pattern. + type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone; + + /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them. + fn bytes_to_patterns(bytes: &[u8], source: &Path) -> Vec<pattern::Mapping<Self::Value>>; + + /// Returns true if the given pattern may be used for matching. + fn may_use_glob_pattern(pattern: &crate::Pattern) -> bool; +} + +/// Add the given file at `source` if it exists, otherwise do nothing. +/// If a `root` is provided, it's not considered a global file anymore. +/// Returns `true` if the file was added, or `false` if it didn't exist. +pub fn add_patterns_file<T: Pattern>( + patterns: &mut Vec<pattern::List<T>>, + source: impl Into<PathBuf>, + follow_symlinks: bool, + root: Option<&Path>, + buf: &mut Vec<u8>, +) -> std::io::Result<bool> { + let previous_len = patterns.len(); + patterns.extend(pattern::List::<T>::from_file( + source.into(), + root, + follow_symlinks, + buf, + )?); + Ok(patterns.len() != previous_len) +} diff --git a/vendor/gix-glob/src/search/pattern.rs b/vendor/gix-glob/src/search/pattern.rs new file mode 100644 index 000000000..8bb195757 --- /dev/null +++ b/vendor/gix-glob/src/search/pattern.rs @@ -0,0 +1,152 @@ +use std::{ + io::Read, + path::{Path, PathBuf}, +}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::{pattern::Case, search::Pattern}; + +/// A list of patterns which optionally know where they were loaded from and what their base is. +/// +/// Knowing their base which is relative to a source directory, it will ignore all path to match against +/// that don't also start with said base. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct List<T: Pattern> { + /// Patterns and their associated data in the order they were loaded in or specified, + /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). + /// + /// During matching, this order is reversed. + pub patterns: Vec<Mapping<T::Value>>, + + /// The path from which the patterns were read, or `None` if the patterns + /// don't originate in a file on disk. + pub source: Option<PathBuf>, + + /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root. + /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root. + pub base: Option<BString>, +} + +/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Mapping<T> { + /// The pattern itself, like `/target/*` + pub pattern: crate::Pattern, + /// The value associated with the pattern. + pub value: T, + /// Typically the line number in the file the pattern was parsed from. + pub sequence_number: usize, +} + +fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> { + buf.clear(); + let file = if follow_symlinks { + std::fs::File::open(path) + } else { + gix_features::fs::open_options_no_follow().read(true).open(path) + }; + Ok(match file { + Ok(mut file) => { + file.read_to_end(buf)?; + true + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => false, + Err(err) => return Err(err), + }) +} + +/// Instantiation +impl<T> List<T> +where + T: Pattern, +{ + /// `source_file` is the location of the `bytes` which represents a list of patterns, one pattern per line. + /// If `root` is `Some(…)` it's used to see `source_file` as relative to itself, if `source_file` is absolute. + /// If source is relative and should be treated as base, set `root` to `Some("")`. + pub fn from_bytes(bytes: &[u8], source_file: impl Into<PathBuf>, root: Option<&Path>) -> Self { + let source = source_file.into(); + let patterns = T::bytes_to_patterns(bytes, source.as_path()); + + let base = root + .and_then(|root| source.parent().expect("file").strip_prefix(root).ok()) + .and_then(|base| { + (!base.as_os_str().is_empty()).then(|| { + let mut base: BString = + gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned(); + + base.push_byte(b'/'); + base + }) + }); + List { + patterns, + source: Some(source), + base, + } + } + + /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally + /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file. + pub fn from_file( + source: impl Into<PathBuf>, + root: Option<&Path>, + follow_symlinks: bool, + buf: &mut Vec<u8>, + ) -> std::io::Result<Option<Self>> { + let source = source.into(); + Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root))) + } +} + +/// Utilities +impl<T> List<T> +where + T: Pattern, +{ + /// If this list is anchored to a base path, return `relative_path` as being relative to our base and return + /// an updated `basename_pos` as well if it was set. + /// `case` is respected for the comparison. + /// + /// This is useful to turn repository-relative paths into paths relative to a particular search base. + pub fn strip_base_handle_recompute_basename_pos<'a>( + &self, + relative_path: &'a BStr, + basename_pos: Option<usize>, + case: Case, + ) -> Option<(&'a BStr, Option<usize>)> { + match self.base.as_deref() { + Some(base) => strip_base_handle_recompute_basename_pos(base.as_bstr(), relative_path, basename_pos, case)?, + None => (relative_path, basename_pos), + } + .into() + } +} + +/// Return`relative_path` as being relative to `base` along with an updated `basename_pos` if it was set. +/// `case` is respected for the comparison. +/// +/// This is useful to turn repository-relative paths into paths relative to a particular search base. +pub fn strip_base_handle_recompute_basename_pos<'a>( + base: &BStr, + relative_path: &'a BStr, + basename_pos: Option<usize>, + case: Case, +) -> Option<(&'a BStr, Option<usize>)> { + Some(( + match case { + Case::Sensitive => relative_path.strip_prefix(base.as_bytes())?.as_bstr(), + Case::Fold => { + let rela_dir = relative_path.get(..base.len())?; + if !rela_dir.eq_ignore_ascii_case(base) { + return None; + } + &relative_path[base.len()..] + } + }, + basename_pos.and_then(|pos| { + let pos = pos - base.len(); + (pos != 0).then_some(pos) + }), + )) +} diff --git a/vendor/gix-glob/src/wildmatch.rs b/vendor/gix-glob/src/wildmatch.rs index 4b2e33948..5144a9b8b 100644 --- a/vendor/gix-glob/src/wildmatch.rs +++ b/vendor/gix-glob/src/wildmatch.rs @@ -1,7 +1,8 @@ use bitflags::bitflags; bitflags! { /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()]. - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] pub struct Mode: u8 { /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths. const NO_MATCH_SLASH_LITERAL = 1 << 0; |