diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/gix-attributes/src | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-attributes/src')
-rw-r--r-- | vendor/gix-attributes/src/assignment.rs | 23 | ||||
-rw-r--r-- | vendor/gix-attributes/src/lib.rs | 93 | ||||
-rw-r--r-- | vendor/gix-attributes/src/match_group.rs | 354 | ||||
-rw-r--r-- | vendor/gix-attributes/src/name.rs | 26 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse.rs (renamed from vendor/gix-attributes/src/parse/attribute.rs) | 17 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse/ignore.rs | 36 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse/mod.rs | 10 | ||||
-rw-r--r-- | vendor/gix-attributes/src/search/attributes.rs | 217 | ||||
-rw-r--r-- | vendor/gix-attributes/src/search/mod.rs | 150 | ||||
-rw-r--r-- | vendor/gix-attributes/src/search/outcome.rs | 365 | ||||
-rw-r--r-- | vendor/gix-attributes/src/search/refmap.rs | 52 | ||||
-rw-r--r-- | vendor/gix-attributes/src/source.rs | 27 | ||||
-rw-r--r-- | vendor/gix-attributes/src/state.rs | 84 |
13 files changed, 986 insertions, 468 deletions
diff --git a/vendor/gix-attributes/src/assignment.rs b/vendor/gix-attributes/src/assignment.rs index e1d7263f7..87689d443 100644 --- a/vendor/gix-attributes/src/assignment.rs +++ b/vendor/gix-attributes/src/assignment.rs @@ -1,4 +1,6 @@ use crate::{Assignment, AssignmentRef, NameRef, StateRef}; +use bstr::ByteSlice; +use std::fmt::Write; impl<'a> AssignmentRef<'a> { pub(crate) fn new(name: NameRef<'a>, state: StateRef<'a>) -> AssignmentRef<'a> { @@ -26,3 +28,24 @@ impl<'a> Assignment { AssignmentRef::new(self.name.as_ref(), self.state.as_ref()) } } + +impl std::fmt::Display for AssignmentRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.state { + StateRef::Set => f.write_str(self.name.as_str()), + StateRef::Unset => { + f.write_char('-')?; + f.write_str(self.name.as_str()) + } + StateRef::Value(v) => { + f.write_str(self.name.as_str())?; + f.write_char('=')?; + f.write_str(v.as_bstr().to_str_lossy().as_ref()) + } + StateRef::Unspecified => { + f.write_char('!')?; + f.write_str(self.name.as_str()) + } + } + } +} diff --git a/vendor/gix-attributes/src/lib.rs b/vendor/gix-attributes/src/lib.rs index 7d95c022f..7eaac4282 100644 --- a/vendor/gix-attributes/src/lib.rs +++ b/vendor/gix-attributes/src/lib.rs @@ -1,4 +1,4 @@ -//! Parse `.gitattribute` and `.gitignore` files and provide utilities to match against them. +//! Parse `.gitattribute` files and provide utilities to match against them. //! //! ## Feature Flags #![cfg_attr( @@ -6,25 +6,26 @@ cfg_attr(doc, doc = ::document_features::document_features!()) )] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] -#![deny(missing_docs, rust_2018_idioms)] -#![forbid(unsafe_code)] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] -use std::path::PathBuf; - -use bstr::{BStr, BString}; pub use gix_glob as glob; +use kstring::{KString, KStringRef}; mod assignment; /// pub mod name; -mod state; +/// +pub mod state; -mod match_group; -pub use match_group::{Attributes, Ignore, Match, Pattern}; +/// +pub mod search; /// pub mod parse; -/// Parse attribute assignments line by line from `bytes`. + +/// Parse attribute assignments line by line from `bytes`, and fail the operation on error. +/// +/// For leniency, ignore errors using `filter_map(Result::ok)` for example. pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { parse::Lines::new(bytes) } @@ -33,7 +34,7 @@ pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { /// /// Note that this doesn't contain the name. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum StateRef<'a> { /// The attribute is listed, or has the special value 'true' Set, @@ -41,8 +42,8 @@ pub enum StateRef<'a> { Unset, /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. - #[cfg_attr(feature = "serde1", serde(borrow))] - Value(&'a BStr), + #[cfg_attr(feature = "serde", serde(borrow))] + Value(state::ValueRef<'a>), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, } @@ -51,7 +52,7 @@ pub enum StateRef<'a> { /// /// Note that this doesn't contain the name. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum State { /// The attribute is listed, or has the special value 'true' Set, @@ -59,23 +60,23 @@ pub enum State { Unset, /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. - Value(BString), // TODO(performance): Is there a non-utf8 compact_str/KBString crate? See https://github.com/cobalt-org/kstring/issues/37#issuecomment-1446777265 . + Value(state::Value), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, } /// Represents a validated attribute name #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] -pub struct Name(pub(crate) String); // TODO(performance): See if `KBString` or `compact_string` could be meaningful here. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Name(pub(crate) KString); /// Holds a validated attribute name as a reference -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] -pub struct NameRef<'a>(&'a str); +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct NameRef<'a>(KStringRef<'a>); /// Name an attribute and describe it's assigned state. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Assignment { /// The validated name of the attribute. pub name: Name, @@ -84,7 +85,7 @@ pub struct Assignment { } /// Holds validated attribute data as a reference -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] pub struct AssignmentRef<'a> { /// The name of the attribute. pub name: NameRef<'a>, @@ -92,46 +93,34 @@ pub struct AssignmentRef<'a> { pub state: StateRef<'a>, } -/// A grouping of lists of patterns while possibly keeping associated to their base path. +/// A grouping of lists of patterns while possibly keeping associated to their base path in order to find matches. /// /// Pattern lists with base path are queryable relative to that base, otherwise they are relative to the repository root. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct MatchGroup<T: Pattern = Attributes> { +pub struct Search { /// A list of pattern lists, each representing a patterns from a file or specified by hand, in the order they were /// specified in. /// - /// During matching, this order is reversed. - pub patterns: Vec<PatternList<T>>, + /// When matching, this order is reversed. + patterns: Vec<gix_glob::search::pattern::List<search::Attributes>>, } -/// A list of patterns which optionally know where they were loaded from and what their base is. +/// A list of known global sources for git attribute files in order of ascending precedence. /// -/// Knowing their base which is relative to a source directory, it will ignore all path to match against -/// that don't also start with said base. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct PatternList<T: Pattern> { - /// Patterns and their associated data in the order they were loaded in or specified, - /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). +/// This means that values from the first variant will be returned first. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +pub enum Source { + /// The attribute file that the installation itself ships with. + GitInstallation, + /// System-wide attributes file. This is typically defined as + /// `$(prefix)/etc/gitattributes` (where prefix is the git-installation directory). + System, + /// This is `<xdg-config-home>/git/attributes` and is git application configuration per user. /// - /// During matching, this order is reversed. - pub patterns: Vec<PatternMapping<T::Value>>, - - /// The path from which the patterns were read, or `None` if the patterns - /// don't originate in a file on disk. - pub source: Option<PathBuf>, - - /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root. - /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root. - pub base: Option<BString>, + /// Note that there is no `~/.gitattributes` file. + Git, + /// The configuration of the repository itself, located in `$GIT_DIR/info/attributes`. + Local, } -/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub struct PatternMapping<T> { - /// The pattern itself, like `/target/*` - pub pattern: gix_glob::Pattern, - /// The value associated with the pattern. - pub value: T, - /// Typically the line number in the file the pattern was parsed from. - pub sequence_number: usize, -} +mod source; diff --git a/vendor/gix-attributes/src/match_group.rs b/vendor/gix-attributes/src/match_group.rs deleted file mode 100644 index 018bf2567..000000000 --- a/vendor/gix-attributes/src/match_group.rs +++ /dev/null @@ -1,354 +0,0 @@ -use std::{ - ffi::OsString, - io::Read, - path::{Path, PathBuf}, -}; - -use bstr::{BStr, BString, ByteSlice, ByteVec}; - -use crate::{Assignment, MatchGroup, PatternList, PatternMapping}; - -fn into_owned_assignments<'a>( - attrs: impl Iterator<Item = Result<crate::AssignmentRef<'a>, crate::name::Error>>, -) -> Result<Vec<Assignment>, crate::name::Error> { - attrs.map(|res| res.map(|attr| attr.to_owned())).collect() -} - -/// A trait to convert bytes into patterns and their associated value. -/// -/// This is used for `gitattributes` which have a value, and `gitignore` which don't. -pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default { - /// The value associated with a pattern. - type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone; - - /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them. - fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>>; - - /// Returns true if the given pattern may be used for matching. - fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool; -} - -/// An implementation of the [`Pattern`] trait for ignore patterns. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct Ignore; - -impl Pattern for Ignore { - type Value = (); - - fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>> { - crate::parse::ignore(bytes) - .map(|(pattern, line_number)| PatternMapping { - pattern, - value: (), - sequence_number: line_number, - }) - .collect() - } - - fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { - true - } -} - -/// A value of an attribute pattern, which is either a macro definition or -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub enum Value { - MacroAttributes(Vec<Assignment>), - Assignments(Vec<Assignment>), -} - -/// An implementation of the [`Pattern`] trait for attributes. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] -pub struct Attributes; - -impl Pattern for Attributes { - type Value = Value; - - fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>> { - crate::parse(bytes) - .filter_map(Result::ok) - .filter_map(|(pattern_kind, assignments, line_number)| { - let (pattern, value) = match pattern_kind { - crate::parse::Kind::Macro(macro_name) => ( - gix_glob::Pattern { - text: macro_name.as_str().into(), - mode: gix_glob::pattern::Mode::all(), - first_wildcard_pos: None, - }, - Value::MacroAttributes(into_owned_assignments(assignments).ok()?), - ), - crate::parse::Kind::Pattern(p) => ( - (!p.is_negative()).then_some(p)?, - Value::Assignments(into_owned_assignments(assignments).ok()?), - ), - }; - PatternMapping { - pattern, - value, - sequence_number: line_number, - } - .into() - }) - .collect() - } - - fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { - pattern.mode != gix_glob::pattern::Mode::all() - } -} - -/// Describes a matching value within a [`MatchGroup`]. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -pub struct Match<'a, T> { - /// The glob pattern itself, like `/target/*`. - pub pattern: &'a gix_glob::Pattern, - /// The value associated with the pattern. - pub value: &'a T, - /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. - pub source: Option<&'a Path>, - /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. - pub sequence_number: usize, -} - -impl<T> MatchGroup<T> -where - T: Pattern, -{ - /// Match `relative_path`, a path relative to the repository containing all patterns, and return the first match if available. - // TODO: better docs - pub fn pattern_matching_relative_path<'a>( - &self, - relative_path: impl Into<&'a BStr>, - is_dir: Option<bool>, - case: gix_glob::pattern::Case, - ) -> Option<Match<'_, T::Value>> { - let relative_path = relative_path.into(); - let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); - self.patterns - .iter() - .rev() - .find_map(|pl| pl.pattern_matching_relative_path(relative_path, basename_pos, is_dir, case)) - } -} - -impl MatchGroup<Ignore> { - /// Given `git_dir`, a `.git` repository, load ignore patterns from `info/exclude` and from `excludes_file` if it - /// is provided. - /// Note that it's not considered an error if the provided `excludes_file` does not exist. - pub fn from_git_dir( - git_dir: impl AsRef<Path>, - excludes_file: Option<PathBuf>, - buf: &mut Vec<u8>, - ) -> std::io::Result<Self> { - let mut group = Self::default(); - - let follow_symlinks = true; - // order matters! More important ones first. - group.patterns.extend( - excludes_file - .map(|file| PatternList::<Ignore>::from_file(file, None, follow_symlinks, buf)) - .transpose()? - .flatten(), - ); - group.patterns.extend(PatternList::<Ignore>::from_file( - git_dir.as_ref().join("info").join("exclude"), - None, - follow_symlinks, - buf, - )?); - Ok(group) - } - - /// See [PatternList::<Ignore>::from_overrides()] for details. - pub fn from_overrides(patterns: impl IntoIterator<Item = impl Into<OsString>>) -> Self { - MatchGroup { - patterns: vec![PatternList::<Ignore>::from_overrides(patterns)], - } - } - - /// Add the given file at `source` if it exists, otherwise do nothing. If a `root` is provided, it's not considered a global file anymore. - /// Returns true if the file was added, or false if it didn't exist. - pub fn add_patterns_file( - &mut self, - source: impl Into<PathBuf>, - follow_symlinks: bool, - root: Option<&Path>, - buf: &mut Vec<u8>, - ) -> std::io::Result<bool> { - let previous_len = self.patterns.len(); - self.patterns.extend(PatternList::<Ignore>::from_file( - source.into(), - root, - follow_symlinks, - buf, - )?); - Ok(self.patterns.len() != previous_len) - } - - /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they - /// are relative to. This also means that `source` is contained within `root` if `root` is provided. - pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into<PathBuf>, root: Option<&Path>) { - self.patterns - .push(PatternList::<Ignore>::from_bytes(bytes, source.into(), root)); - } -} - -fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> { - buf.clear(); - let file = if follow_symlinks { - std::fs::File::open(path) - } else { - gix_features::fs::open_options_no_follow().read(true).open(path) - }; - Ok(match file { - Ok(mut file) => { - file.read_to_end(buf)?; - true - } - Err(err) if err.kind() == std::io::ErrorKind::NotFound => false, - Err(err) => return Err(err), - }) -} - -impl<T> PatternList<T> -where - T: Pattern, -{ - /// `source` is the location of the `bytes` which represent a list of patterns line by line. - pub fn from_bytes(bytes: &[u8], source: impl Into<PathBuf>, root: Option<&Path>) -> Self { - let source = source.into(); - let patterns = T::bytes_to_patterns(bytes); - - let base = root - .and_then(|root| source.parent().expect("file").strip_prefix(root).ok()) - .and_then(|base| { - (!base.as_os_str().is_empty()).then(|| { - let mut base: BString = - gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned(); - - base.push_byte(b'/'); - base - }) - }); - PatternList { - patterns, - source: Some(source), - base, - } - } - - /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally - /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file. - pub fn from_file( - source: impl Into<PathBuf>, - root: Option<&Path>, - follow_symlinks: bool, - buf: &mut Vec<u8>, - ) -> std::io::Result<Option<Self>> { - let source = source.into(); - Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root))) - } -} - -impl<T> PatternList<T> -where - T: Pattern, -{ - /// Return a match if a pattern matches `relative_path`, providing a pre-computed `basename_pos` which is the - /// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory. - /// `case` specifies whether cases should be folded during matching or not. - pub fn pattern_matching_relative_path( - &self, - relative_path: &BStr, - basename_pos: Option<usize>, - is_dir: Option<bool>, - case: gix_glob::pattern::Case, - ) -> Option<Match<'_, T::Value>> { - let (relative_path, basename_start_pos) = - self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; - self.patterns - .iter() - .rev() - .filter(|pm| T::may_use_glob_pattern(&pm.pattern)) - .find_map( - |PatternMapping { - pattern, - value, - sequence_number, - }| { - pattern - .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) - .then_some(Match { - pattern, - value, - source: self.source.as_deref(), - sequence_number: *sequence_number, - }) - }, - ) - } - - /// Like [`pattern_matching_relative_path()`][Self::pattern_matching_relative_path()], but returns an index to the pattern - /// that matched `relative_path`, instead of the match itself. - pub fn pattern_idx_matching_relative_path( - &self, - relative_path: &BStr, - basename_pos: Option<usize>, - is_dir: Option<bool>, - case: gix_glob::pattern::Case, - ) -> Option<usize> { - let (relative_path, basename_start_pos) = - self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; - self.patterns - .iter() - .enumerate() - .rev() - .filter(|(_, pm)| T::may_use_glob_pattern(&pm.pattern)) - .find_map(|(idx, pm)| { - pm.pattern - .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) - .then_some(idx) - }) - } - - fn strip_base_handle_recompute_basename_pos<'a>( - &self, - relative_path: &'a BStr, - basename_pos: Option<usize>, - ) -> Option<(&'a BStr, Option<usize>)> { - match self.base.as_deref() { - Some(base) => ( - relative_path.strip_prefix(base.as_slice())?.as_bstr(), - basename_pos.and_then(|pos| { - let pos = pos - base.len(); - (pos != 0).then_some(pos) - }), - ), - None => (relative_path, basename_pos), - } - .into() - } -} - -impl PatternList<Ignore> { - /// Parse a list of patterns, using slashes as path separators - pub fn from_overrides(patterns: impl IntoIterator<Item = impl Into<OsString>>) -> Self { - PatternList { - patterns: patterns - .into_iter() - .map(Into::into) - .enumerate() - .filter_map(|(seq_id, pattern)| { - let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?; - gix_glob::parse(pattern.as_ref()).map(|p| PatternMapping { - pattern: p, - value: (), - sequence_number: seq_id, - }) - }) - .collect(), - source: None, - base: None, - } - } -} diff --git a/vendor/gix-attributes/src/name.rs b/vendor/gix-attributes/src/name.rs index 03064dbda..40d86fd4c 100644 --- a/vendor/gix-attributes/src/name.rs +++ b/vendor/gix-attributes/src/name.rs @@ -1,4 +1,5 @@ -use bstr::BString; +use bstr::{BStr, BString, ByteSlice}; +use kstring::KStringRef; use crate::{Name, NameRef}; @@ -10,13 +11,32 @@ impl<'a> NameRef<'a> { /// Return the inner `str`. pub fn as_str(&self) -> &str { - self.0 + self.0.as_str() } } impl AsRef<str> for NameRef<'_> { fn as_ref(&self) -> &str { - self.0 + self.0.as_ref() + } +} + +impl<'a> TryFrom<&'a BStr> for NameRef<'a> { + type Error = Error; + + fn try_from(attr: &'a BStr) -> Result<Self, Self::Error> { + fn attr_valid(attr: &BStr) -> bool { + if attr.first() == Some(&b'-') { + return false; + } + + attr.bytes() + .all(|b| matches!(b, b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9')) + } + + attr_valid(attr) + .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8")))) + .ok_or_else(|| Error { attribute: attr.into() }) } } diff --git a/vendor/gix-attributes/src/parse/attribute.rs b/vendor/gix-attributes/src/parse.rs index 9e4b4c66e..0b70cb306 100644 --- a/vendor/gix-attributes/src/parse/attribute.rs +++ b/vendor/gix-attributes/src/parse.rs @@ -1,12 +1,13 @@ use std::borrow::Cow; use bstr::{BStr, ByteSlice}; +use kstring::KStringRef; use crate::{name, AssignmentRef, Name, NameRef, StateRef}; /// The kind of attribute that was parsed. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Kind { /// A pattern to match paths against Pattern(gix_glob::Pattern), @@ -58,12 +59,7 @@ impl<'a> Iter<'a> { } else if attr.first() == Some(&b'!') { (&attr[1..], StateRef::Unspecified) } else { - ( - attr, - possibly_value - .map(|v| StateRef::Value(v.as_bstr())) - .unwrap_or(StateRef::Set), - ) + (attr, possibly_value.map(StateRef::from_bytes).unwrap_or(StateRef::Set)) }; Ok(AssignmentRef::new(check_attr(attr)?, state)) } @@ -80,7 +76,7 @@ fn check_attr(attr: &BStr) -> Result<NameRef<'_>, name::Error> { } attr_valid(attr) - .then(|| NameRef(attr.to_str().expect("no illformed utf8"))) + .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8")))) .ok_or_else(|| name::Error { attribute: attr.into() }) } @@ -93,6 +89,7 @@ impl<'a> Iterator for Iter<'a> { } } +/// Instantiation impl<'a> Lines<'a> { /// Create a new instance to parse all attributes in all lines of the input `bytes`. pub fn new(bytes: &'a [u8]) -> Self { @@ -145,11 +142,11 @@ fn parse_line(line: &BStr, line_number: usize) -> Option<Result<(Kind, Iter<'_>, let kind_res = match line.strip_prefix(b"[attr]") { Some(macro_name) => check_attr(macro_name.into()) - .map(|name| Kind::Macro(name.to_owned())) .map_err(|err| Error::MacroName { line_number, macro_name: err.attribute, - }), + }) + .map(|name| Kind::Macro(name.to_owned())), None => { let pattern = gix_glob::Pattern::from_bytes(line.as_ref())?; if pattern.mode.contains(gix_glob::pattern::Mode::NEGATIVE) { diff --git a/vendor/gix-attributes/src/parse/ignore.rs b/vendor/gix-attributes/src/parse/ignore.rs deleted file mode 100644 index a27ee0285..000000000 --- a/vendor/gix-attributes/src/parse/ignore.rs +++ /dev/null @@ -1,36 +0,0 @@ -use bstr::ByteSlice; - -/// An iterator over line-wise ignore patterns parsed from a buffer. -pub struct Lines<'a> { - lines: bstr::Lines<'a>, - line_no: usize, -} - -impl<'a> Lines<'a> { - /// Create a new instance from `buf` to parse ignore patterns from. - pub fn new(buf: &'a [u8]) -> Self { - let bom = unicode_bom::Bom::from(buf); - Lines { - lines: buf[bom.len()..].lines(), - line_no: 0, - } - } -} - -impl<'a> Iterator for Lines<'a> { - type Item = (gix_glob::Pattern, usize); - - fn next(&mut self) -> Option<Self::Item> { - for line in self.lines.by_ref() { - self.line_no += 1; - if line.first() == Some(&b'#') { - continue; - } - match gix_glob::Pattern::from_bytes(line) { - None => continue, - Some(pattern) => return Some((pattern, self.line_no)), - } - } - None - } -} diff --git a/vendor/gix-attributes/src/parse/mod.rs b/vendor/gix-attributes/src/parse/mod.rs deleted file mode 100644 index 82cacc8ed..000000000 --- a/vendor/gix-attributes/src/parse/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -/// -pub mod ignore; - -mod attribute; -pub use attribute::{Error, Iter, Kind, Lines}; - -/// Parse git ignore patterns, line by line, from `bytes`. -pub fn ignore(bytes: &[u8]) -> ignore::Lines<'_> { - ignore::Lines::new(bytes) -} diff --git a/vendor/gix-attributes/src/search/attributes.rs b/vendor/gix-attributes/src/search/attributes.rs new file mode 100644 index 000000000..a34ae8b3e --- /dev/null +++ b/vendor/gix-attributes/src/search/attributes.rs @@ -0,0 +1,217 @@ +use std::path::{Path, PathBuf}; + +use bstr::{BStr, ByteSlice}; +use gix_glob::search::{pattern, Pattern}; + +use super::Attributes; +use crate::{ + search::{Assignments, MetadataCollection, Outcome, TrackedAssignment, Value}, + Search, +}; + +/// Instantiation and initialization. +impl Search { + /// Create a search instance preloaded with *built-ins* followed by attribute `files` from various global locations. + /// + /// See [`Source`][crate::Source] for a way to obtain these paths. + /// + /// Note that parsing is lenient and errors are logged. + /// + /// * `buf` is used to read `files` from disk which will be ignored if they do not exist. + /// * `collection` will be updated with information necessary to perform lookups later. + pub fn new_globals( + files: impl IntoIterator<Item = impl Into<PathBuf>>, + buf: &mut Vec<u8>, + collection: &mut MetadataCollection, + ) -> std::io::Result<Self> { + let mut group = Self::default(); + group.add_patterns_buffer(b"[attr]binary -diff -merge -text", "[builtin]", None, collection); + + for path in files.into_iter() { + group.add_patterns_file(path, true, None, buf, collection)?; + } + Ok(group) + } +} + +/// Mutation +impl Search { + /// Add the given file at `source` to our patterns if it exists, otherwise do nothing. + /// Update `collection` with newly added attribute names. + /// If a `root` is provided, it's not considered a global file anymore. + /// Returns `true` if the file was added, or `false` if it didn't exist. + pub fn add_patterns_file( + &mut self, + source: impl Into<PathBuf>, + follow_symlinks: bool, + root: Option<&Path>, + buf: &mut Vec<u8>, + collection: &mut MetadataCollection, + ) -> std::io::Result<bool> { + let was_added = gix_glob::search::add_patterns_file(&mut self.patterns, source, follow_symlinks, root, buf)?; + if was_added { + collection.update_from_list(self.patterns.last_mut().expect("just added")); + } + Ok(was_added) + } + /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they + /// are relative to. This also means that `source` is contained within `root` if `root` is provided. + pub fn add_patterns_buffer( + &mut self, + bytes: &[u8], + source: impl Into<PathBuf>, + root: Option<&Path>, + collection: &mut MetadataCollection, + ) { + self.patterns.push(pattern::List::from_bytes(bytes, source, root)); + collection.update_from_list(self.patterns.last_mut().expect("just added")); + } + + /// Pop the last attribute patterns list from our queue. + pub fn pop_pattern_list(&mut self) -> Option<gix_glob::search::pattern::List<Attributes>> { + self.patterns.pop() + } +} + +/// Access and matching +impl Search { + /// Match `relative_path`, a path relative to the repository, while respective `case`-sensitivity and write them to `out` + /// Return `true` if at least one pattern matched. + pub fn pattern_matching_relative_path<'a, 'b>( + &'a self, + relative_path: impl Into<&'b BStr>, + case: gix_glob::pattern::Case, + out: &mut Outcome, + ) -> bool { + let relative_path = relative_path.into(); + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + let mut has_match = false; + self.patterns.iter().rev().any(|pl| { + has_match |= pattern_matching_relative_path(pl, relative_path, basename_pos, case, out); + out.is_done() + }); + has_match + } + + /// Return the amount of pattern lists contained in this instance. + pub fn num_pattern_lists(&self) -> usize { + self.patterns.len() + } +} + +impl Pattern for Attributes { + type Value = Value; + + fn bytes_to_patterns(bytes: &[u8], source: &std::path::Path) -> Vec<pattern::Mapping<Self::Value>> { + fn into_owned_assignments<'a>( + attrs: impl Iterator<Item = Result<crate::AssignmentRef<'a>, crate::name::Error>>, + ) -> Option<Assignments> { + let res = attrs + .map(|res| { + res.map(|a| TrackedAssignment { + id: Default::default(), + inner: a.to_owned(), + }) + }) + .collect::<Result<Assignments, _>>(); + match res { + Ok(res) => Some(res), + Err(err) => { + log::warn!("{}", err); + None + } + } + } + + crate::parse(bytes) + .filter_map(|res| match res { + Ok(pattern) => Some(pattern), + Err(err) => { + log::warn!("{}: {}", source.display(), err); + None + } + }) + .filter_map(|(pattern_kind, assignments, line_number)| { + let (pattern, value) = match pattern_kind { + crate::parse::Kind::Macro(macro_name) => ( + gix_glob::Pattern { + text: macro_name.as_str().into(), + mode: macro_mode(), + first_wildcard_pos: None, + }, + Value::MacroAssignments { + id: Default::default(), + assignments: into_owned_assignments(assignments)?, + }, + ), + crate::parse::Kind::Pattern(p) => ( + (!p.is_negative()).then_some(p)?, + Value::Assignments(into_owned_assignments(assignments)?), + ), + }; + pattern::Mapping { + pattern, + value, + sequence_number: line_number, + } + .into() + }) + .collect() + } + + fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { + pattern.mode != macro_mode() + } +} + +fn macro_mode() -> gix_glob::pattern::Mode { + gix_glob::pattern::Mode::all() +} + +/// Append all matches of patterns matching `relative_path` to `out`, +/// providing a pre-computed `basename_pos` which is the starting position of the basename of `relative_path`. +/// `case` specifies whether cases should be folded during matching or not. +/// `is_dir` is true if `relative_path` is a directory. +/// Return `true` if at least one pattern matched. +#[allow(unused_variables)] +fn pattern_matching_relative_path( + list: &gix_glob::search::pattern::List<Attributes>, + relative_path: &BStr, + basename_pos: Option<usize>, + case: gix_glob::pattern::Case, + out: &mut Outcome, +) -> bool { + let (relative_path, basename_start_pos) = + match list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case) { + Some(r) => r, + None => return false, + }; + let cur_len = out.remaining(); + 'outer: for pattern::Mapping { + pattern, + value, + sequence_number, + } in list + .patterns + .iter() + .rev() + .filter(|pm| Attributes::may_use_glob_pattern(&pm.pattern)) + { + let value: &Value = value; + let attrs = match value { + Value::MacroAssignments { .. } => { + unreachable!("we can't match on macros as they have no pattern") + } + Value::Assignments(attrs) => attrs, + }; + if out.has_unspecified_attributes(attrs.iter().map(|attr| attr.id)) + && pattern.matches_repo_relative_path(relative_path, basename_start_pos, None, case) + { + let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_ref(), *sequence_number); + if all_filled { + break 'outer; + } + } + } + cur_len != out.remaining() +} diff --git a/vendor/gix-attributes/src/search/mod.rs b/vendor/gix-attributes/src/search/mod.rs new file mode 100644 index 000000000..e70c3b8b1 --- /dev/null +++ b/vendor/gix-attributes/src/search/mod.rs @@ -0,0 +1,150 @@ +use std::collections::HashMap; + +use kstring::KString; +use smallvec::SmallVec; + +use crate::{Assignment, AssignmentRef}; + +mod attributes; +mod outcome; +mod refmap; +pub(crate) use refmap::RefMap; + +/// A typically sized list of attributes. +pub type Assignments = SmallVec<[TrackedAssignment; AVERAGE_NUM_ATTRS]>; + +/// A value of a [pattern mapping][gix_glob::search::pattern::Mapping], +/// which is either a macro definition or a set of attributes. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum Value { + /// A macro, whose name resolves to the contained assignments. Note that the name is the pattern of the mapping itself. + MacroAssignments { + /// The id of the macro itself, which is both an attribute as well as a set of additional attributes into which the macro + /// resolves + id: AttributeId, + /// The attributes or assignments that the macro resolves to. + assignments: Assignments, + }, + /// A set of assignments which are the attributes themselves. + Assignments(Assignments), +} + +/// A way to have an assignment (`attr=value`) but also associated it with an id that allows perfect mapping +/// to tracking information. +/// Note that the order is produced after the files are parsed as global ordering is needed that goes beyond the scope of a +/// single `Search` instance. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TrackedAssignment { + /// The order of the assignment. + pub id: AttributeId, + /// The actual assignment information. + pub inner: Assignment, +} + +/// An implementation of the [`Pattern`][gix_glob::search::Pattern] trait for attributes. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Attributes; + +/// Describes a matching pattern with +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Match<'a> { + /// The glob pattern itself, like `/target/*`. + pub pattern: &'a gix_glob::Pattern, + /// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern. + pub assignment: AssignmentRef<'a>, + /// Additional information about the kind of match. + pub kind: MatchKind, + /// Information about the location of the match. + pub location: MatchLocation<'a>, +} + +/// Describes in which what file and line the match was found. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct MatchLocation<'a> { + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. + pub source: Option<&'a std::path::Path>, + /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. + pub sequence_number: usize, +} + +/// The kind of attribute within the context of a [match][Match]. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub enum MatchKind { + /// A attribute. + Attribute { + /// The location of the macro which referred to it the list with all in-order attributes and macros, or `None` if + /// this is attribute wasn't resolved. + /// + /// Use [`Outcome::match_by_id()`] to retrieve the macro. + macro_id: Option<AttributeId>, + }, + /// The attribute is a macro, which will resolve into one or more attributes or macros. + Macro { + /// The location of the parent macro which referred to this one in the list with all in-order attributes and macros, + /// or `None` if this is macro wasn't resolved by another one. + /// + /// Use [`Outcome::match_by_id()`] to retrieve the parent. + parent_macro_id: Option<AttributeId>, + }, +} + +/// The result of a search, containing all matching attributes. +#[derive(Default)] +pub struct Outcome { + /// The list of all available attributes, by ascending order. Each slots index corresponds to an attribute with that order, i.e. + /// `arr[attr.id] = <attr info>`. + /// + /// This list needs to be up-to-date with the search group so all possible attribute names are known. + matches_by_id: Vec<Slot>, + /// A stack of attributes to use for processing attributes of matched patterns and for resolving their macros. + attrs_stack: SmallVec<[(AttributeId, Assignment, Option<AttributeId>); 8]>, + /// A set of attributes we should limit ourselves to, or empty if we should fill in all attributes, made of + selected: SmallVec<[(KString, Option<AttributeId>); AVERAGE_NUM_ATTRS]>, + /// storage for all patterns we have matched so far (in order to avoid referencing them, we copy them, but only once). + patterns: RefMap<gix_glob::Pattern>, + /// storage for all assignments we have matched so far (in order to avoid referencing them, we copy them, but only once). + assignments: RefMap<Assignment>, + /// storage for all source paths we have matched so far (in order to avoid referencing them, we copy them, but only once). + source_paths: RefMap<std::path::PathBuf>, + /// The amount of attributes that still need to be set, or `None` if this outcome is consumed which means it + /// needs to be re-initialized. + remaining: Option<usize>, +} + +#[derive(Default, Clone)] +struct Slot { + r#match: Option<outcome::Match>, + /// A list of all assignments, being an empty list for non-macro attributes, or all assignments (with order) for macros. + /// It's used to resolve macros. + macro_attributes: Assignments, +} + +/// A type to denote an id of an attribute assignment for uniquely identifying each attribute or assignment. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +pub struct AttributeId(pub usize); + +impl Default for AttributeId { + fn default() -> Self { + AttributeId(usize::MAX) + } +} + +/// A utility type to collect metadata for each attribute, unified by its name. +#[derive(Clone, Debug, Default)] +pub struct MetadataCollection { + /// A mapping of an attribute or macro name to its order, that is the time when it was *first* seen. + /// + /// This is the inverse of the order attributes are searched. + name_to_meta: HashMap<KString, Metadata>, +} + +/// Metadata associated with an attribute or macro name. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Metadata { + /// The id to uniquely identify an attribute in the [MetadataCollection]. + pub id: AttributeId, + /// If non-zero in length, this entry belongs to a macro which resolves to these attribute names. + pub macro_attributes: Assignments, +} + +const AVERAGE_NUM_ATTRS: usize = 3; diff --git a/vendor/gix-attributes/src/search/outcome.rs b/vendor/gix-attributes/src/search/outcome.rs new file mode 100644 index 000000000..5d5a26b44 --- /dev/null +++ b/vendor/gix-attributes/src/search/outcome.rs @@ -0,0 +1,365 @@ +use bstr::{BString, ByteSlice}; +use gix_glob::Pattern; +use kstring::{KString, KStringRef}; + +use crate::search::refmap::RefMapKey; +use crate::{ + search::{ + Assignments, AttributeId, Attributes, MatchKind, Metadata, MetadataCollection, Outcome, TrackedAssignment, + Value, + }, + AssignmentRef, NameRef, StateRef, +}; + +/// Initialization +impl Outcome { + /// Initialize this instance to collect outcomes for all names in `collection`, which represents all possible attributes + /// or macros we may visit, and [`reset`][Self::reset()] it unconditionally. + /// + /// This must be called after each time `collection` changes. + pub fn initialize(&mut self, collection: &MetadataCollection) { + if self.matches_by_id.len() != collection.name_to_meta.len() { + let global_num_attrs = collection.name_to_meta.len(); + + self.matches_by_id.resize(global_num_attrs, Default::default()); + + // NOTE: This works only under the assumption that macros remain defined. + for (order, macro_attributes) in collection.iter().filter_map(|(_, meta)| { + (!meta.macro_attributes.is_empty()).then_some((meta.id.0, &meta.macro_attributes)) + }) { + self.matches_by_id[order].macro_attributes = macro_attributes.clone() + } + } + self.reset(); + } + + /// Like [`initialize()`][Self::initialize()], but limits the set of attributes to look for and fill in + /// to `attribute_names`. + /// Users of this instance should prefer to limit their search as this would allow it to finish earlier. + /// + /// Note that `attribute_names` aren't validated to be valid names here, as invalid names definitely will always be unspecified. + pub fn initialize_with_selection<'a>( + &mut self, + collection: &MetadataCollection, + attribute_names: impl IntoIterator<Item = impl Into<KStringRef<'a>>>, + ) { + self.initialize(collection); + + self.selected.clear(); + self.selected.extend(attribute_names.into_iter().map(|name| { + let name = name.into(); + ( + name.to_owned(), + collection.name_to_meta.get(name.as_str()).map(|meta| meta.id), + ) + })); + self.reset_remaining(); + } + + /// Prepare for a new search over the known set of attributes by resetting our state. + pub fn reset(&mut self) { + self.matches_by_id.iter_mut().for_each(|item| item.r#match = None); + self.attrs_stack.clear(); + self.reset_remaining(); + } + + fn reset_remaining(&mut self) { + self.remaining = Some(if self.selected.is_empty() { + self.matches_by_id.len() + } else { + self.selected.iter().filter(|(_name, id)| id.is_some()).count() + }); + } +} + +/// Access +impl Outcome { + /// Return an iterator over all filled attributes we were initialized with. + /// + /// ### Note + /// + /// If [`initialize_with_selection`][Self::initialize_with_selection()] was used, + /// use [`iter_selected()`][Self::iter_selected()] instead. + /// + /// ### Deviation + /// + /// It's possible that the order in which the attribute are returned (if not limited to a set of attributes) isn't exactly + /// the same as what `git` provides. + /// Ours is in order of declaration, whereas `git` seems to list macros first somehow. Since the values are the same, this + /// shouldn't be an issue. + pub fn iter(&self) -> impl Iterator<Item = crate::search::Match<'_>> { + self.matches_by_id + .iter() + .filter_map(|item| item.r#match.as_ref().map(|m| m.to_outer(self))) + } + + /// Iterate over all matches of the attribute selection in their original order. + /// + /// This only yields values if this instance was initialized with [`Outcome::initialize_with_selection()`]. + pub fn iter_selected(&self) -> impl Iterator<Item = crate::search::Match<'_>> { + static DUMMY: Pattern = Pattern { + text: BString::new(Vec::new()), + mode: gix_glob::pattern::Mode::empty(), + first_wildcard_pos: None, + }; + self.selected.iter().map(|(name, id)| { + id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref().map(|m| m.to_outer(self))) + .unwrap_or_else(|| crate::search::Match { + pattern: &DUMMY, + assignment: AssignmentRef { + name: NameRef::try_from(name.as_bytes().as_bstr()) + .unwrap_or_else(|_| NameRef("invalid".into())), + state: StateRef::Unspecified, + }, + kind: MatchKind::Attribute { macro_id: None }, + location: crate::search::MatchLocation { + source: None, + sequence_number: 0, + }, + }) + }) + } + + /// Obtain a match by the order of its attribute, if the order exists in our initialized attribute list and there was a match. + pub fn match_by_id(&self, id: AttributeId) -> Option<crate::search::Match<'_>> { + self.matches_by_id + .get(id.0) + .and_then(|m| m.r#match.as_ref().map(|m| m.to_outer(self))) + } + + /// Return `true` if there is nothing more to be done as all attributes were filled. + pub fn is_done(&self) -> bool { + self.remaining() == 0 + } +} + +/// Mutation +impl Outcome { + /// Fill all `attrs` and resolve them recursively if they are macros. Return `true` if there is no attribute left to be resolved and + /// we are totally done. + /// `pattern` is what matched a patch and is passed for contextual information, + /// providing `sequence_number` and `source` as well. + pub(crate) fn fill_attributes<'a>( + &mut self, + attrs: impl Iterator<Item = &'a TrackedAssignment>, + pattern: &gix_glob::Pattern, + source: Option<&std::path::PathBuf>, + sequence_number: usize, + ) -> bool { + self.attrs_stack.extend(attrs.filter_map(|attr| { + self.matches_by_id[attr.id.0] + .r#match + .is_none() + .then(|| (attr.id, attr.inner.clone(), None)) + })); + while let Some((id, assignment, parent_order)) = self.attrs_stack.pop() { + let slot = &mut self.matches_by_id[id.0]; + if slot.r#match.is_some() { + continue; + } + // Let's be explicit - this is only non-empty for macros. + let is_macro = !slot.macro_attributes.is_empty(); + + slot.r#match = Some(Match { + pattern: self.patterns.insert(pattern), + assignment: self.assignments.insert_owned(assignment), + kind: if is_macro { + MatchKind::Macro { + parent_macro_id: parent_order, + } + } else { + MatchKind::Attribute { macro_id: parent_order } + }, + location: MatchLocation { + source: source.map(|path| self.source_paths.insert(path)), + sequence_number, + }, + }); + if self.reduce_and_check_if_done(id) { + return true; + } + + if is_macro { + // TODO(borrowchk): one fine day we should be able to re-borrow `slot` without having to redo the array access. + let slot = &self.matches_by_id[id.0]; + self.attrs_stack.extend(slot.macro_attributes.iter().filter_map(|attr| { + self.matches_by_id[attr.id.0] + .r#match + .is_none() + .then(|| (attr.id, attr.inner.clone(), Some(id))) + })); + } + } + false + } +} + +impl Outcome { + /// Given a list of `attrs` by order, return true if at least one of them is not set + pub(crate) fn has_unspecified_attributes(&self, mut attrs: impl Iterator<Item = AttributeId>) -> bool { + attrs.any(|order| self.matches_by_id[order.0].r#match.is_none()) + } + /// Return the amount of attributes haven't yet been found. + /// + /// If this number reaches 0, then the search can be stopped as there is nothing more to fill in. + pub(crate) fn remaining(&self) -> usize { + self.remaining + .expect("BUG: instance must be initialized for each search set") + } + + fn reduce_and_check_if_done(&mut self, attr: AttributeId) -> bool { + if self.selected.is_empty() + || self + .selected + .iter() + .any(|(_name, id)| id.map_or(false, |id| id == attr)) + { + *self.remaining.as_mut().expect("initialized") -= 1; + } + self.is_done() + } +} + +/// Mutation +impl MetadataCollection { + /// Assign order ids to each attribute either in macros (along with macros themselves) or attributes of patterns, and store + /// them in this collection. + /// + /// Must be called before querying matches. + pub fn update_from_list(&mut self, list: &mut gix_glob::search::pattern::List<Attributes>) { + for pattern in &mut list.patterns { + match &mut pattern.value { + Value::MacroAssignments { id: order, assignments } => { + *order = self.id_for_macro( + pattern + .pattern + .text + .to_str() + .expect("valid macro names are always UTF8 and this was verified"), + assignments, + ); + } + Value::Assignments(assignments) => { + self.assign_order_to_attributes(assignments); + } + } + } + } +} + +/// Access +impl MetadataCollection { + /// Return an iterator over the contents of the map in an easy-to-consume form. + pub fn iter(&self) -> impl Iterator<Item = (&str, &Metadata)> { + self.name_to_meta.iter().map(|(k, v)| (k.as_str(), v)) + } +} + +impl MetadataCollection { + pub(crate) fn id_for_macro(&mut self, name: &str, attrs: &mut Assignments) -> AttributeId { + let order = match self.name_to_meta.get_mut(name) { + Some(meta) => meta.id, + None => { + let order = AttributeId(self.name_to_meta.len()); + self.name_to_meta.insert( + KString::from_ref(name), + Metadata { + id: order, + macro_attributes: Default::default(), + }, + ); + order + } + }; + + self.assign_order_to_attributes(attrs); + self.name_to_meta.get_mut(name).expect("just added").macro_attributes = attrs.clone(); + + order + } + pub(crate) fn id_for_attribute(&mut self, name: &str) -> AttributeId { + match self.name_to_meta.get(name) { + Some(meta) => meta.id, + None => { + let order = AttributeId(self.name_to_meta.len()); + self.name_to_meta.insert(KString::from_ref(name), order.into()); + order + } + } + } + pub(crate) fn assign_order_to_attributes(&mut self, attributes: &mut [TrackedAssignment]) { + for TrackedAssignment { + id: order, + inner: crate::Assignment { name, .. }, + } in attributes + { + *order = self.id_for_attribute(&name.0); + } + } +} + +impl From<AttributeId> for Metadata { + fn from(order: AttributeId) -> Self { + Metadata { + id: order, + macro_attributes: Default::default(), + } + } +} + +impl MatchKind { + /// return the id of the macro that resolved us, or `None` if that didn't happen. + pub fn source_id(&self) -> Option<AttributeId> { + match self { + MatchKind::Attribute { macro_id: id } | MatchKind::Macro { parent_macro_id: id } => *id, + } + } +} + +/// A version of `Match` without references. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Match { + /// The glob pattern itself, like `/target/*`. + pub pattern: RefMapKey, + /// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern. + pub assignment: RefMapKey, + /// Additional information about the kind of match. + pub kind: MatchKind, + /// Information about the location of the match. + pub location: MatchLocation, +} + +impl Match { + fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::Match<'a> { + crate::search::Match { + pattern: out.patterns.resolve(self.pattern).expect("pattern still present"), + assignment: out + .assignments + .resolve(self.assignment) + .expect("assignment present") + .as_ref(), + kind: self.kind, + location: self.location.to_outer(out), + } + } +} + +/// A version of `MatchLocation` without references. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct MatchLocation { + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. + pub source: Option<RefMapKey>, + /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. + pub sequence_number: usize, +} + +impl MatchLocation { + fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::MatchLocation<'a> { + crate::search::MatchLocation { + source: self + .source + .and_then(|source| out.source_paths.resolve(source).map(|p| p.as_path())), + sequence_number: self.sequence_number, + } + } +} diff --git a/vendor/gix-attributes/src/search/refmap.rs b/vendor/gix-attributes/src/search/refmap.rs new file mode 100644 index 000000000..3dc51265c --- /dev/null +++ b/vendor/gix-attributes/src/search/refmap.rs @@ -0,0 +1,52 @@ +//! A utility to store objects by identity, which deduplicates them while avoiding lifetimes. +//! +//! We chose to use hashing/identity over pointers as it's possible that different objects end up in the same memory location, +//! which would create obscure bugs. The same could happen with hash collisions, but they these are designed to be less likely. +use std::collections::btree_map::Entry; +use std::collections::hash_map::DefaultHasher; +use std::collections::BTreeMap; +use std::hash::{Hash, Hasher}; + +pub(crate) type RefMapKey = u64; +pub(crate) struct RefMap<T>(BTreeMap<RefMapKey, T>); + +impl<T> Default for RefMap<T> { + fn default() -> Self { + RefMap(Default::default()) + } +} + +impl<T> RefMap<T> +where + T: Hash + Clone, +{ + pub(crate) fn insert(&mut self, value: &T) -> RefMapKey { + let mut s = DefaultHasher::new(); + value.hash(&mut s); + let key = s.finish(); + match self.0.entry(key) { + Entry::Vacant(e) => { + e.insert(value.clone()); + key + } + Entry::Occupied(_) => key, + } + } + + pub(crate) fn insert_owned(&mut self, value: T) -> RefMapKey { + let mut s = DefaultHasher::new(); + value.hash(&mut s); + let key = s.finish(); + match self.0.entry(key) { + Entry::Vacant(e) => { + e.insert(value); + key + } + Entry::Occupied(_) => key, + } + } + + pub(crate) fn resolve(&self, key: RefMapKey) -> Option<&T> { + self.0.get(&key) + } +} diff --git a/vendor/gix-attributes/src/source.rs b/vendor/gix-attributes/src/source.rs new file mode 100644 index 000000000..4977c4739 --- /dev/null +++ b/vendor/gix-attributes/src/source.rs @@ -0,0 +1,27 @@ +use std::{borrow::Cow, ffi::OsString, path::Path}; + +use crate::Source; + +impl Source { + /// Produce a storage location for the this source while potentially querying environment variables using `env_var(<name>)`, + /// or `None` if the storage location could not be obtained. + /// + /// Note that local sources are returned as relative paths to be joined with the base in a separate step. + pub fn storage_location(self, env_var: &mut dyn FnMut(&str) -> Option<OsString>) -> Option<Cow<'static, Path>> { + use Source::*; + Some(match self { + GitInstallation => gix_path::env::installation_config_prefix()? + .join("gitattributes") + .into(), + System => { + if env_var("GIT_ATTR_NOSYSTEM").is_some() { + return None; + } else { + gix_path::env::system_prefix()?.join("etc/gitattributes").into() + } + } + Git => return gix_path::env::xdg_config("attributes", env_var).map(Cow::Owned), + Local => Cow::Borrowed(Path::new("info/attributes")), + }) + } +} diff --git a/vendor/gix-attributes/src/state.rs b/vendor/gix-attributes/src/state.rs index 02dc8ee0d..27ce2a247 100644 --- a/vendor/gix-attributes/src/state.rs +++ b/vendor/gix-attributes/src/state.rs @@ -1,7 +1,85 @@ -use bstr::ByteSlice; +use bstr::{BStr, ByteSlice}; +use kstring::{KString, KStringRef}; use crate::{State, StateRef}; +/// A container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Value(KString); + +/// A reference container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ValueRef<'a>(#[cfg_attr(feature = "serde", serde(borrow))] KStringRef<'a>); + +/// Conversions +impl<'a> ValueRef<'a> { + /// Keep `input` as our value. + pub fn from_bytes(input: &'a [u8]) -> Self { + Self(KStringRef::from_ref( + // SAFETY: our API makes accessing that value as `str` impossible, so illformed UTF8 is never exposed as such. + #[allow(unsafe_code)] + unsafe { + std::str::from_utf8_unchecked(input) + }, + )) + } + + /// Access this value as byte string. + pub fn as_bstr(&self) -> &BStr { + self.0.as_bytes().as_bstr() + } + + /// Convert this instance into its owned form. + pub fn to_owned(self) -> Value { + self.into() + } +} + +impl<'a> From<&'a str> for ValueRef<'a> { + fn from(v: &'a str) -> Self { + ValueRef(v.into()) + } +} + +impl<'a> From<ValueRef<'a>> for Value { + fn from(v: ValueRef<'a>) -> Self { + Value(v.0.into()) + } +} + +impl From<&str> for Value { + fn from(v: &str) -> Self { + Value(KString::from_ref(v)) + } +} + +/// Access +impl Value { + /// Return ourselves as reference. + pub fn as_ref(&self) -> ValueRef<'_> { + ValueRef(self.0.as_ref()) + } +} + +/// Access +impl StateRef<'_> { + /// Return `true` if the associated attribute was set to be unspecified using the `!attr` prefix or it wasn't mentioned. + pub fn is_unspecified(&self) -> bool { + matches!(self, StateRef::Unspecified) + } +} + +/// Initialization +impl<'a> StateRef<'a> { + /// Keep `input` in one of our enums. + pub fn from_bytes(input: &'a [u8]) -> Self { + Self::Value(ValueRef::from_bytes(input)) + } +} + +/// Access impl<'a> StateRef<'a> { /// Turn ourselves into our owned counterpart. pub fn to_owned(self) -> State { @@ -13,7 +91,7 @@ impl<'a> State { /// Turn ourselves into our ref-type. pub fn as_ref(&'a self) -> StateRef<'a> { match self { - State::Value(v) => StateRef::Value(v.as_bytes().as_bstr()), + State::Value(v) => StateRef::Value(v.as_ref()), State::Set => StateRef::Set, State::Unset => StateRef::Unset, State::Unspecified => StateRef::Unspecified, @@ -24,7 +102,7 @@ impl<'a> State { impl<'a> From<StateRef<'a>> for State { fn from(s: StateRef<'a>) -> Self { match s { - StateRef::Value(v) => State::Value(v.to_str().expect("no illformed unicode").into()), + StateRef::Value(v) => State::Value(v.into()), StateRef::Set => State::Set, StateRef::Unset => State::Unset, StateRef::Unspecified => State::Unspecified, |