diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-attributes/src | |
parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
download | rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-attributes/src')
-rw-r--r-- | vendor/gix-attributes/src/assignment.rs | 28 | ||||
-rw-r--r-- | vendor/gix-attributes/src/lib.rs | 137 | ||||
-rw-r--r-- | vendor/gix-attributes/src/match_group.rs | 354 | ||||
-rw-r--r-- | vendor/gix-attributes/src/name.rs | 47 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse/attribute.rs | 172 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse/ignore.rs | 36 | ||||
-rw-r--r-- | vendor/gix-attributes/src/parse/mod.rs | 10 | ||||
-rw-r--r-- | vendor/gix-attributes/src/state.rs | 33 |
8 files changed, 817 insertions, 0 deletions
diff --git a/vendor/gix-attributes/src/assignment.rs b/vendor/gix-attributes/src/assignment.rs new file mode 100644 index 000000000..e1d7263f7 --- /dev/null +++ b/vendor/gix-attributes/src/assignment.rs @@ -0,0 +1,28 @@ +use crate::{Assignment, AssignmentRef, NameRef, StateRef}; + +impl<'a> AssignmentRef<'a> { + pub(crate) fn new(name: NameRef<'a>, state: StateRef<'a>) -> AssignmentRef<'a> { + AssignmentRef { name, state } + } + + /// Turn this reference into its owned counterpart. + pub fn to_owned(self) -> Assignment { + self.into() + } +} + +impl<'a> From<AssignmentRef<'a>> for Assignment { + fn from(a: AssignmentRef<'a>) -> Self { + Assignment { + name: a.name.to_owned(), + state: a.state.to_owned(), + } + } +} + +impl<'a> Assignment { + /// Provide a ref type to this owned instance. + pub fn as_ref(&'a self) -> AssignmentRef<'a> { + AssignmentRef::new(self.name.as_ref(), self.state.as_ref()) + } +} diff --git a/vendor/gix-attributes/src/lib.rs b/vendor/gix-attributes/src/lib.rs new file mode 100644 index 000000000..7d95c022f --- /dev/null +++ b/vendor/gix-attributes/src/lib.rs @@ -0,0 +1,137 @@ +//! Parse `.gitattribute` and `.gitignore` files and provide utilities to match against them. +//! +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +use std::path::PathBuf; + +use bstr::{BStr, BString}; +pub use gix_glob as glob; + +mod assignment; +/// +pub mod name; +mod state; + +mod match_group; +pub use match_group::{Attributes, Ignore, Match, Pattern}; + +/// +pub mod parse; +/// Parse attribute assignments line by line from `bytes`. +pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(bytes) +} + +/// The state an attribute can be in, referencing the value. +/// +/// Note that this doesn't contain the name. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum StateRef<'a> { + /// The attribute is listed, or has the special value 'true' + Set, + /// The attribute has the special value 'false', or was prefixed with a `-` sign. + Unset, + /// The attribute is set to the given value, which followed the `=` sign. + /// Note that values can be empty. + #[cfg_attr(feature = "serde1", serde(borrow))] + Value(&'a BStr), + /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. + Unspecified, +} + +/// The state an attribute can be in, owning the value. +/// +/// Note that this doesn't contain the name. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum State { + /// The attribute is listed, or has the special value 'true' + Set, + /// The attribute has the special value 'false', or was prefixed with a `-` sign. + Unset, + /// The attribute is set to the given value, which followed the `=` sign. + /// Note that values can be empty. + Value(BString), // TODO(performance): Is there a non-utf8 compact_str/KBString crate? See https://github.com/cobalt-org/kstring/issues/37#issuecomment-1446777265 . + /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. + Unspecified, +} + +/// Represents a validated attribute name +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Name(pub(crate) String); // TODO(performance): See if `KBString` or `compact_string` could be meaningful here. + +/// Holds a validated attribute name as a reference +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct NameRef<'a>(&'a str); + +/// Name an attribute and describe it's assigned state. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Assignment { + /// The validated name of the attribute. + pub name: Name, + /// The state of the attribute. + pub state: State, +} + +/// Holds validated attribute data as a reference +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct AssignmentRef<'a> { + /// The name of the attribute. + pub name: NameRef<'a>, + /// The state of the attribute. + pub state: StateRef<'a>, +} + +/// A grouping of lists of patterns while possibly keeping associated to their base path. +/// +/// Pattern lists with base path are queryable relative to that base, otherwise they are relative to the repository root. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct MatchGroup<T: Pattern = Attributes> { + /// A list of pattern lists, each representing a patterns from a file or specified by hand, in the order they were + /// specified in. + /// + /// During matching, this order is reversed. + pub patterns: Vec<PatternList<T>>, +} + +/// A list of patterns which optionally know where they were loaded from and what their base is. +/// +/// Knowing their base which is relative to a source directory, it will ignore all path to match against +/// that don't also start with said base. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct PatternList<T: Pattern> { + /// Patterns and their associated data in the order they were loaded in or specified, + /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). + /// + /// During matching, this order is reversed. + pub patterns: Vec<PatternMapping<T::Value>>, + + /// The path from which the patterns were read, or `None` if the patterns + /// don't originate in a file on disk. + pub source: Option<PathBuf>, + + /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root. + /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root. + pub base: Option<BString>, +} + +/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct PatternMapping<T> { + /// The pattern itself, like `/target/*` + pub pattern: gix_glob::Pattern, + /// The value associated with the pattern. + pub value: T, + /// Typically the line number in the file the pattern was parsed from. + pub sequence_number: usize, +} diff --git a/vendor/gix-attributes/src/match_group.rs b/vendor/gix-attributes/src/match_group.rs new file mode 100644 index 000000000..018bf2567 --- /dev/null +++ b/vendor/gix-attributes/src/match_group.rs @@ -0,0 +1,354 @@ +use std::{ + ffi::OsString, + io::Read, + path::{Path, PathBuf}, +}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::{Assignment, MatchGroup, PatternList, PatternMapping}; + +fn into_owned_assignments<'a>( + attrs: impl Iterator<Item = Result<crate::AssignmentRef<'a>, crate::name::Error>>, +) -> Result<Vec<Assignment>, crate::name::Error> { + attrs.map(|res| res.map(|attr| attr.to_owned())).collect() +} + +/// A trait to convert bytes into patterns and their associated value. +/// +/// This is used for `gitattributes` which have a value, and `gitignore` which don't. +pub trait Pattern: Clone + PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Default { + /// The value associated with a pattern. + type Value: PartialEq + Eq + std::fmt::Debug + std::hash::Hash + Ord + PartialOrd + Clone; + + /// Parse all patterns in `bytes` line by line, ignoring lines with errors, and collect them. + fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>>; + + /// Returns true if the given pattern may be used for matching. + fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool; +} + +/// An implementation of the [`Pattern`] trait for ignore patterns. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Ignore; + +impl Pattern for Ignore { + type Value = (); + + fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>> { + crate::parse::ignore(bytes) + .map(|(pattern, line_number)| PatternMapping { + pattern, + value: (), + sequence_number: line_number, + }) + .collect() + } + + fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { + true + } +} + +/// A value of an attribute pattern, which is either a macro definition or +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum Value { + MacroAttributes(Vec<Assignment>), + Assignments(Vec<Assignment>), +} + +/// An implementation of the [`Pattern`] trait for attributes. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Attributes; + +impl Pattern for Attributes { + type Value = Value; + + fn bytes_to_patterns(bytes: &[u8]) -> Vec<PatternMapping<Self::Value>> { + crate::parse(bytes) + .filter_map(Result::ok) + .filter_map(|(pattern_kind, assignments, line_number)| { + let (pattern, value) = match pattern_kind { + crate::parse::Kind::Macro(macro_name) => ( + gix_glob::Pattern { + text: macro_name.as_str().into(), + mode: gix_glob::pattern::Mode::all(), + first_wildcard_pos: None, + }, + Value::MacroAttributes(into_owned_assignments(assignments).ok()?), + ), + crate::parse::Kind::Pattern(p) => ( + (!p.is_negative()).then_some(p)?, + Value::Assignments(into_owned_assignments(assignments).ok()?), + ), + }; + PatternMapping { + pattern, + value, + sequence_number: line_number, + } + .into() + }) + .collect() + } + + fn may_use_glob_pattern(pattern: &gix_glob::Pattern) -> bool { + pattern.mode != gix_glob::pattern::Mode::all() + } +} + +/// Describes a matching value within a [`MatchGroup`]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Match<'a, T> { + /// The glob pattern itself, like `/target/*`. + pub pattern: &'a gix_glob::Pattern, + /// The value associated with the pattern. + pub value: &'a T, + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. + pub source: Option<&'a Path>, + /// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided. + pub sequence_number: usize, +} + +impl<T> MatchGroup<T> +where + T: Pattern, +{ + /// Match `relative_path`, a path relative to the repository containing all patterns, and return the first match if available. + // TODO: better docs + pub fn pattern_matching_relative_path<'a>( + &self, + relative_path: impl Into<&'a BStr>, + is_dir: Option<bool>, + case: gix_glob::pattern::Case, + ) -> Option<Match<'_, T::Value>> { + let relative_path = relative_path.into(); + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + self.patterns + .iter() + .rev() + .find_map(|pl| pl.pattern_matching_relative_path(relative_path, basename_pos, is_dir, case)) + } +} + +impl MatchGroup<Ignore> { + /// Given `git_dir`, a `.git` repository, load ignore patterns from `info/exclude` and from `excludes_file` if it + /// is provided. + /// Note that it's not considered an error if the provided `excludes_file` does not exist. + pub fn from_git_dir( + git_dir: impl AsRef<Path>, + excludes_file: Option<PathBuf>, + buf: &mut Vec<u8>, + ) -> std::io::Result<Self> { + let mut group = Self::default(); + + let follow_symlinks = true; + // order matters! More important ones first. + group.patterns.extend( + excludes_file + .map(|file| PatternList::<Ignore>::from_file(file, None, follow_symlinks, buf)) + .transpose()? + .flatten(), + ); + group.patterns.extend(PatternList::<Ignore>::from_file( + git_dir.as_ref().join("info").join("exclude"), + None, + follow_symlinks, + buf, + )?); + Ok(group) + } + + /// See [PatternList::<Ignore>::from_overrides()] for details. + pub fn from_overrides(patterns: impl IntoIterator<Item = impl Into<OsString>>) -> Self { + MatchGroup { + patterns: vec![PatternList::<Ignore>::from_overrides(patterns)], + } + } + + /// Add the given file at `source` if it exists, otherwise do nothing. If a `root` is provided, it's not considered a global file anymore. + /// Returns true if the file was added, or false if it didn't exist. + pub fn add_patterns_file( + &mut self, + source: impl Into<PathBuf>, + follow_symlinks: bool, + root: Option<&Path>, + buf: &mut Vec<u8>, + ) -> std::io::Result<bool> { + let previous_len = self.patterns.len(); + self.patterns.extend(PatternList::<Ignore>::from_file( + source.into(), + root, + follow_symlinks, + buf, + )?); + Ok(self.patterns.len() != previous_len) + } + + /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they + /// are relative to. This also means that `source` is contained within `root` if `root` is provided. + pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into<PathBuf>, root: Option<&Path>) { + self.patterns + .push(PatternList::<Ignore>::from_bytes(bytes, source.into(), root)); + } +} + +fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> { + buf.clear(); + let file = if follow_symlinks { + std::fs::File::open(path) + } else { + gix_features::fs::open_options_no_follow().read(true).open(path) + }; + Ok(match file { + Ok(mut file) => { + file.read_to_end(buf)?; + true + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => false, + Err(err) => return Err(err), + }) +} + +impl<T> PatternList<T> +where + T: Pattern, +{ + /// `source` is the location of the `bytes` which represent a list of patterns line by line. + pub fn from_bytes(bytes: &[u8], source: impl Into<PathBuf>, root: Option<&Path>) -> Self { + let source = source.into(); + let patterns = T::bytes_to_patterns(bytes); + + let base = root + .and_then(|root| source.parent().expect("file").strip_prefix(root).ok()) + .and_then(|base| { + (!base.as_os_str().is_empty()).then(|| { + let mut base: BString = + gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned(); + + base.push_byte(b'/'); + base + }) + }); + PatternList { + patterns, + source: Some(source), + base, + } + } + + /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally + /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file. + pub fn from_file( + source: impl Into<PathBuf>, + root: Option<&Path>, + follow_symlinks: bool, + buf: &mut Vec<u8>, + ) -> std::io::Result<Option<Self>> { + let source = source.into(); + Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?.then(|| Self::from_bytes(buf, source, root))) + } +} + +impl<T> PatternList<T> +where + T: Pattern, +{ + /// Return a match if a pattern matches `relative_path`, providing a pre-computed `basename_pos` which is the + /// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory. + /// `case` specifies whether cases should be folded during matching or not. + pub fn pattern_matching_relative_path( + &self, + relative_path: &BStr, + basename_pos: Option<usize>, + is_dir: Option<bool>, + case: gix_glob::pattern::Case, + ) -> Option<Match<'_, T::Value>> { + let (relative_path, basename_start_pos) = + self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; + self.patterns + .iter() + .rev() + .filter(|pm| T::may_use_glob_pattern(&pm.pattern)) + .find_map( + |PatternMapping { + pattern, + value, + sequence_number, + }| { + pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(Match { + pattern, + value, + source: self.source.as_deref(), + sequence_number: *sequence_number, + }) + }, + ) + } + + /// Like [`pattern_matching_relative_path()`][Self::pattern_matching_relative_path()], but returns an index to the pattern + /// that matched `relative_path`, instead of the match itself. + pub fn pattern_idx_matching_relative_path( + &self, + relative_path: &BStr, + basename_pos: Option<usize>, + is_dir: Option<bool>, + case: gix_glob::pattern::Case, + ) -> Option<usize> { + let (relative_path, basename_start_pos) = + self.strip_base_handle_recompute_basename_pos(relative_path, basename_pos)?; + self.patterns + .iter() + .enumerate() + .rev() + .filter(|(_, pm)| T::may_use_glob_pattern(&pm.pattern)) + .find_map(|(idx, pm)| { + pm.pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(idx) + }) + } + + fn strip_base_handle_recompute_basename_pos<'a>( + &self, + relative_path: &'a BStr, + basename_pos: Option<usize>, + ) -> Option<(&'a BStr, Option<usize>)> { + match self.base.as_deref() { + Some(base) => ( + relative_path.strip_prefix(base.as_slice())?.as_bstr(), + basename_pos.and_then(|pos| { + let pos = pos - base.len(); + (pos != 0).then_some(pos) + }), + ), + None => (relative_path, basename_pos), + } + .into() + } +} + +impl PatternList<Ignore> { + /// Parse a list of patterns, using slashes as path separators + pub fn from_overrides(patterns: impl IntoIterator<Item = impl Into<OsString>>) -> Self { + PatternList { + patterns: patterns + .into_iter() + .map(Into::into) + .enumerate() + .filter_map(|(seq_id, pattern)| { + let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?; + gix_glob::parse(pattern.as_ref()).map(|p| PatternMapping { + pattern: p, + value: (), + sequence_number: seq_id, + }) + }) + .collect(), + source: None, + base: None, + } + } +} diff --git a/vendor/gix-attributes/src/name.rs b/vendor/gix-attributes/src/name.rs new file mode 100644 index 000000000..03064dbda --- /dev/null +++ b/vendor/gix-attributes/src/name.rs @@ -0,0 +1,47 @@ +use bstr::BString; + +use crate::{Name, NameRef}; + +impl<'a> NameRef<'a> { + /// Turn this ref into its owned counterpart. + pub fn to_owned(self) -> Name { + Name(self.0.into()) + } + + /// Return the inner `str`. + pub fn as_str(&self) -> &str { + self.0 + } +} + +impl AsRef<str> for NameRef<'_> { + fn as_ref(&self) -> &str { + self.0 + } +} + +impl<'a> Name { + /// Provide our ref-type. + pub fn as_ref(&'a self) -> NameRef<'a> { + NameRef(self.0.as_ref()) + } + + /// Return the inner `str`. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl AsRef<str> for Name { + fn as_ref(&self) -> &str { + self.0.as_str() + } +} + +/// The error returned by [`parse::Iter`][crate::parse::Iter]. +#[derive(Debug, thiserror::Error)] +#[error("Attribute has non-ascii characters or starts with '-': {attribute}")] +pub struct Error { + /// The attribute that failed to parse. + pub attribute: BString, +} diff --git a/vendor/gix-attributes/src/parse/attribute.rs b/vendor/gix-attributes/src/parse/attribute.rs new file mode 100644 index 000000000..9e4b4c66e --- /dev/null +++ b/vendor/gix-attributes/src/parse/attribute.rs @@ -0,0 +1,172 @@ +use std::borrow::Cow; + +use bstr::{BStr, ByteSlice}; + +use crate::{name, AssignmentRef, Name, NameRef, StateRef}; + +/// The kind of attribute that was parsed. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Kind { + /// A pattern to match paths against + Pattern(gix_glob::Pattern), + /// The name of the macro to define, always a valid attribute name + Macro(Name), +} + +mod error { + use bstr::BString; + /// The error returned by [`parse::Lines`][crate::parse::Lines]. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("Line {line_number} has a negative pattern, for literal characters use \\!: {line}")] + PatternNegation { line_number: usize, line: BString }, + #[error("Attribute in line {line_number} has non-ascii characters or starts with '-': {attribute}")] + AttributeName { line_number: usize, attribute: BString }, + #[error("Macro in line {line_number} has non-ascii characters or starts with '-': {macro_name}")] + MacroName { line_number: usize, macro_name: BString }, + #[error("Could not unquote attributes line")] + Unquote(#[from] gix_quote::ansi_c::undo::Error), + } +} +pub use error::Error; + +/// An iterator over attribute assignments, parsed line by line. +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +/// An iterator over attribute assignments in a single line. +pub struct Iter<'a> { + attrs: bstr::Fields<'a>, +} + +impl<'a> Iter<'a> { + /// Create a new instance to parse attribute assignments from `input`. + pub fn new(input: &'a BStr) -> Self { + Iter { attrs: input.fields() } + } + + fn parse_attr(&self, attr: &'a [u8]) -> Result<AssignmentRef<'a>, name::Error> { + let mut tokens = attr.splitn(2, |b| *b == b'='); + let attr = tokens.next().expect("attr itself").as_bstr(); + let possibly_value = tokens.next(); + let (attr, state) = if attr.first() == Some(&b'-') { + (&attr[1..], StateRef::Unset) + } else if attr.first() == Some(&b'!') { + (&attr[1..], StateRef::Unspecified) + } else { + ( + attr, + possibly_value + .map(|v| StateRef::Value(v.as_bstr())) + .unwrap_or(StateRef::Set), + ) + }; + Ok(AssignmentRef::new(check_attr(attr)?, state)) + } +} + +fn check_attr(attr: &BStr) -> Result<NameRef<'_>, name::Error> { + fn attr_valid(attr: &BStr) -> bool { + if attr.first() == Some(&b'-') { + return false; + } + + attr.bytes() + .all(|b| matches!(b, b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9')) + } + + attr_valid(attr) + .then(|| NameRef(attr.to_str().expect("no illformed utf8"))) + .ok_or_else(|| name::Error { attribute: attr.into() }) +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<AssignmentRef<'a>, name::Error>; + + fn next(&mut self) -> Option<Self::Item> { + let attr = self.attrs.next().filter(|a| !a.is_empty())?; + self.parse_attr(attr).into() + } +} + +impl<'a> Lines<'a> { + /// Create a new instance to parse all attributes in all lines of the input `bytes`. + pub fn new(bytes: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(bytes); + Lines { + lines: bytes[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = Result<(Kind, Iter<'a>, usize), Error>; + + fn next(&mut self) -> Option<Self::Item> { + fn skip_blanks(line: &BStr) -> &BStr { + line.find_not_byteset(BLANKS).map(|pos| &line[pos..]).unwrap_or(line) + } + for line in self.lines.by_ref() { + self.line_no += 1; + let line = skip_blanks(line.into()); + if line.first() == Some(&b'#') { + continue; + } + match parse_line(line, self.line_no) { + None => continue, + Some(res) => return Some(res), + } + } + None + } +} + +fn parse_line(line: &BStr, line_number: usize) -> Option<Result<(Kind, Iter<'_>, usize), Error>> { + if line.is_empty() { + return None; + } + + let (line, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") { + let (unquoted, consumed) = match gix_quote::ansi_c::undo(line) { + Ok(res) => res, + Err(err) => return Some(Err(err.into())), + }; + (unquoted, &line[consumed..]) + } else { + line.find_byteset(BLANKS) + .map(|pos| (line[..pos].as_bstr().into(), line[pos..].as_bstr())) + .unwrap_or((line.into(), [].as_bstr())) + }; + + let kind_res = match line.strip_prefix(b"[attr]") { + Some(macro_name) => check_attr(macro_name.into()) + .map(|name| Kind::Macro(name.to_owned())) + .map_err(|err| Error::MacroName { + line_number, + macro_name: err.attribute, + }), + None => { + let pattern = gix_glob::Pattern::from_bytes(line.as_ref())?; + if pattern.mode.contains(gix_glob::pattern::Mode::NEGATIVE) { + Err(Error::PatternNegation { + line: line.into_owned(), + line_number, + }) + } else { + Ok(Kind::Pattern(pattern)) + } + } + }; + let kind = match kind_res { + Ok(kind) => kind, + Err(err) => return Some(Err(err)), + }; + Ok((kind, Iter::new(attrs), line_number)).into() +} + +const BLANKS: &[u8] = b" \t\r"; diff --git a/vendor/gix-attributes/src/parse/ignore.rs b/vendor/gix-attributes/src/parse/ignore.rs new file mode 100644 index 000000000..a27ee0285 --- /dev/null +++ b/vendor/gix-attributes/src/parse/ignore.rs @@ -0,0 +1,36 @@ +use bstr::ByteSlice; + +/// An iterator over line-wise ignore patterns parsed from a buffer. +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +impl<'a> Lines<'a> { + /// Create a new instance from `buf` to parse ignore patterns from. + pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (gix_glob::Pattern, usize); + + fn next(&mut self) -> Option<Self::Item> { + for line in self.lines.by_ref() { + self.line_no += 1; + if line.first() == Some(&b'#') { + continue; + } + match gix_glob::Pattern::from_bytes(line) { + None => continue, + Some(pattern) => return Some((pattern, self.line_no)), + } + } + None + } +} diff --git a/vendor/gix-attributes/src/parse/mod.rs b/vendor/gix-attributes/src/parse/mod.rs new file mode 100644 index 000000000..82cacc8ed --- /dev/null +++ b/vendor/gix-attributes/src/parse/mod.rs @@ -0,0 +1,10 @@ +/// +pub mod ignore; + +mod attribute; +pub use attribute::{Error, Iter, Kind, Lines}; + +/// Parse git ignore patterns, line by line, from `bytes`. +pub fn ignore(bytes: &[u8]) -> ignore::Lines<'_> { + ignore::Lines::new(bytes) +} diff --git a/vendor/gix-attributes/src/state.rs b/vendor/gix-attributes/src/state.rs new file mode 100644 index 000000000..02dc8ee0d --- /dev/null +++ b/vendor/gix-attributes/src/state.rs @@ -0,0 +1,33 @@ +use bstr::ByteSlice; + +use crate::{State, StateRef}; + +impl<'a> StateRef<'a> { + /// Turn ourselves into our owned counterpart. + pub fn to_owned(self) -> State { + self.into() + } +} + +impl<'a> State { + /// Turn ourselves into our ref-type. + pub fn as_ref(&'a self) -> StateRef<'a> { + match self { + State::Value(v) => StateRef::Value(v.as_bytes().as_bstr()), + State::Set => StateRef::Set, + State::Unset => StateRef::Unset, + State::Unspecified => StateRef::Unspecified, + } + } +} + +impl<'a> From<StateRef<'a>> for State { + fn from(s: StateRef<'a>) -> Self { + match s { + StateRef::Value(v) => State::Value(v.to_str().expect("no illformed unicode").into()), + StateRef::Set => State::Set, + StateRef::Unset => State::Unset, + StateRef::Unspecified => State::Unspecified, + } + } +} |