From 9835e2ae736235810b4ea1c162ca5e65c547e770 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 04:49:50 +0200 Subject: Merging upstream version 1.71.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/gix-ignore/src/lib.rs | 34 ++++++++ vendor/gix-ignore/src/parse.rs | 63 ++++++++++++++ vendor/gix-ignore/src/search.rs | 183 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 vendor/gix-ignore/src/lib.rs create mode 100644 vendor/gix-ignore/src/parse.rs create mode 100644 vendor/gix-ignore/src/search.rs (limited to 'vendor/gix-ignore/src') diff --git a/vendor/gix-ignore/src/lib.rs b/vendor/gix-ignore/src/lib.rs new file mode 100644 index 000000000..20ca1cc8c --- /dev/null +++ b/vendor/gix-ignore/src/lib.rs @@ -0,0 +1,34 @@ +//! Parse `.gitignore` files and provide utilities to match against them. +//! +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +pub use gix_glob as glob; + +/// Types and functions for matching paths against ignore patterns. +pub mod search; +/// A grouping of pattern lists, each possibly associated with a base path, to match paths against. +/// +/// Pattern lists with base path are queryable relative to that base, otherwise they are relative to the repository root. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Search { + /// A list of pattern lists, each representing the patterns from a file or specified by hand, in the order they were + /// specified in. + /// + /// When matching, this order is reversed, so lists added later take precedence. + pub patterns: Vec>, +} + +/// Line-wise parsing of ignore patterns. +pub mod parse; + +/// Parse git ignore patterns, line by line, from `bytes`, yielding each pattern along with its 1-based line number. 
+pub fn parse(bytes: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(bytes) +} diff --git a/vendor/gix-ignore/src/parse.rs b/vendor/gix-ignore/src/parse.rs new file mode 100644 index 000000000..11ceaabaf --- /dev/null +++ b/vendor/gix-ignore/src/parse.rs @@ -0,0 +1,63 @@ +use bstr::ByteSlice; + +/// An iterator over line-wise ignore patterns parsed from a buffer, yielding each pattern with its 1-based line number. Lines starting with `#` and lines that do not yield a pattern are skipped, but still counted. +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +impl<'a> Lines<'a> { + /// Create a new instance from `buf` to parse ignore patterns from, skipping past a leading byte-order mark as detected by `unicode_bom`. + pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (gix_glob::Pattern, usize); + + fn next(&mut self) -> Option { + for line in self.lines.by_ref() { + self.line_no += 1; + if line.first() == Some(&b'#') { + continue; + } + match gix_glob::Pattern::from_bytes(truncate_non_escaped_trailing_spaces(line)) { + None => continue, + Some(pattern) => return Some((pattern, self.line_no)), + } + } + None + } +} + +/// Return `buf` without trailing spaces, except for those escaped with a backslash. The result is a sub-slice of `buf`, nothing is copied. 
+fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> &[u8] { + let mut last_space_pos = None; + let mut bytes = buf.iter().enumerate(); + while let Some((pos, b)) = bytes.next() { + match *b { + b' ' => { + last_space_pos.get_or_insert(pos); + continue; + } + b'\\' => { + if bytes.next().is_none() { + return buf; + } + } + _ => {} + } + last_space_pos = None; + } + + if let Some(pos) = last_space_pos { + &buf[..pos] + } else { + buf + } +} diff --git a/vendor/gix-ignore/src/search.rs b/vendor/gix-ignore/src/search.rs new file mode 100644 index 000000000..5c957f136 --- /dev/null +++ b/vendor/gix-ignore/src/search.rs @@ -0,0 +1,183 @@ +use std::{ + ffi::OsString, + path::{Path, PathBuf}, +}; + +use bstr::{BStr, ByteSlice}; +use gix_glob::search::{pattern, Pattern}; + +use crate::Search; + +/// Describes a matching pattern within a search for ignored paths. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Match<'a, T> { + /// The glob pattern itself, like `/target/*`. + pub pattern: &'a gix_glob::Pattern, + /// The value associated with the pattern. + pub value: &'a T, + /// The path to the source from which the pattern was loaded, or `None` if it was specified by other means. + pub source: Option<&'a Path>, + /// The 1-based line at which the pattern was found when parsed from a buffer, or its 0-based position if it was provided as an override. + pub sequence_number: usize, +} + +/// An implementation of the [`Pattern`] trait for ignore patterns, which carry no value (`()`) and are all usable for matching. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Ignore; + +impl Pattern for Ignore { + type Value = (); + + fn bytes_to_patterns(bytes: &[u8], _source: &std::path::Path) -> Vec> { + crate::parse(bytes) + .map(|(pattern, line_number)| pattern::Mapping { + pattern, + value: (), + sequence_number: line_number, + }) + .collect() + } + + fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool { + true + } +} + +/// Instantiation of a search for ignore patterns. 
+impl Search { + /// Given `git_dir`, a `.git` repository, load static ignore patterns from `info/exclude` + /// and from `excludes_file` if it is provided. + /// Note that it's not considered an error if the provided `excludes_file` does not exist. + pub fn from_git_dir( + git_dir: impl AsRef, + excludes_file: Option, + buf: &mut Vec, + ) -> std::io::Result { + let mut group = Self::default(); + + let follow_symlinks = true; + // order matters! Lists are searched in reverse when matching, so the more important ones are added last. + group.patterns.extend( + excludes_file + .and_then(|file| pattern::List::::from_file(file, None, follow_symlinks, buf).transpose()) + .transpose()?, + ); + group.patterns.extend(pattern::List::::from_file( + git_dir.as_ref().join("info").join("exclude"), + None, + follow_symlinks, + buf, + )?); + Ok(group) + } + + /// Parse a list of patterns, using slashes as path separators. Patterns that cannot be converted or parsed are skipped, and each pattern's sequence number is its position in `patterns`. + pub fn from_overrides(patterns: impl IntoIterator>) -> Self { + Search { + patterns: vec![pattern::List { + patterns: patterns + .into_iter() + .map(Into::into) + .enumerate() + .filter_map(|(seq_id, pattern)| { + let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?; + gix_glob::parse(pattern.as_ref()).map(|p| pattern::Mapping { + pattern: p, + value: (), + sequence_number: seq_id, + }) + }) + .collect(), + source: None, + base: None, + }], + } + } +} + +/// Mutation +impl Search { + /// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they + /// are relative to. This also means that `source` is contained within `root` if `root` is provided. + pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into, root: Option<&Path>) { + self.patterns + .push(pattern::List::from_bytes(bytes, source.into(), root)); + } +} + +/// Return a match for the last pattern in `list` that matches `relative_path`, given a pre-computed `basename_pos` which is the +/// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory. 
+/// `case` specifies whether cases should be folded during matching or not. +pub fn pattern_matching_relative_path<'a>( + list: &'a gix_glob::search::pattern::List, + relative_path: &BStr, + basename_pos: Option, + is_dir: Option, + case: gix_glob::pattern::Case, +) -> Option> { + let (relative_path, basename_start_pos) = + list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?; + list.patterns + .iter() + .rev() + .filter(|pm| Ignore::may_use_glob_pattern(&pm.pattern)) + .find_map( + |pattern::Mapping { + pattern, + value, + sequence_number, + }| { + pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(Match { + pattern, + value, + source: list.source.as_deref(), + sequence_number: *sequence_number, + }) + }, + ) +} + +/// Like [`pattern_matching_relative_path()`], but returns the index into `list.patterns` of the pattern +/// that matched `relative_path`, instead of the match itself. +pub fn pattern_idx_matching_relative_path( + list: &gix_glob::search::pattern::List, + relative_path: &BStr, + basename_pos: Option, + is_dir: Option, + case: gix_glob::pattern::Case, +) -> Option { + let (relative_path, basename_start_pos) = + list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?; + list.patterns + .iter() + .enumerate() + .rev() + .filter(|(_, pm)| Ignore::may_use_glob_pattern(&pm.pattern)) + .find_map(|(idx, pm)| { + pm.pattern + .matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case) + .then_some(idx) + }) +} + +/// Matching of ignore patterns. +impl Search { + /// Match `relative_path` against all pattern lists, last list first, and return the first match if found. + /// `is_dir` is true if `relative_path` is a directory. + /// `case` specifies whether cases should be folded during matching or not. 
+ pub fn pattern_matching_relative_path<'a>( + &self, + relative_path: impl Into<&'a BStr>, + is_dir: Option, + case: gix_glob::pattern::Case, + ) -> Option> { + let relative_path = relative_path.into(); + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + self.patterns + .iter() + .rev() + .find_map(|pl| pattern_matching_relative_path(pl, relative_path, basename_pos, is_dir, case)) + } +} -- cgit v1.2.3