summaryrefslogtreecommitdiffstats
path: root/vendor/gix-attributes/src/parse.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-attributes/src/parse.rs')
-rw-r--r--vendor/gix-attributes/src/parse.rs169
1 files changed, 169 insertions, 0 deletions
diff --git a/vendor/gix-attributes/src/parse.rs b/vendor/gix-attributes/src/parse.rs
new file mode 100644
index 000000000..0b70cb306
--- /dev/null
+++ b/vendor/gix-attributes/src/parse.rs
@@ -0,0 +1,169 @@
+use std::borrow::Cow;
+
+use bstr::{BStr, ByteSlice};
+use kstring::KStringRef;
+
+use crate::{name, AssignmentRef, Name, NameRef, StateRef};
+
+/// The kind of attribute that was parsed.
+///
+/// Each parsed line either starts with a path pattern or, when prefixed with `[attr]`,
+/// with the name of a macro.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum Kind {
+ /// A pattern to match paths against.
+ Pattern(gix_glob::Pattern),
+ /// The name of the macro to define, always a valid attribute name.
+ Macro(Name),
+}
+
+mod error {
+ use bstr::BString;
+ /// The error returned by [`parse::Lines`][crate::parse::Lines].
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ /// The pattern of the line was parsed as a negative pattern, which is rejected for attribute lines.
+ #[error("Line {line_number} has a negative pattern, for literal characters use \\!: {line}")]
+ PatternNegation { line_number: usize, line: BString },
+ /// An attribute in the given line has an invalid name; `attribute` holds the offending bytes.
+ #[error("Attribute in line {line_number} has non-ascii characters or starts with '-': {attribute}")]
+ AttributeName { line_number: usize, attribute: BString },
+ /// The name following the `[attr]` prefix failed attribute name validation.
+ #[error("Macro in line {line_number} has non-ascii characters or starts with '-': {macro_name}")]
+ MacroName { line_number: usize, macro_name: BString },
+ /// A line starting with a double quote could not be unquoted.
+ #[error("Could not unquote attributes line")]
+ Unquote(#[from] gix_quote::ansi_c::undo::Error),
+ }
+}
+pub use error::Error;
+
+/// An iterator over attribute assignments, parsed line by line.
+///
+/// Each item is the [`Kind`] of the line, an [`Iter`] over the assignments that follow it,
+/// and the number of the line it was found on.
+pub struct Lines<'a> {
+ /// The lines of the input that were not consumed yet.
+ lines: bstr::Lines<'a>,
+ /// The amount of lines consumed so far, which makes reported line numbers start at 1.
+ line_no: usize,
+}
+
+/// An iterator over attribute assignments in a single line.
+///
+/// Each item is either a parsed assignment or the error for an invalid attribute name.
+pub struct Iter<'a> {
+ /// The not yet consumed fields of the line, as produced by `fields()` on the input.
+ attrs: bstr::Fields<'a>,
+}
+
+impl<'a> Iter<'a> {
+    /// Create a new instance to parse attribute assignments from `input`.
+    pub fn new(input: &'a BStr) -> Self {
+        Self { attrs: input.fields() }
+    }
+
+    /// Parse a single field `attr` into an assignment.
+    ///
+    /// A leading `-` marks the attribute as unset and a leading `!` as unspecified; in both
+    /// cases anything after a `=` is ignored. Otherwise the bytes after the first `=` become
+    /// the value, and the attribute is merely set if there is no `=`.
+    ///
+    /// Fails if the remaining name is not a valid attribute name.
+    fn parse_attr(&self, attr: &'a [u8]) -> Result<AssignmentRef<'a>, name::Error> {
+        let mut parts = attr.splitn(2, |byte| *byte == b'=');
+        let raw_name = parts.next().expect("attr itself").as_bstr();
+        let value = parts.next();
+        let (name, state) = match raw_name.first() {
+            Some(b'-') => (&raw_name[1..], StateRef::Unset),
+            Some(b'!') => (&raw_name[1..], StateRef::Unspecified),
+            _ => (raw_name, value.map_or(StateRef::Set, StateRef::from_bytes)),
+        };
+        check_attr(name).map(|name| AssignmentRef::new(name, state))
+    }
+}
+
+/// Validate `attr` as the name of an attribute or macro and wrap it as [`NameRef`].
+///
+/// A name is valid if it is not empty, does not start with `-`, and consists only of ASCII
+/// letters, ASCII digits, `-`, `.` and `_`. This mirrors `attr_name_valid()` in git, which
+/// rejects empty names as well.
+///
+/// # Errors
+///
+/// Returns a [`name::Error`] carrying the offending bytes if `attr` is not a valid name.
+fn check_attr(attr: &BStr) -> Result<NameRef<'_>, name::Error> {
+    fn attr_valid(attr: &BStr) -> bool {
+        match attr.first() {
+            // An empty name, e.g. from a lone `-`, `!` or `[attr]`, is as invalid as a leading dash.
+            None | Some(b'-') => false,
+            Some(_) => attr
+                .bytes()
+                .all(|b| matches!(b, b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9')),
+        }
+    }
+
+    attr_valid(attr)
+        // Cannot fail: a valid name was just verified to be pure ASCII.
+        .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8"))))
+        .ok_or_else(|| name::Error { attribute: attr.into() })
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = Result<AssignmentRef<'a>, name::Error>;
+
+    /// Parse the next field of the line, or return `None` once there are no more fields
+    /// or the next field is empty.
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.attrs.next() {
+            Some(field) if !field.is_empty() => Some(self.parse_attr(field)),
+            _ => None,
+        }
+    }
+}
+
+/// Instantiation
+impl<'a> Lines<'a> {
+    /// Create a new instance to parse all attributes in all lines of the input `bytes`.
+    ///
+    /// A byte order mark at the start of `bytes` is skipped and not treated as content.
+    pub fn new(bytes: &'a [u8]) -> Self {
+        let bom_len = unicode_bom::Bom::from(bytes).len();
+        let content = &bytes[bom_len..];
+        Self {
+            lines: content.lines(),
+            line_no: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for Lines<'a> {
+    type Item = Result<(Kind, Iter<'a>, usize), Error>;
+
+    /// Advance to the next line that yields a pattern or macro, counting every line read.
+    ///
+    /// Leading blanks are stripped, and lines whose first non-blank byte is `#` are
+    /// skipped, as are lines for which `parse_line()` returns nothing.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let raw = self.lines.next()?;
+            self.line_no += 1;
+
+            // A line made up of blanks only is passed on unchanged.
+            let content = match raw.find_not_byteset(BLANKS) {
+                Some(start) => raw[start..].as_bstr(),
+                None => raw.as_bstr(),
+            };
+            if content.first() == Some(&b'#') {
+                continue;
+            }
+            if let Some(item) = parse_line(content, self.line_no) {
+                return Some(item);
+            }
+        }
+    }
+}
+
+/// Parse a single `line`, with leading blanks already removed, into its kind and an
+/// iterator over the attribute assignments following it, along with `line_number`.
+///
+/// The first token is either terminated by the first blank or, if the line starts with a
+/// double quote, is a quoted string that gets unquoted. A token starting with `[attr]` names
+/// a macro, everything else is parsed as a pattern.
+///
+/// Returns `None` for empty lines and if no pattern could be created from the first token.
+/// Returns an error if unquoting fails, the macro name is invalid or the pattern is negative.
+fn parse_line(line: &BStr, line_number: usize) -> Option<Result<(Kind, Iter<'_>, usize), Error>> {
+    if line.is_empty() {
+        return None;
+    }
+
+    let (pattern_or_macro, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") {
+        match gix_quote::ansi_c::undo(line) {
+            Ok((unquoted, consumed)) => (unquoted, &line[consumed..]),
+            Err(err) => return Some(Err(err.into())),
+        }
+    } else {
+        match line.find_byteset(BLANKS) {
+            Some(pos) => (line[..pos].as_bstr().into(), line[pos..].as_bstr()),
+            // Without any blank the whole line is the first token and there are no attributes.
+            None => (line.into(), [].as_bstr()),
+        }
+    };
+
+    let kind = match pattern_or_macro.strip_prefix(b"[attr]") {
+        Some(macro_name) => match check_attr(macro_name.into()) {
+            Ok(name) => Kind::Macro(name.to_owned()),
+            Err(err) => {
+                return Some(Err(Error::MacroName {
+                    line_number,
+                    macro_name: err.attribute,
+                }))
+            }
+        },
+        None => {
+            // Tokens that do not produce a pattern cause the line to be skipped.
+            let pattern = gix_glob::Pattern::from_bytes(pattern_or_macro.as_ref())?;
+            if pattern.mode.contains(gix_glob::pattern::Mode::NEGATIVE) {
+                return Some(Err(Error::PatternNegation {
+                    line: pattern_or_macro.into_owned(),
+                    line_number,
+                }));
+            }
+            Kind::Pattern(pattern)
+        }
+    };
+    Some(Ok((kind, Iter::new(attrs), line_number)))
+}
+
+/// The bytes treated as blanks when skipping leading whitespace and when finding the end of
+/// the first token of a line: space, tab and carriage return.
+const BLANKS: &[u8] = b" \t\r";