use std::borrow::Cow; use bstr::{BStr, BString, ByteSlice, ByteVec}; use crate::{Defaults, MagicSignature, Pattern, SearchMode}; /// The error returned by [parse()][crate::parse()]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("An empty string is not a valid pathspec")] EmptyString, #[error("Found {keyword:?} in signature, which is not a valid keyword")] InvalidKeyword { keyword: BString }, #[error("Unimplemented short keyword: {short_keyword:?}")] Unimplemented { short_keyword: char }, #[error("Missing ')' at the end of pathspec signature")] MissingClosingParenthesis, #[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")] InvalidAttribute { attribute: BString }, #[error("Invalid character in attribute value: {character:?}")] InvalidAttributeValue { character: char }, #[error("Escape character '\\' is not allowed as the last character in an attribute value")] TrailingEscapeCharacter, #[error("Attribute specification cannot be empty")] EmptyAttribute, #[error("Only one attribute specification is allowed in the same pathspec")] MultipleAttributeSpecifications, #[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")] IncompatibleSearchModes, } impl Pattern { /// Try to parse a path-spec pattern from the given `input` bytes. pub fn from_bytes( input: &[u8], Defaults { signature, search_mode, literal, }: Defaults, ) -> Result { if input.is_empty() { return Err(Error::EmptyString); } if literal { return Ok(Self::from_literal(input, signature)); } if input.as_bstr() == ":" { return Ok(Pattern { nil: true, ..Default::default() }); } let mut p = Pattern { signature, search_mode: SearchMode::default(), ..Default::default() }; let mut cursor = 0; if input.first() == Some(&b':') { cursor += 1; p.signature |= parse_short_keywords(input, &mut cursor)?; if let Some(b'(') = input.get(cursor) { cursor += 1; parse_long_keywords(input, &mut p, &mut cursor)?; } } if search_mode != Default::default() && p.search_mode == Default::default() { p.search_mode = search_mode; } let mut path = &input[cursor..]; if path.last() == Some(&b'/') { p.signature |= MagicSignature::MUST_BE_DIR; path = &path[..path.len() - 1]; } p.path = path.into(); Ok(p) } /// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and /// use `default_signature` as magic signature. pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self { Pattern { path: input.into(), signature: default_signature, search_mode: SearchMode::Literal, ..Default::default() } } } fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result { let unimplemented_chars = b"\"#%&'-',;<=>@_`~"; let mut signature = MagicSignature::empty(); while let Some(&b) = input.get(*cursor) { *cursor += 1; signature |= match b { b'/' => MagicSignature::TOP, b'^' | b'!' => MagicSignature::EXCLUDE, b':' => break, _ if unimplemented_chars.contains(&b) => { return Err(Error::Unimplemented { short_keyword: b.into(), }); } _ => { *cursor -= 1; break; } } } Ok(signature) } fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> { let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?; let input = &input[*cursor..end]; *cursor = end + 1; if input.is_empty() { return Ok(()); } split_on_non_escaped_char(input, b',', |keyword| { let attr_prefix = b"attr:"; match keyword { b"attr" => {} b"top" => p.signature |= MagicSignature::TOP, b"icase" => p.signature |= MagicSignature::ICASE, b"exclude" => p.signature |= MagicSignature::EXCLUDE, b"literal" => match p.search_mode { SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes), _ => p.search_mode = SearchMode::Literal, }, b"glob" => match p.search_mode { SearchMode::Literal => return Err(Error::IncompatibleSearchModes), _ => p.search_mode = SearchMode::PathAwareGlob, }, _ if keyword.starts_with(attr_prefix) => { if p.attributes.is_empty() { p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?; } else { return Err(Error::MultipleAttributeSpecifications); } } _ => { return Err(Error::InvalidKeyword { keyword: BString::from(keyword), }); } }; Ok(()) }) } fn split_on_non_escaped_char( input: &[u8], split_char: u8, mut f: impl FnMut(&[u8]) -> Result<(), Error>, ) -> Result<(), Error> { let mut i = 0; let mut last = 0; for window in input.windows(2) { i += 1; if window[0] != b'\\' && window[1] == split_char { let keyword = &input[last..i]; f(keyword)?; last = i + 1; } } let last_keyword = &input[last..]; f(last_keyword) } fn parse_attributes(input: &[u8]) -> Result, Error> { if input.is_empty() { return Err(Error::EmptyAttribute); } let unescaped = unescape_attribute_values(input.into())?; gix_attributes::parse::Iter::new(unescaped.as_bstr()) .map(|res| res.map(gix_attributes::AssignmentRef::to_owned)) .collect::, _>>() .map_err(|e| Error::InvalidAttribute { attribute: e.attribute }) } fn unescape_attribute_values(input: &BStr) -> Result, Error> { if !input.contains(&b'=') { return Ok(Cow::Borrowed(input)); } let mut out: Cow<'_, BStr> = Cow::Borrowed("".into()); for attr in input.split(|&c| c == b' ') { let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1); let (name, value) = attr.split_at(split_point); if value.contains(&b'\\') { let out = out.to_mut(); out.push_str(name); out.push_str(unescape_and_check_attr_value(value.into())?); out.push(b' '); } else { check_attribute_value(value.as_bstr())?; match out { Cow::Borrowed(_) => { let end = out.len() + attr.len() + 1; out = Cow::Borrowed(&input[0..end.min(input.len())]); } Cow::Owned(_) => { let out = out.to_mut(); out.push_str(name); out.push_str(value); out.push(b' '); } } } } Ok(out) } fn unescape_and_check_attr_value(value: &BStr) -> Result { let mut out = BString::from(Vec::with_capacity(value.len())); let mut bytes = value.iter(); while let Some(mut b) = bytes.next().copied() { if b == b'\\' { b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?; } out.push(validated_attr_value_byte(b)?); } Ok(out) } fn check_attribute_value(input: &BStr) -> Result<(), Error> { match input.iter().copied().find(|b| !is_valid_attr_value(*b)) { Some(b) => Err(Error::InvalidAttributeValue { character: b as char }), None => Ok(()), } } fn is_valid_attr_value(byte: u8) -> bool { byte.is_ascii_alphanumeric() || b",-_".contains(&byte) } fn validated_attr_value_byte(byte: u8) -> Result { if is_valid_attr_value(byte) { Ok(byte) } else { Err(Error::InvalidAttributeValue { character: byte as char, }) } }