diff options
Diffstat (limited to 'vendor/gix-pathspec/src/parse.rs')
-rw-r--r-- | vendor/gix-pathspec/src/parse.rs | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/vendor/gix-pathspec/src/parse.rs b/vendor/gix-pathspec/src/parse.rs new file mode 100644 index 000000000..9a21511ca --- /dev/null +++ b/vendor/gix-pathspec/src/parse.rs @@ -0,0 +1,263 @@ +use std::borrow::Cow; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::{Defaults, MagicSignature, Pattern, SearchMode}; + +/// The error returned by [parse()][crate::parse()]. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("An empty string is not a valid pathspec")] + EmptyString, + #[error("Found {keyword:?} in signature, which is not a valid keyword")] + InvalidKeyword { keyword: BString }, + #[error("Unimplemented short keyword: {short_keyword:?}")] + Unimplemented { short_keyword: char }, + #[error("Missing ')' at the end of pathspec signature")] + MissingClosingParenthesis, + #[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")] + InvalidAttribute { attribute: BString }, + #[error("Invalid character in attribute value: {character:?}")] + InvalidAttributeValue { character: char }, + #[error("Escape character '\\' is not allowed as the last character in an attribute value")] + TrailingEscapeCharacter, + #[error("Attribute specification cannot be empty")] + EmptyAttribute, + #[error("Only one attribute specification is allowed in the same pathspec")] + MultipleAttributeSpecifications, + #[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")] + IncompatibleSearchModes, +} + +impl Pattern { + /// Try to parse a path-spec pattern from the given `input` bytes. + pub fn from_bytes( + input: &[u8], + Defaults { + signature, + search_mode, + literal, + }: Defaults, + ) -> Result<Self, Error> { + if input.is_empty() { + return Err(Error::EmptyString); + } + if literal { + return Ok(Self::from_literal(input, signature)); + } + if input.as_bstr() == ":" { + return Ok(Pattern { + nil: true, + ..Default::default() + }); + } + + let mut p = Pattern { + signature, + search_mode: SearchMode::default(), + ..Default::default() + }; + + let mut cursor = 0; + if input.first() == Some(&b':') { + cursor += 1; + p.signature |= parse_short_keywords(input, &mut cursor)?; + if let Some(b'(') = input.get(cursor) { + cursor += 1; + parse_long_keywords(input, &mut p, &mut cursor)?; + } + } + + if search_mode != Default::default() && p.search_mode == Default::default() { + p.search_mode = search_mode; + } + let mut path = &input[cursor..]; + if path.last() == Some(&b'/') { + p.signature |= MagicSignature::MUST_BE_DIR; + path = &path[..path.len() - 1]; + } + p.path = path.into(); + Ok(p) + } + + /// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and + /// use `default_signature` as magic signature. + pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self { + Pattern { + path: input.into(), + signature: default_signature, + search_mode: SearchMode::Literal, + ..Default::default() + } + } +} + +fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> { + let unimplemented_chars = b"\"#%&'-',;<=>@_`~"; + + let mut signature = MagicSignature::empty(); + while let Some(&b) = input.get(*cursor) { + *cursor += 1; + signature |= match b { + b'/' => MagicSignature::TOP, + b'^' | b'!' => MagicSignature::EXCLUDE, + b':' => break, + _ if unimplemented_chars.contains(&b) => { + return Err(Error::Unimplemented { + short_keyword: b.into(), + }); + } + _ => { + *cursor -= 1; + break; + } + } + } + + Ok(signature) +} + +fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> { + let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?; + + let input = &input[*cursor..end]; + *cursor = end + 1; + + if input.is_empty() { + return Ok(()); + } + + split_on_non_escaped_char(input, b',', |keyword| { + let attr_prefix = b"attr:"; + match keyword { + b"attr" => {} + b"top" => p.signature |= MagicSignature::TOP, + b"icase" => p.signature |= MagicSignature::ICASE, + b"exclude" => p.signature |= MagicSignature::EXCLUDE, + b"literal" => match p.search_mode { + SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes), + _ => p.search_mode = SearchMode::Literal, + }, + b"glob" => match p.search_mode { + SearchMode::Literal => return Err(Error::IncompatibleSearchModes), + _ => p.search_mode = SearchMode::PathAwareGlob, + }, + _ if keyword.starts_with(attr_prefix) => { + if p.attributes.is_empty() { + p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?; + } else { + return Err(Error::MultipleAttributeSpecifications); + } + } + _ => { + return Err(Error::InvalidKeyword { + keyword: BString::from(keyword), + }); + } + }; + Ok(()) + }) +} + +fn split_on_non_escaped_char( + input: &[u8], + split_char: u8, + mut f: impl FnMut(&[u8]) -> Result<(), Error>, +) -> Result<(), Error> { + let mut i = 0; + let mut last = 0; + for window in input.windows(2) { + i += 1; + if window[0] != b'\\' && window[1] == split_char { + let keyword = &input[last..i]; + f(keyword)?; + last = i + 1; + } + } + let last_keyword = &input[last..]; + f(last_keyword) +} + +fn parse_attributes(input: &[u8]) -> Result<Vec<gix_attributes::Assignment>, Error> { + if input.is_empty() { + return Err(Error::EmptyAttribute); + } + + let unescaped = unescape_attribute_values(input.into())?; + + gix_attributes::parse::Iter::new(unescaped.as_bstr()) + .map(|res| res.map(gix_attributes::AssignmentRef::to_owned)) + .collect::<Result<Vec<_>, _>>() + .map_err(|e| Error::InvalidAttribute { attribute: e.attribute }) +} + +fn unescape_attribute_values(input: &BStr) -> Result<Cow<'_, BStr>, Error> { + if !input.contains(&b'=') { + return Ok(Cow::Borrowed(input)); + } + + let mut out: Cow<'_, BStr> = Cow::Borrowed("".into()); + + for attr in input.split(|&c| c == b' ') { + let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1); + let (name, value) = attr.split_at(split_point); + + if value.contains(&b'\\') { + let out = out.to_mut(); + out.push_str(name); + out.push_str(unescape_and_check_attr_value(value.into())?); + out.push(b' '); + } else { + check_attribute_value(value.as_bstr())?; + match out { + Cow::Borrowed(_) => { + let end = out.len() + attr.len() + 1; + out = Cow::Borrowed(&input[0..end.min(input.len())]); + } + Cow::Owned(_) => { + let out = out.to_mut(); + out.push_str(name); + out.push_str(value); + out.push(b' '); + } + } + } + } + + Ok(out) +} + +fn unescape_and_check_attr_value(value: &BStr) -> Result<BString, Error> { + let mut out = BString::from(Vec::with_capacity(value.len())); + let mut bytes = value.iter(); + while let Some(mut b) = bytes.next().copied() { + if b == b'\\' { + b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?; + } + + out.push(validated_attr_value_byte(b)?); + } + Ok(out) +} + +fn check_attribute_value(input: &BStr) -> Result<(), Error> { + match input.iter().copied().find(|b| !is_valid_attr_value(*b)) { + Some(b) => Err(Error::InvalidAttributeValue { character: b as char }), + None => Ok(()), + } +} + +fn is_valid_attr_value(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || b",-_".contains(&byte) +} + +fn validated_attr_value_byte(byte: u8) -> Result<u8, Error> { + if is_valid_attr_value(byte) { + Ok(byte) + } else { + Err(Error::InvalidAttributeValue { + character: byte as char, + }) + } +} |