diff options
Diffstat (limited to 'vendor/gix-config/src/parse')
-rw-r--r-- | vendor/gix-config/src/parse/event.rs | 2 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/events.rs | 36 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/key.rs | 3 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/mod.rs | 2 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/nom/mod.rs | 560 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/nom/tests.rs | 557 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/header.rs | 2 |
7 files changed, 523 insertions, 639 deletions
diff --git a/vendor/gix-config/src/parse/event.rs b/vendor/gix-config/src/parse/event.rs index b7b96934d..f528e2077 100644 --- a/vendor/gix-config/src/parse/event.rs +++ b/vendor/gix-config/src/parse/event.rs @@ -33,7 +33,7 @@ impl Event<'_> { /// Stream ourselves to the given `out`, in order to reproduce this event mostly losslessly /// as it was parsed. - pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + pub fn write_to(&self, mut out: &mut dyn std::io::Write) -> std::io::Result<()> { match self { Self::ValueNotDone(e) => { out.write_all(e.as_ref())?; diff --git a/vendor/gix-config/src/parse/events.rs b/vendor/gix-config/src/parse/events.rs index 24bb45253..f3f527500 100644 --- a/vendor/gix-config/src/parse/events.rs +++ b/vendor/gix-config/src/parse/events.rs @@ -10,26 +10,26 @@ use crate::{ /// A type store without allocation all events that are typically preceding the first section. pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; -/// A zero-copy `gix-config` file parser. +/// A zero-copy `git-config` file parser. /// -/// This is parser exposes low-level syntactic events from a `gix-config` file. +/// This is parser exposes low-level syntactic events from a `git-config` file. /// Generally speaking, you'll want to use [`File`] as it wraps -/// around the parser to provide a higher-level abstraction to a `gix-config` +/// around the parser to provide a higher-level abstraction to a `git-config` /// file, including querying, modifying, and updating values. /// /// This parser guarantees that the events emitted are sufficient to -/// reconstruct a `gix-config` file identical to the source `gix-config` +/// reconstruct a `git-config` file identical to the source `git-config` /// when writing it. /// /// # Differences between a `.ini` parser /// -/// While the `gix-config` format closely resembles the [`.ini` file format], +/// While the `git-config` format closely resembles the [`.ini` file format], /// there are subtle differences that make them incompatible. For one, the file /// format is not well defined, and there exists no formal specification to /// adhere to. /// /// For concrete examples, some notable differences are: -/// - `gix-config` sections permit subsections via either a quoted string +/// - `git-config` sections permit subsections via either a quoted string /// (`[some-section "subsection"]`) or via the deprecated dot notation /// (`[some-section.subsection]`). Successful parsing these section names is not /// well defined in typical `.ini` parsers. This parser will handle these cases @@ -45,7 +45,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// which should be interpreted as `5hello world` after /// [normalization][crate::value::normalize()]. /// - Line continuations via a `\` character is supported (inside or outside of quotes) -/// - Whitespace handling similarly follows the `gix-config` specification as +/// - Whitespace handling similarly follows the `git-config` specification as /// closely as possible, where excess whitespace after a non-quoted value are /// trimmed, and line continuations onto a new line with excess spaces are kept. /// - Only equal signs (optionally padded by spaces) are valid name/value @@ -74,7 +74,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// /// ## `Value` events do not immediately follow `Key` events /// -/// Consider the following `gix-config` example: +/// Consider the following `git-config` example: /// /// ```text /// [core] @@ -110,7 +110,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// /// ## `KeyValueSeparator` event is not guaranteed to emit /// -/// Consider the following `gix-config` example: +/// Consider the following `git-config` example: /// /// ```text /// [core] @@ -139,7 +139,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// /// ## Quoted values are not unquoted /// -/// Consider the following `gix-config` example: +/// Consider the following `git-config` example: /// /// ```text /// [core] @@ -175,7 +175,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// /// ## Whitespace after line continuations are part of the value /// -/// Consider the following `gix-config` example: +/// Consider the following `git-config` example: /// /// ```text /// [some-section] @@ -183,7 +183,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// c /// ``` /// -/// Because how `gix-config` treats continuations, the whitespace preceding `c` +/// Because how `git-config` treats continuations, the whitespace preceding `c` /// are in fact part of the value of `file`. The fully interpreted key/value /// pair is actually `file=a c`. As a result, the parser will provide this /// split value accordingly: @@ -208,7 +208,7 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// /// [`File`]: crate::File /// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file -/// [`git`'s documentation]: https://git-scm.com/docs/gix-config#_configuration_file +/// [`git`'s documentation]: https://git-scm.com/docs/git-config#_configuration_file /// [`FromStr`]: std::str::FromStr /// [`From<&'_ str>`]: std::convert::From #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] @@ -229,19 +229,19 @@ impl Events<'static> { input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>, ) -> Result<Events<'static>, parse::Error> { - from_bytes(input, |e| e.to_owned(), filter) + from_bytes(input, &|e| e.to_owned(), filter) } } impl<'a> Events<'a> { /// Attempt to zero-copy parse the provided bytes. On success, returns a /// [`Events`] that provides methods to accessing leading comments and sections - /// of a `gix-config` file and can be converted into an iterator of [`Event`] + /// of a `git-config` file and can be converted into an iterator of [`Event`] /// for higher level processing. /// /// Use `filter` to only include those events for which it returns true. pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> { - from_bytes(input, std::convert::identity, filter) + from_bytes(input, &std::convert::identity, filter) } /// Attempt to zero-copy parse the provided `input` string. @@ -288,14 +288,14 @@ impl<'a> TryFrom<&'a [u8]> for Events<'a> { fn from_bytes<'a, 'b>( input: &'a [u8], - convert: impl Fn(Event<'a>) -> Event<'b>, + convert: &dyn Fn(Event<'a>) -> Event<'b>, filter: Option<fn(&Event<'a>) -> bool>, ) -> Result<Events<'b>, parse::Error> { let mut header = None; let mut events = section::Events::default(); let mut frontmatter = FrontMatterEvents::default(); let mut sections = Vec::new(); - parse::from_bytes(input, |e: Event<'_>| match e { + parse::from_bytes(input, &mut |e: Event<'_>| match e { Event::SectionHeader(next_header) => { match header.take() { None => { diff --git a/vendor/gix-config/src/parse/key.rs b/vendor/gix-config/src/parse/key.rs index b0e0376be..0ebb09e5f 100644 --- a/vendor/gix-config/src/parse/key.rs +++ b/vendor/gix-config/src/parse/key.rs @@ -14,8 +14,7 @@ pub struct Key<'a> { /// Parse `input` like `core.bare` or `remote.origin.url` as a `Key` to make its fields available, /// or `None` if there were not at least 2 tokens separated by `.`. /// Note that `input` isn't validated, and is `str` as ascii is a subset of UTF-8 which is required for any valid keys. -pub fn parse_unvalidated<'a>(input: impl Into<&'a BStr>) -> Option<Key<'a>> { - let input = input.into(); +pub fn parse_unvalidated(input: &BStr) -> Option<Key<'_>> { let mut tokens = input.splitn(2, |b| *b == b'.'); let section_name = tokens.next()?; let subsection_or_key = tokens.next()?; diff --git a/vendor/gix-config/src/parse/mod.rs b/vendor/gix-config/src/parse/mod.rs index e943a22b4..e11bbc1e3 100644 --- a/vendor/gix-config/src/parse/mod.rs +++ b/vendor/gix-config/src/parse/mod.rs @@ -1,4 +1,4 @@ -//! This module handles parsing a `gix-config` file. Generally speaking, you +//! This module handles parsing a `git-config` file. Generally speaking, you //! want to use a higher abstraction such as [`File`] unless you have some //! explicit reason to work with events instead. //! diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs index 11d1dea6b..3ae45618d 100644 --- a/vendor/gix-config/src/parse/nom/mod.rs +++ b/vendor/gix-config/src/parse/nom/mod.rs @@ -1,38 +1,38 @@ use std::borrow::Cow; -use bstr::{BStr, BString, ByteSlice, ByteVec}; -use nom::{ - branch::alt, - bytes::complete::{tag, take_till, take_while}, - character::{ - complete::{char, one_of}, - is_space, - }, - combinator::{map, opt}, - error::{Error as NomError, ErrorKind}, - multi::{fold_many0, fold_many1}, - sequence::delimited, - IResult, +use bstr::{BStr, ByteSlice}; +use winnow::{ + combinator::{alt, delimited, fold_repeat, opt, preceded, repeat}, + error::{ErrorKind, InputError as NomError, ParserError as _}, + prelude::*, + stream::{Offset as _, Stream as _}, + token::{one_of, take_till0, take_while}, }; use crate::parse::{error::ParseNode, section, Comment, Error, Event}; /// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`. -pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> { +pub fn from_bytes<'i>(mut input: &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> Result<(), Error> { + let start = input.checkpoint(); + let bom = unicode_bom::Bom::from(input); - let mut newlines = 0; - let (i, _) = fold_many0( + input.next_slice(bom.len()); + + fold_repeat( + 0.., alt(( - map(comment, Event::Comment), - map(take_spaces, |whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), - map(take_newlines, |(newline, counter)| { - newlines += counter; - Event::Newline(Cow::Borrowed(newline)) - }), + comment.map(Event::Comment), + take_spaces1.map(|whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), + |i: &mut &'i [u8]| { + let newline = take_newlines1.parse_next(i)?; + let o = Event::Newline(Cow::Borrowed(newline)); + Ok(o) + }, )), || (), |_acc, event| dispatch(event), - )(&input[bom.len()..]) + ) + .parse_next(&mut input) // I don't think this can panic. many0 errors if the child parser returns // a success where the input was not consumed, but alt will only return Ok // if one of its children succeed. However, all of it's children are @@ -40,108 +40,101 @@ pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> R // can never occur. .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers."); - if i.is_empty() { + if input.is_empty() { return Ok(()); } let mut node = ParseNode::SectionHeader; - let res = fold_many1( - |i| section(i, &mut node, &mut dispatch), - || (), - |_acc, additional_newlines| { - newlines += additional_newlines; - }, - )(i); - let (i, _) = res.map_err(|_| Error { - line_number: newlines, - last_attempted_parser: node, - parsed_until: i.as_bstr().into(), + let res = repeat(1.., |i: &mut &'i [u8]| section(i, &mut node, dispatch)) + .map(|()| ()) + .parse_next(&mut input); + res.map_err(|_| { + let newlines = newlines_from(input, start); + Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: input.as_bstr().into(), + } })?; // This needs to happen after we collect sections, otherwise the line number // will be off. - if !i.is_empty() { + if !input.is_empty() { + let newlines = newlines_from(input, start); return Err(Error { line_number: newlines, last_attempted_parser: node, - parsed_until: i.as_bstr().into(), + parsed_until: input.as_bstr().into(), }); } Ok(()) } -fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> { - let (i, comment_tag) = one_of(";#")(i)?; - let (i, comment) = take_till(|c| c == b'\n')(i)?; - Ok(( - i, - Comment { - tag: comment_tag as u8, - text: Cow::Borrowed(comment.as_bstr()), - }, - )) +fn newlines_from(input: &[u8], start: winnow::stream::Checkpoint<&[u8]>) -> usize { + let offset = input.offset_from(&start); + let mut start_input = input; + start_input.reset(start); + start_input.next_slice(offset).iter().filter(|c| **c == b'\n').count() +} + +fn comment<'i>(i: &mut &'i [u8]) -> PResult<Comment<'i>, NomError<&'i [u8]>> { + ( + one_of([';', '#']), + take_till0(|c| c == b'\n').map(|text: &[u8]| Cow::Borrowed(text.as_bstr())), + ) + .map(|(tag, text)| Comment { tag, text }) + .parse_next(i) } #[cfg(test)] mod tests; -fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { - let (mut i, header) = section_header(i)?; +fn section<'i>( + i: &mut &'i [u8], + node: &mut ParseNode, + dispatch: &mut dyn FnMut(Event<'i>), +) -> PResult<(), NomError<&'i [u8]>> { + let start = i.checkpoint(); + let header = section_header(i).map_err(|e| { + i.reset(start); + e + })?; dispatch(Event::SectionHeader(header)); - let mut newlines = 0; - // This would usually be a many0(alt(...)), the manual loop allows us to // optimize vec insertions loop { - let old_i = i; + let start = i.checkpoint(); - if let Ok((new_i, v)) = take_spaces(i) { - if old_i != new_i { - i = new_i; - dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); - } + if let Some(v) = opt(take_spaces1).parse_next(i)? { + dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); } - if let Ok((new_i, (v, new_newlines))) = take_newlines(i) { - if old_i != new_i { - i = new_i; - newlines += new_newlines; - dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); - } + if let Some(v) = opt(take_newlines1).parse_next(i)? { + dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); } - if let Ok((new_i, new_newlines)) = key_value_pair(i, node, dispatch) { - if old_i != new_i { - i = new_i; - newlines += new_newlines; - } - } + key_value_pair(i, node, dispatch)?; - if let Ok((new_i, comment)) = comment(i) { - if old_i != new_i { - i = new_i; - dispatch(Event::Comment(comment)); - } + if let Some(comment) = opt(comment).parse_next(i)? { + dispatch(Event::Comment(comment)); } - if old_i == i { + if i.offset_from(&start) == 0 { break; } } - Ok((i, newlines)) + Ok(()) } -fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> { - let (i, _) = char('[')(i)?; +fn section_header<'i>(i: &mut &'i [u8]) -> PResult<section::Header<'i>, NomError<&'i [u8]>> { // No spaces must be between section name and section start - let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?; + let name = preceded('[', take_while(1.., is_section_char).map(bstr::ByteSlice::as_bstr)).parse_next(i)?; - let name = name.as_bstr(); - if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { + if opt(one_of::<_, _, NomError<&[u8]>>(']')).parse_next(i)?.is_some() { // Either section does not have a subsection or using deprecated // subsection syntax at this point. let header = match memchr::memrchr(b'.', name.as_bytes()) { @@ -158,303 +151,226 @@ fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> { }; if header.name.is_empty() { - return Err(nom::Err::Error(NomError { - input: i, - code: ErrorKind::NoneOf, - })); + return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail)); } - return Ok((i, header)); + return Ok(header); } // Section header must be using modern subsection syntax at this point. - - let (i, whitespace) = take_spaces(i)?; - let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?; - - Ok(( - i, - section::Header { + (take_spaces1, delimited('"', opt(sub_section), "\"]")) + .map(|(whitespace, subsection_name)| section::Header { name: section::Name(Cow::Borrowed(name)), separator: Some(Cow::Borrowed(whitespace)), subsection_name, - }, - )) + }) + .parse_next(i) } -fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> { - let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?; - if found_escape { - let mut buf = BString::default(); - sub_section_delegate(i, &mut |b| buf.push_byte(b)).map(|(i, _)| (i, buf.into())) - } else { - Ok((rest, i[..consumed].as_bstr().into())) - } +fn is_section_char(c: u8) -> bool { + c.is_ascii_alphanumeric() || c == b'-' || c == b'.' } -fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> { - let mut cursor = 0; - let mut bytes = i.iter().copied(); - let mut found_terminator = false; - let mut found_escape = false; - while let Some(mut b) = bytes.next() { - cursor += 1; - if b == b'\n' || b == 0 { - return Err(nom::Err::Error(NomError { - input: &i[cursor..], - code: ErrorKind::NonEmpty, - })); - } - if b == b'"' { - found_terminator = true; - break; - } - if b == b'\\' { - b = bytes.next().ok_or_else(|| { - nom::Err::Error(NomError { - input: &i[cursor..], - code: ErrorKind::NonEmpty, - }) - })?; - found_escape = true; - cursor += 1; - if b == b'\n' { - return Err(nom::Err::Error(NomError { - input: &i[cursor..], - code: ErrorKind::NonEmpty, - })); - } - } - push_byte(b); +fn sub_section<'i>(i: &mut &'i [u8]) -> PResult<Cow<'i, BStr>, NomError<&'i [u8]>> { + let mut output = Cow::Borrowed(Default::default()); + if let Some(sub) = opt(subsection_subset).parse_next(i)? { + output = Cow::Borrowed(sub.as_bstr()); } - - if !found_terminator { - return Err(nom::Err::Error(NomError { - input: &i[cursor..], - code: ErrorKind::NonEmpty, - })); + while let Some(sub) = opt(subsection_subset).parse_next(i)? { + output.to_mut().extend(sub); } - Ok((&i[cursor - 1..], (found_escape, cursor - 1))) + Ok(output) +} + +fn subsection_subset<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> { + alt((subsection_unescaped, subsection_escaped_char)).parse_next(i) +} + +fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> { + take_while(1.., is_subsection_unescaped_char).parse_next(i) +} + +fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> { + preceded('\\', one_of(is_subsection_escapeable_char).recognize()).parse_next(i) } -fn key_value_pair<'a>( - i: &'a [u8], +fn is_subsection_escapeable_char(c: u8) -> bool { + c != b'\n' +} + +fn is_subsection_unescaped_char(c: u8) -> bool { + c != b'"' && c != b'\\' && c != b'\n' && c != 0 +} + +fn key_value_pair<'i>( + i: &mut &'i [u8], node: &mut ParseNode, - dispatch: &mut impl FnMut(Event<'a>), -) -> IResult<&'a [u8], usize> { + dispatch: &mut dyn FnMut(Event<'i>), +) -> PResult<(), NomError<&'i [u8]>> { *node = ParseNode::Name; - let (i, name) = config_name(i)?; + if let Some(name) = opt(config_name).parse_next(i)? { + dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name)))); - dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name)))); + if let Some(whitespace) = opt(take_spaces1).parse_next(i)? { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } - let (i, whitespace) = opt(take_spaces)(i)?; - if let Some(whitespace) = whitespace { - dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + *node = ParseNode::Value; + config_value(i, dispatch) + } else { + Ok(()) } - - *node = ParseNode::Value; - let (i, newlines) = config_value(i, dispatch)?; - Ok((i, newlines)) } /// Parses the config name of a config pair. Assumes the input has already been /// trimmed of any leading whitespace. -fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> { - if i.is_empty() { - return Err(nom::Err::Error(NomError { - input: i, - code: ErrorKind::NonEmpty, - })); - } - - if !i[0].is_ascii_alphabetic() { - return Err(nom::Err::Error(NomError { - input: i, - code: ErrorKind::Alpha, - })); - } - - let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?; - Ok((i, name.as_bstr())) +fn config_name<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> { + ( + one_of(|c: u8| c.is_ascii_alphabetic()), + take_while(0.., |c: u8| c.is_ascii_alphanumeric() || c == b'-'), + ) + .recognize() + .map(bstr::ByteSlice::as_bstr) + .parse_next(i) } -fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { - if let (i, Some(_)) = opt(char('='))(i)? { +fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> { + if opt('=').parse_next(i)?.is_some() { dispatch(Event::KeyValueSeparator); - let (i, whitespace) = opt(take_spaces)(i)?; - if let Some(whitespace) = whitespace { + if let Some(whitespace) = opt(take_spaces1).parse_next(i)? { dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); } - let (i, newlines) = value_impl(i, dispatch)?; - Ok((i, newlines)) + value_impl(i, dispatch) } else { // This is a special way of denoting 'empty' values which a lot of code depends on. // Hence, rather to fix this everywhere else, leave it here and fix it where it matters, namely // when it's about differentiating between a missing key-value separator, and one followed by emptiness. dispatch(Event::Value(Cow::Borrowed("".into()))); - Ok((i, 0)) + Ok(()) } } /// Handles parsing of known-to-be values. This function handles both single /// line values as well as values that are continuations. -fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { - let (i, value_end, newlines, mut dispatch) = { - let new_err = |code| nom::Err::Error(NomError { input: i, code }); - let mut value_end = None::<usize>; - let mut value_start: usize = 0; - let mut newlines = 0; - - let mut prev_char_was_backslash = false; - // This is required to ignore comment markers if they're in a quote. - let mut is_in_quotes = false; - // Used to determine if we return a Value or Value{Not,}Done - let mut partial_value_found = false; - let mut last_value_index: usize = 0; - - let mut bytes = i.iter(); - while let Some(mut c) = bytes.next() { - if prev_char_was_backslash { - prev_char_was_backslash = false; - let mut consumed = 1; - if *c == b'\r' { - c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?; - if *c != b'\n' { - return Err(new_err(ErrorKind::Tag)); - } - consumed += 1; - } +fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> { + let start_checkpoint = i.checkpoint(); + let mut value_start_checkpoint = i.checkpoint(); + let mut value_end = None; - match c { - b'\n' => { - partial_value_found = true; - let backslash = 1; - dispatch(Event::ValueNotDone(Cow::Borrowed( - i[value_start..last_value_index - backslash].as_bstr(), - ))); - let nl_end = last_value_index + consumed; - dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr()))); - value_start = nl_end; - value_end = None; - newlines += 1; - - last_value_index += consumed; - } - b'n' | b't' | b'\\' | b'b' | b'"' => { - last_value_index += 1; - } - _ => { - return Err(new_err(ErrorKind::Escaped)); - } + // This is required to ignore comment markers if they're in a quote. + let mut is_in_quotes = false; + // Used to determine if we return a Value or Value{Not,}Done + let mut partial_value_found = false; + + loop { + let _ = take_while(0.., |c| !matches!(c, b'\n' | b'\\' | b'"' | b';' | b'#')).parse_next(i)?; + if let Some(c) = i.next_token() { + match c { + b'\n' => { + value_end = Some(i.offset_from(&value_start_checkpoint) - 1); + break; } - } else { - match c { - b'\n' => { - value_end = last_value_index.into(); - break; - } - b';' | b'#' if !is_in_quotes => { - value_end = last_value_index.into(); - break; - } - b'\\' => prev_char_was_backslash = true, - b'"' => is_in_quotes = !is_in_quotes, - _ => {} + b';' | b'#' if !is_in_quotes => { + value_end = Some(i.offset_from(&value_start_checkpoint) - 1); + break; } - last_value_index += 1; - } - } + b'\\' => { + let escaped_index = i.offset_from(&value_start_checkpoint); + let escape_index = escaped_index - 1; + let Some(mut c) = i.next_token() else { + i.reset(start_checkpoint); + return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token)); + }; + let mut consumed = 1; + if c == b'\r' { + c = i.next_token().ok_or_else(|| { + i.reset(start_checkpoint); + winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token) + })?; + if c != b'\n' { + i.reset(start_checkpoint); + return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice)); + } + consumed += 1; + } - if prev_char_was_backslash { - return Err(new_err(ErrorKind::Escaped)); - } + match c { + b'\n' => { + partial_value_found = true; - if is_in_quotes { - return Err(new_err(ErrorKind::Tag)); - } + i.reset(value_start_checkpoint); + + let value = i.next_slice(escape_index).as_bstr(); + dispatch(Event::ValueNotDone(Cow::Borrowed(value))); + + i.next_token(); - let value_end = match value_end { - None => { - if last_value_index == 0 { - dispatch(Event::Value(Cow::Borrowed("".into()))); - return Ok((&i[0..], newlines)); - } else { - i.len() + let nl = i.next_slice(consumed).as_bstr(); + dispatch(Event::Newline(Cow::Borrowed(nl))); + + value_start_checkpoint = i.checkpoint(); + value_end = None; + } + b'n' | b't' | b'\\' | b'b' | b'"' => {} + _ => { + i.reset(start_checkpoint); + return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token)); + } + } } + b'"' => is_in_quotes = !is_in_quotes, + _ => {} } - Some(idx) => idx, - }; + } else { + break; + } + } + if is_in_quotes { + i.reset(start_checkpoint); + return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice)); + } - let dispatch = move |value: &'a [u8]| { - if partial_value_found { - dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr()))); + let value_end = match value_end { + None => { + let last_value_index = i.offset_from(&value_start_checkpoint); + if last_value_index == 0 { + dispatch(Event::Value(Cow::Borrowed("".into()))); + return Ok(()); } else { - dispatch(Event::Value(Cow::Borrowed(value.as_bstr()))); + last_value_index } - }; - (&i[value_start..], value_end - value_start, newlines, dispatch) - }; - - let (i, remainder_value) = { - let value_end_no_trailing_whitespace = i[..value_end] - .iter() - .enumerate() - .rev() - .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) - .unwrap_or(0); - ( - &i[value_end_no_trailing_whitespace..], - &i[..value_end_no_trailing_whitespace], - ) + } + Some(idx) => idx, }; - dispatch(remainder_value); + i.reset(value_start_checkpoint); + let value_end_no_trailing_whitespace = i[..value_end] + .iter() + .enumerate() + .rev() + .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) + .unwrap_or(0); + let remainder_value = i.next_slice(value_end_no_trailing_whitespace); + + if partial_value_found { + dispatch(Event::ValueDone(Cow::Borrowed(remainder_value.as_bstr()))); + } else { + dispatch(Event::Value(Cow::Borrowed(remainder_value.as_bstr()))); + } - Ok((i, newlines)) + Ok(()) } -fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> { - let (i, v) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?; - if v.is_empty() { - Err(nom::Err::Error(NomError { - input: i, - code: ErrorKind::Eof, - })) - } else { - Ok((i, v.as_bstr())) - } +fn take_spaces1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> { + take_while(1.., winnow::stream::AsChar::is_space) + .map(bstr::ByteSlice::as_bstr) + .parse_next(i) } -fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> { - let mut counter = 0; - let mut consumed_bytes = 0; - let mut next_must_be_newline = false; - for b in i.iter().copied() { - if !b.is_ascii() { - break; - }; - if b == b'\r' { - if next_must_be_newline { - break; - } - next_must_be_newline = true; - continue; - }; - if b == b'\n' { - counter += 1; - consumed_bytes += if next_must_be_newline { 2 } else { 1 }; - next_must_be_newline = false; - } else { - break; - } - } - let (v, i) = i.split_at(consumed_bytes); - if v.is_empty() { - Err(nom::Err::Error(NomError { - input: i, - code: ErrorKind::Eof, - })) - } else { - Ok((i, (v.as_bstr(), counter))) - } +fn take_newlines1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> { + repeat(1.., alt(("\r\n", "\n"))) + .map(|()| ()) + .recognize() + .map(bstr::ByteSlice::as_bstr) + .parse_next(i) } diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs index f6e8c3d92..d9679222f 100644 --- a/vendor/gix-config/src/parse/nom/tests.rs +++ b/vendor/gix-config/src/parse/nom/tests.rs @@ -1,13 +1,15 @@ use super::*; mod section_headers { + use winnow::prelude::*; + use super::section_header; use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header}; #[test] fn no_subsection() { assert_eq!( - section_header(b"[hello]").unwrap(), + section_header.parse_peek(b"[hello]").unwrap(), fully_consumed(parsed_section_header("hello", None)), ); } @@ -15,7 +17,7 @@ mod section_headers { #[test] fn modern_subsection() { assert_eq!( - section_header(br#"[hello "world"]"#).unwrap(), + section_header.parse_peek(br#"[hello "world"]"#).unwrap(), fully_consumed(parsed_section_header("hello", (" ", "world"))), ); } @@ -23,7 +25,7 @@ mod section_headers { #[test] fn escaped_subsection() { assert_eq!( - section_header(br#"[hello "foo\\bar\""]"#).unwrap(), + section_header.parse_peek(br#"[hello "foo\\bar\""]"#).unwrap(), fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))), ); } @@ -31,11 +33,11 @@ mod section_headers { #[test] fn deprecated_subsection() { assert_eq!( - section_header(br#"[hello.world]"#).unwrap(), + section_header.parse_peek(br#"[hello.world]"#).unwrap(), fully_consumed(parsed_section_header("hello", (".", "world"))) ); assert_eq!( - section_header(br#"[Hello.World]"#).unwrap(), + section_header.parse_peek(br#"[Hello.World]"#).unwrap(), fully_consumed(parsed_section_header("Hello", (".", "World"))) ); } @@ -43,7 +45,7 @@ mod section_headers { #[test] fn empty_legacy_subsection_name() { assert_eq!( - section_header(br#"[hello-world.]"#).unwrap(), + section_header.parse_peek(br#"[hello-world.]"#).unwrap(), fully_consumed(parsed_section_header("hello-world", (".", ""))) ); } @@ -51,7 +53,7 @@ mod section_headers { #[test] fn empty_modern_subsection_name() { assert_eq!( - section_header(br#"[hello ""]"#).unwrap(), + section_header.parse_peek(br#"[hello ""]"#).unwrap(), fully_consumed(parsed_section_header("hello", (" ", ""))) ); } @@ -59,55 +61,55 @@ mod section_headers { #[test] fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() { assert_eq!( - section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(), - fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#))) + section_header.parse_peek(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r"single \ t n 0"))) ); } #[test] fn newline_in_header() { - assert!(section_header(b"[hello\n]").is_err()); + assert!(section_header.parse_peek(b"[hello\n]").is_err()); } #[test] fn newline_in_sub_section() { - assert!(section_header(b"[hello \"hello\n\"]").is_err()); + assert!(section_header.parse_peek(b"[hello \"hello\n\"]").is_err()); } #[test] fn null_byt_in_sub_section() { - assert!(section_header(b"[hello \"hello\0\"]").is_err()); + assert!(section_header.parse_peek(b"[hello \"hello\0\"]").is_err()); } #[test] fn escaped_newline_in_sub_section() { - assert!(section_header(b"[hello \"hello\\\n\"]").is_err()); + assert!(section_header.parse_peek(b"[hello \"hello\\\n\"]").is_err()); } #[test] fn eof_after_escape_in_sub_section() { - assert!(section_header(b"[hello \"hello\\").is_err()); + assert!(section_header.parse_peek(b"[hello \"hello\\").is_err()); } #[test] fn null_byte_in_header() { - assert!(section_header(b"[hello\0]").is_err()); + assert!(section_header.parse_peek(b"[hello\0]").is_err()); } #[test] fn invalid_characters_in_section() { - assert!(section_header(b"[$]").is_err()); + assert!(section_header.parse_peek(b"[$]").is_err()); } #[test] fn invalid_characters_in_legacy_sub_section() { - assert!(section_header(b"[hello.$]").is_err()); - assert!(section_header(b"[hello. world]").is_err()); + assert!(section_header.parse_peek(b"[hello.$]").is_err()); + assert!(section_header.parse_peek(b"[hello. world]").is_err()); } #[test] fn right_brace_in_subsection_name() { assert_eq!( - section_header(br#"[hello "]"]"#).unwrap(), + section_header.parse_peek(br#"[hello "]"]"#).unwrap(), fully_consumed(parsed_section_header("hello", (" ", "]"))) ); } @@ -116,49 +118,51 @@ mod section_headers { mod sub_section { use std::borrow::Cow; + use winnow::prelude::*; + use super::sub_section; #[test] fn zero_copy_simple() { - let actual = sub_section(b"name\"").unwrap().1; + let actual = sub_section.parse_peek(b"name\"").unwrap().1; assert_eq!(actual.as_ref(), "name"); assert!(matches!(actual, Cow::Borrowed(_))); } #[test] fn escapes_need_allocation() { - let actual = sub_section(br#"\x\t\n\0\\\"""#).unwrap().1; + let actual = sub_section.parse_peek(br#"\x\t\n\0\\\"""#).unwrap().1; assert_eq!(actual.as_ref(), r#"xtn0\""#); assert!(matches!(actual, Cow::Owned(_))); } } mod config_name { - use nom::combinator::all_consuming; + use winnow::prelude::*; use super::config_name; use crate::parse::tests::util::fully_consumed; #[test] fn just_name() { - assert_eq!(config_name(b"name").unwrap(), fully_consumed("name".into())); + assert_eq!(config_name.parse_peek(b"name").unwrap(), fully_consumed("name".into())); } #[test] fn must_start_with_alphabetic() { - assert!(config_name(b"4aaa").is_err()); - assert!(config_name(b"-aaa").is_err()); + assert!(config_name.parse_peek(b"4aaa").is_err()); + assert!(config_name.parse_peek(b"-aaa").is_err()); } #[test] fn only_a_subset_of_characters_is_allowed() { - assert!(all_consuming(config_name)(b"Name$_").is_err()); - assert!(all_consuming(config_name)(b"other#").is_err()); + assert!(config_name.parse(b"Name$_").is_err()); + assert!(config_name.parse(b"other#").is_err()); } #[test] fn cannot_be_empty() { - assert!(config_name(b"").is_err()); + assert!(config_name.parse_peek(b"").is_err()); } } @@ -174,28 +178,25 @@ mod section { Event, Section, }; - fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> { + fn section<'a>(mut i: &'a [u8], node: &mut ParseNode) -> winnow::IResult<&'a [u8], Section<'a>> { let mut header = None; let mut events = section::Events::default(); - super::section(i, node, &mut |e| match &header { + super::section(&mut i, node, &mut |e| match &header { None => { header = Some(e); } Some(_) => events.push(e), }) - .map(|(i, o)| { + .map(|_| { ( i, - ( - Section { - header: match header.expect("header set") { - Event::SectionHeader(header) => header, - _ => unreachable!("unexpected"), - }, - events, + Section { + header: match header.expect("header set") { + Event::SectionHeader(header) => header, + _ => unreachable!("unexpected"), }, - o, - ), + events, + }, ) }) } @@ -205,22 +206,19 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[a] k = \r\n", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("a", None), - events: vec![ - whitespace_event(" "), - name_event("k"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event(""), - newline_custom_event("\r\n") - ] - .into(), - }, - 1 - )), + fully_consumed(Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }), ); } @@ -229,41 +227,35 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[a] k = v\r\n", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("a", None), - events: vec![ - whitespace_event(" "), - name_event("k"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("v"), - newline_custom_event("\r\n") - ] - .into(), - }, - 1 - )), + fully_consumed(Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("v"), + newline_custom_event("\r\n") + ] + .into(), + }), ); assert_eq!( section(b"[a] k = \r\n", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("a", None), - events: vec![ - whitespace_event(" "), - name_event("k"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event(""), - newline_custom_event("\r\n") - ] - .into(), - }, - 1 - )), + fully_consumed(Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }), ); } @@ -272,13 +264,10 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[test]", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("test", None), - events: Default::default() - }, - 0 - )), + fully_consumed(Section { + header: parsed_section_header("test", None), + events: Default::default() + }), ); } @@ -291,33 +280,30 @@ mod section { d = "lol""#; assert_eq!( section(section_data, &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("hello", None), - events: vec![ - newline_event(), - whitespace_event(" "), - name_event("a"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("b"), - newline_event(), - whitespace_event(" "), - name_event("c"), - value_event(""), - newline_event(), - whitespace_event(" "), - name_event("d"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("\"lol\"") - ] - .into() - }, - 3 - )) + fully_consumed(Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }) ); } @@ -327,38 +313,32 @@ mod section { let section_data = b"[a] k="; assert_eq!( section(section_data, &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("a", None), - events: vec![ - whitespace_event(" "), - name_event("k"), - Event::KeyValueSeparator, - value_event(""), - ] - .into() - }, - 0 - )) + fully_consumed(Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + ] + .into() + }) ); let section_data = b"[a] k=\n"; assert_eq!( section(section_data, &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("a", None), - events: vec![ - whitespace_event(" "), - name_event("k"), - Event::KeyValueSeparator, - value_event(""), - newline_event(), - ] - .into() - }, - 1 - )) + fully_consumed(Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + ] + .into() + }) ); } @@ -371,34 +351,31 @@ mod section { d = "lol""#; assert_eq!( section(section_data, &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("hello", None), - events: vec![ - newline_event(), - whitespace_event(" "), - name_event("a"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("b"), - newline_event(), - whitespace_event(" "), - name_event("c"), - Event::KeyValueSeparator, - value_event(""), - newline_event(), - whitespace_event(" "), - name_event("d"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("\"lol\"") - ] - .into() - }, - 3 - )) + fully_consumed(Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }) ); } @@ -407,32 +384,26 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[hello] c", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("hello", None), - events: vec![whitespace_event(" "), name_event("c"), value_event("")].into() - }, - 0 - )) + fully_consumed(Section { + header: parsed_section_header("hello", None), + events: vec![whitespace_event(" "), name_event("c"), value_event("")].into() + }) ); assert_eq!( section(b"[hello] c\nd", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("hello", None), - events: vec![ - whitespace_event(" "), - name_event("c"), - value_event(""), - newline_event(), - name_event("d"), - value_event("") - ] - .into() - }, - 1 - )) + fully_consumed(Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + name_event("d"), + value_event("") + ] + .into() + }) ); } @@ -446,39 +417,36 @@ mod section { c = d"#; assert_eq!( section(section_data, &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("hello", None), - events: vec![ - whitespace_event(" "), - comment_event(';', " commentA"), - newline_event(), - whitespace_event(" "), - name_event("a"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("b"), - whitespace_event(" "), - comment_event('#', " commentB"), - newline_event(), - whitespace_event(" "), - comment_event(';', " commentC"), - newline_event(), - whitespace_event(" "), - comment_event(';', " commentD"), - newline_event(), - whitespace_event(" "), - name_event("c"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_event("d"), - ] - .into() - }, - 4 - )) + fully_consumed(Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + comment_event(';', " commentA"), + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + whitespace_event(" "), + comment_event('#', " commentB"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentC"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentD"), + newline_event(), + whitespace_event(" "), + name_event("c"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("d"), + ] + .into() + }) ); } @@ -488,27 +456,24 @@ mod section { // This test is absolute hell. Good luck if this fails. assert_eq!( section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("section", None), - events: vec![ - whitespace_event(" "), - name_event("a"), - whitespace_event(" "), - Event::KeyValueSeparator, - whitespace_event(" "), - value_not_done_event(r#"1 "\""#), - newline_event(), - value_not_done_event(r#"a ; e "\""#), - newline_event(), - value_done_event("d"), - whitespace_event(" "), - comment_event('#', " \"b\t ; c"), - ] - .into() - }, - 2 - )) + fully_consumed(Section { + header: parsed_section_header("section", None), + events: vec![ + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d"), + whitespace_event(" "), + comment_event('#', " \"b\t ; c"), + ] + .into() + }) ); } @@ -517,23 +482,20 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("section", (" ", "a")), - events: vec![ - whitespace_event(" "), - name_event("b"), - whitespace_event(" "), - Event::KeyValueSeparator, - value_not_done_event("\""), - newline_event(), - value_done_event(";\""), - comment_event(';', "a"), - ] - .into() - }, - 1 - )) + fully_consumed(Section { + header: parsed_section_header("section", (" ", "a")), + events: vec![ + whitespace_event(" "), + name_event("b"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_not_done_event("\""), + newline_event(), + value_done_event(";\""), + comment_event(';', "a"), + ] + .into() + }) ); } @@ -542,19 +504,16 @@ mod section { let mut node = ParseNode::SectionHeader; assert_eq!( section(b"[s]hello #world", &mut node).unwrap(), - fully_consumed(( - Section { - header: parsed_section_header("s", None), - events: vec![ - name_event("hello"), - whitespace_event(" "), - value_event(""), - comment_event('#', "world"), - ] - .into() - }, - 0 - )) + fully_consumed(Section { + header: parsed_section_header("s", None), + events: vec![ + name_event("hello"), + whitespace_event(" "), + value_event(""), + comment_event('#', "world"), + ] + .into() + }) ); } } @@ -567,8 +526,8 @@ mod value_continuation { tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event}, }; - pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> { - super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ())) + pub fn value_impl<'a>(mut i: &'a [u8], events: &mut section::Events<'a>) -> winnow::IResult<&'a [u8], ()> { + super::value_impl(&mut i, &mut |e| events.push(e)).map(|_| (i, ())) } #[test] @@ -797,6 +756,7 @@ mod value_no_continuation { } #[test] + #[allow(clippy::needless_raw_string_hashes)] fn trans_escaped_comment_marker_not_consumed() { let mut events = section::Events::default(); assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); @@ -817,7 +777,7 @@ mod value_no_continuation { #[test] fn invalid_escape() { - assert!(value_impl(br#"\x"#, &mut Default::default()).is_err()); + assert!(value_impl(br"\x", &mut Default::default()).is_err()); } #[test] @@ -827,7 +787,7 @@ mod value_no_continuation { #[test] fn incomplete_escape() { - assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err()); + assert!(value_impl(br"hello world\", &mut Default::default()).is_err()); } } @@ -840,18 +800,25 @@ mod key_value_pair { }; fn key_value<'a>( - i: &'a [u8], + mut i: &'a [u8], node: &mut ParseNode, events: &mut section::Events<'a>, - ) -> nom::IResult<&'a [u8], ()> { - super::key_value_pair(i, node, &mut |e| events.push(e)).map(|t| (t.0, ())) + ) -> winnow::IResult<&'a [u8], ()> { + super::key_value_pair(&mut i, node, &mut |e| events.push(e)).map(|_| (i, ())) } #[test] fn nonascii_is_allowed_for_values_but_not_for_keys() { let mut node = ParseNode::SectionHeader; let mut vec = Default::default(); - assert!(key_value("你好".as_bytes(), &mut node, &mut vec).is_err()); + assert!( + key_value("你好".as_bytes(), &mut node, &mut vec).is_ok(), + "Verifying `is_ok` because bad keys get ignored, the caller parser handles this as error" + ); + assert_eq!(vec, into_events(vec![])); + + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut vec).is_ok()); assert_eq!( vec, @@ -895,13 +862,15 @@ mod key_value_pair { } mod comment { + use winnow::prelude::*; + use super::comment; use crate::parse::tests::util::{comment as parsed_comment, fully_consumed}; #[test] fn semicolon() { assert_eq!( - comment(b"; this is a semicolon comment").unwrap(), + comment.parse_peek(b"; this is a semicolon comment").unwrap(), fully_consumed(parsed_comment(';', " this is a semicolon comment")), ); } @@ -909,7 +878,7 @@ mod comment { #[test] fn octothorpe() { assert_eq!( - comment(b"# this is an octothorpe comment").unwrap(), + comment.parse_peek(b"# this is an octothorpe comment").unwrap(), fully_consumed(parsed_comment('#', " this is an octothorpe comment")), ); } @@ -917,7 +886,7 @@ mod comment { #[test] fn multiple_markers() { assert_eq!( - comment(b"###### this is an octothorpe comment").unwrap(), + comment.parse_peek(b"###### this is an octothorpe comment").unwrap(), fully_consumed(parsed_comment('#', "##### this is an octothorpe comment")), ); } diff --git a/vendor/gix-config/src/parse/section/header.rs b/vendor/gix-config/src/parse/section/header.rs index 341edcdd5..14c2519cf 100644 --- a/vendor/gix-config/src/parse/section/header.rs +++ b/vendor/gix-config/src/parse/section/header.rs @@ -147,7 +147,7 @@ fn escape_subsection(name: &BStr) -> Cow<'_, BStr> { let mut buf = Vec::with_capacity(name.len()); for b in name.iter().copied() { match b { - b'\\' => buf.push_str(br#"\\"#), + b'\\' => buf.push_str(br"\\"), b'"' => buf.push_str(br#"\""#), _ => buf.push(b), } |