From 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:41:41 +0200 Subject: Merging upstream version 1.70.0+dfsg2. Signed-off-by: Daniel Baumann --- vendor/gix-config/src/parse/nom/mod.rs | 460 +++++++++++++++ vendor/gix-config/src/parse/nom/tests.rs | 924 +++++++++++++++++++++++++++++++ 2 files changed, 1384 insertions(+) create mode 100644 vendor/gix-config/src/parse/nom/mod.rs create mode 100644 vendor/gix-config/src/parse/nom/tests.rs (limited to 'vendor/gix-config/src/parse/nom') diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs new file mode 100644 index 000000000..11d1dea6b --- /dev/null +++ b/vendor/gix-config/src/parse/nom/mod.rs @@ -0,0 +1,460 @@ +use std::borrow::Cow; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use nom::{ + branch::alt, + bytes::complete::{tag, take_till, take_while}, + character::{ + complete::{char, one_of}, + is_space, + }, + combinator::{map, opt}, + error::{Error as NomError, ErrorKind}, + multi::{fold_many0, fold_many1}, + sequence::delimited, + IResult, +}; + +use crate::parse::{error::ParseNode, section, Comment, Error, Event}; + +/// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`. +pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> { + let bom = unicode_bom::Bom::from(input); + let mut newlines = 0; + let (i, _) = fold_many0( + alt(( + map(comment, Event::Comment), + map(take_spaces, |whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), + map(take_newlines, |(newline, counter)| { + newlines += counter; + Event::Newline(Cow::Borrowed(newline)) + }), + )), + || (), + |_acc, event| dispatch(event), + )(&input[bom.len()..]) + // I don't think this can panic. many0 errors if the child parser returns + // a success where the input was not consumed, but alt will only return Ok + // if one of its children succeed. 
However, all of it's children are + // guaranteed to consume something if they succeed, so the Ok(i) == i case + // can never occur. + .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers."); + + if i.is_empty() { + return Ok(()); + } + + let mut node = ParseNode::SectionHeader; + + let res = fold_many1( + |i| section(i, &mut node, &mut dispatch), + || (), + |_acc, additional_newlines| { + newlines += additional_newlines; + }, + )(i); + let (i, _) = res.map_err(|_| Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + })?; + + // This needs to happen after we collect sections, otherwise the line number + // will be off. + if !i.is_empty() { + return Err(Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + }); + } + + Ok(()) +} + +fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> { + let (i, comment_tag) = one_of(";#")(i)?; + let (i, comment) = take_till(|c| c == b'\n')(i)?; + Ok(( + i, + Comment { + tag: comment_tag as u8, + text: Cow::Borrowed(comment.as_bstr()), + }, + )) +} + +#[cfg(test)] +mod tests; + +fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (mut i, header) = section_header(i)?; + dispatch(Event::SectionHeader(header)); + + let mut newlines = 0; + + // This would usually be a many0(alt(...)), the manual loop allows us to + // optimize vec insertions + loop { + let old_i = i; + + if let Ok((new_i, v)) = take_spaces(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, (v, new_newlines))) = take_newlines(i) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, new_newlines)) = key_value_pair(i, node, dispatch) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + } + } + + if 
let Ok((new_i, comment)) = comment(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Comment(comment)); + } + } + + if old_i == i { + break; + } + } + + Ok((i, newlines)) +} + +fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> { + let (i, _) = char('[')(i)?; + // No spaces must be between section name and section start + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?; + + let name = name.as_bstr(); + if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { + // Either section does not have a subsection or using deprecated + // subsection syntax at this point. + let header = match memchr::memrchr(b'.', name.as_bytes()) { + Some(index) => section::Header { + name: section::Name(Cow::Borrowed(name[..index].as_bstr())), + separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())), + subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())), + }, + None => section::Header { + name: section::Name(Cow::Borrowed(name.as_bstr())), + separator: None, + subsection_name: None, + }, + }; + + if header.name.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NoneOf, + })); + } + return Ok((i, header)); + } + + // Section header must be using modern subsection syntax at this point. 
+ + let (i, whitespace) = take_spaces(i)?; + let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?; + + Ok(( + i, + section::Header { + name: section::Name(Cow::Borrowed(name)), + separator: Some(Cow::Borrowed(whitespace)), + subsection_name, + }, + )) +} + +fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> { + let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?; + if found_escape { + let mut buf = BString::default(); + sub_section_delegate(i, &mut |b| buf.push_byte(b)).map(|(i, _)| (i, buf.into())) + } else { + Ok((rest, i[..consumed].as_bstr().into())) + } +} + +fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> { + let mut cursor = 0; + let mut bytes = i.iter().copied(); + let mut found_terminator = false; + let mut found_escape = false; + while let Some(mut b) = bytes.next() { + cursor += 1; + if b == b'\n' || b == 0 { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + if b == b'"' { + found_terminator = true; + break; + } + if b == b'\\' { + b = bytes.next().ok_or_else(|| { + nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + }) + })?; + found_escape = true; + cursor += 1; + if b == b'\n' { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + } + push_byte(b); + } + + if !found_terminator { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + + Ok((&i[cursor - 1..], (found_escape, cursor - 1))) +} + +fn key_value_pair<'a>( + i: &'a [u8], + node: &mut ParseNode, + dispatch: &mut impl FnMut(Event<'a>), +) -> IResult<&'a [u8], usize> { + *node = ParseNode::Name; + let (i, name) = config_name(i)?; + + dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name)))); + + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + 
dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + + *node = ParseNode::Value; + let (i, newlines) = config_value(i, dispatch)?; + Ok((i, newlines)) +} + +/// Parses the config name of a config pair. Assumes the input has already been +/// trimmed of any leading whitespace. +fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> { + if i.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NonEmpty, + })); + } + + if !i[0].is_ascii_alphabetic() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Alpha, + })); + } + + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?; + Ok((i, name.as_bstr())) +} + +fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + if let (i, Some(_)) = opt(char('='))(i)? { + dispatch(Event::KeyValueSeparator); + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + let (i, newlines) = value_impl(i, dispatch)?; + Ok((i, newlines)) + } else { + // This is a special way of denoting 'empty' values which a lot of code depends on. + // Hence, rather to fix this everywhere else, leave it here and fix it where it matters, namely + // when it's about differentiating between a missing key-value separator, and one followed by emptiness. + dispatch(Event::Value(Cow::Borrowed("".into()))); + Ok((i, 0)) + } +} + +/// Handles parsing of known-to-be values. This function handles both single +/// line values as well as values that are continuations. 
+fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (i, value_end, newlines, mut dispatch) = { + let new_err = |code| nom::Err::Error(NomError { input: i, code }); + let mut value_end = None::<usize>; + let mut value_start: usize = 0; + let mut newlines = 0; + + let mut prev_char_was_backslash = false; + // This is required to ignore comment markers if they're in a quote. + let mut is_in_quotes = false; + // Used to determine if we return a Value or Value{Not,}Done + let mut partial_value_found = false; + let mut last_value_index: usize = 0; + + let mut bytes = i.iter(); + while let Some(mut c) = bytes.next() { + if prev_char_was_backslash { + prev_char_was_backslash = false; + let mut consumed = 1; + if *c == b'\r' { + c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?; + if *c != b'\n' { + return Err(new_err(ErrorKind::Tag)); + } + consumed += 1; + } + + match c { + b'\n' => { + partial_value_found = true; + let backslash = 1; + dispatch(Event::ValueNotDone(Cow::Borrowed( + i[value_start..last_value_index - backslash].as_bstr(), + ))); + let nl_end = last_value_index + consumed; + dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr()))); + value_start = nl_end; + value_end = None; + newlines += 1; + + last_value_index += consumed; + } + b'n' | b't' | b'\\' | b'b' | b'"' => { + last_value_index += 1; + } + _ => { + return Err(new_err(ErrorKind::Escaped)); + } + } + } else { + match c { + b'\n' => { + value_end = last_value_index.into(); + break; + } + b';' | b'#' if !is_in_quotes => { + value_end = last_value_index.into(); + break; + } + b'\\' => prev_char_was_backslash = true, + b'"' => is_in_quotes = !is_in_quotes, + _ => {} + } + last_value_index += 1; + } + } + + if prev_char_was_backslash { + return Err(new_err(ErrorKind::Escaped)); + } + + if is_in_quotes { + return Err(new_err(ErrorKind::Tag)); + } + + let value_end = match value_end { + None => { + if last_value_index == 0 { +
dispatch(Event::Value(Cow::Borrowed("".into()))); + return Ok((&i[0..], newlines)); + } else { + i.len() + } + } + Some(idx) => idx, + }; + + let dispatch = move |value: &'a [u8]| { + if partial_value_found { + dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr()))); + } else { + dispatch(Event::Value(Cow::Borrowed(value.as_bstr()))); + } + }; + (&i[value_start..], value_end - value_start, newlines, dispatch) + }; + + let (i, remainder_value) = { + let value_end_no_trailing_whitespace = i[..value_end] + .iter() + .enumerate() + .rev() + .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) + .unwrap_or(0); + ( + &i[value_end_no_trailing_whitespace..], + &i[..value_end_no_trailing_whitespace], + ) + }; + + dispatch(remainder_value); + + Ok((i, newlines)) +} + +fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> { + let (i, v) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?; + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, v.as_bstr())) + } +} + +fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> { + let mut counter = 0; + let mut consumed_bytes = 0; + let mut next_must_be_newline = false; + for b in i.iter().copied() { + if !b.is_ascii() { + break; + }; + if b == b'\r' { + if next_must_be_newline { + break; + } + next_must_be_newline = true; + continue; + }; + if b == b'\n' { + counter += 1; + consumed_bytes += if next_must_be_newline { 2 } else { 1 }; + next_must_be_newline = false; + } else { + break; + } + } + let (v, i) = i.split_at(consumed_bytes); + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, (v.as_bstr(), counter))) + } +} diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs new file mode 100644 index 000000000..f6e8c3d92 --- /dev/null +++ b/vendor/gix-config/src/parse/nom/tests.rs @@ -0,0 +1,924 @@ +use super::*; + +mod section_headers { + use 
super::section_header; + use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header}; + + #[test] + fn no_subsection() { + assert_eq!( + section_header(b"[hello]").unwrap(), + fully_consumed(parsed_section_header("hello", None)), + ); + } + + #[test] + fn modern_subsection() { + assert_eq!( + section_header(br#"[hello "world"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "world"))), + ); + } + + #[test] + fn escaped_subsection() { + assert_eq!( + section_header(br#"[hello "foo\\bar\""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))), + ); + } + + #[test] + fn deprecated_subsection() { + assert_eq!( + section_header(br#"[hello.world]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (".", "world"))) + ); + assert_eq!( + section_header(br#"[Hello.World]"#).unwrap(), + fully_consumed(parsed_section_header("Hello", (".", "World"))) + ); + } + + #[test] + fn empty_legacy_subsection_name() { + assert_eq!( + section_header(br#"[hello-world.]"#).unwrap(), + fully_consumed(parsed_section_header("hello-world", (".", ""))) + ); + } + + #[test] + fn empty_modern_subsection_name() { + assert_eq!( + section_header(br#"[hello ""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", ""))) + ); + } + + #[test] + fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() { + assert_eq!( + section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#))) + ); + } + + #[test] + fn newline_in_header() { + assert!(section_header(b"[hello\n]").is_err()); + } + + #[test] + fn newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\n\"]").is_err()); + } + + #[test] + fn null_byt_in_sub_section() { + assert!(section_header(b"[hello \"hello\0\"]").is_err()); + } + + #[test] + fn escaped_newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\\\n\"]").is_err()); + } + + #[test] + 
fn eof_after_escape_in_sub_section() { + assert!(section_header(b"[hello \"hello\\").is_err()); + } + + #[test] + fn null_byte_in_header() { + assert!(section_header(b"[hello\0]").is_err()); + } + + #[test] + fn invalid_characters_in_section() { + assert!(section_header(b"[$]").is_err()); + } + #[test] + fn invalid_characters_in_legacy_sub_section() { + assert!(section_header(b"[hello.$]").is_err()); + assert!(section_header(b"[hello. world]").is_err()); + } + + #[test] + fn right_brace_in_subsection_name() { + assert_eq!( + section_header(br#"[hello "]"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "]"))) + ); + } +} + +mod sub_section { + use std::borrow::Cow; + + use super::sub_section; + + #[test] + fn zero_copy_simple() { + let actual = sub_section(b"name\"").unwrap().1; + assert_eq!(actual.as_ref(), "name"); + assert!(matches!(actual, Cow::Borrowed(_))); + } + + #[test] + fn escapes_need_allocation() { + let actual = sub_section(br#"\x\t\n\0\\\"""#).unwrap().1; + assert_eq!(actual.as_ref(), r#"xtn0\""#); + assert!(matches!(actual, Cow::Owned(_))); + } +} + +mod config_name { + use nom::combinator::all_consuming; + + use super::config_name; + use crate::parse::tests::util::fully_consumed; + + #[test] + fn just_name() { + assert_eq!(config_name(b"name").unwrap(), fully_consumed("name".into())); + } + + #[test] + fn must_start_with_alphabetic() { + assert!(config_name(b"4aaa").is_err()); + assert!(config_name(b"-aaa").is_err()); + } + + #[test] + fn only_a_subset_of_characters_is_allowed() { + assert!(all_consuming(config_name)(b"Name$_").is_err()); + assert!(all_consuming(config_name)(b"other#").is_err()); + } + + #[test] + fn cannot_be_empty() { + assert!(config_name(b"").is_err()); + } +} + +mod section { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{ + comment_event, fully_consumed, name_event, newline_custom_event, newline_event, + section_header as parsed_section_header, value_done_event, value_event, 
value_not_done_event, + whitespace_event, + }, + Event, Section, + }; + + fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> { + let mut header = None; + let mut events = section::Events::default(); + super::section(i, node, &mut |e| match &header { + None => { + header = Some(e); + } + Some(_) => events.push(e), + }) + .map(|(i, o)| { + ( + i, + ( + Section { + header: match header.expect("header set") { + Event::SectionHeader(header) => header, + _ => unreachable!("unexpected"), + }, + events, + }, + o, + ), + ) + }) + } + + #[test] + fn empty_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn simple_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = v\r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("v"), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn empty_section() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[test]", &mut node).unwrap(), + 
fully_consumed(( + Section { + header: parsed_section_header("test", None), + events: Default::default() + }, + 0 + )), + ); + } + + #[test] + fn simple_section() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_with_empty_value_simplified() { + let mut node = ParseNode::SectionHeader; + let section_data = b"[a] k="; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + ] + .into() + }, + 0 + )) + ); + + let section_data = b"[a] k=\n"; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_with_empty_value() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c= + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), 
+ Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_implicit_value() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[hello] c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![whitespace_event(" "), name_event("c"), value_event("")].into() + }, + 0 + )) + ); + + assert_eq!( + section(b"[hello] c\nd", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + name_event("d"), + value_event("") + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_very_commented() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] ; commentA + a = b # commentB + ; commentC + ; commentD + c = d"#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + comment_event(';', " commentA"), + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + whitespace_event(" "), + comment_event('#', " commentB"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentC"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentD"), + newline_event(), + whitespace_event(" "), + name_event("c"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("d"), + ] + .into() + }, + 4 + )) + ); + 
} + + #[test] + fn complex_continuation() { + let mut node = ParseNode::SectionHeader; + // This test is absolute hell. Good luck if this fails. + assert_eq!( + section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", None), + events: vec![ + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d"), + whitespace_event(" "), + comment_event('#', " \"b\t ; c"), + ] + .into() + }, + 2 + )) + ); + } + + #[test] + fn quote_split_over_two_lines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", (" ", "a")), + events: vec![ + whitespace_event(" "), + name_event("b"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_not_done_event("\""), + newline_event(), + value_done_event(";\""), + comment_event(';', "a"), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_handles_extraneous_whitespace_before_comment() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[s]hello #world", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("s", None), + events: vec![ + name_event("hello"), + whitespace_event(" "), + value_event(""), + comment_event('#', "world"), + ] + .into() + }, + 0 + )) + ); + } +} + +mod value_continuation { + use bstr::ByteSlice; + + use crate::parse::{ + section, + tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event}, + }; + + pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> { + super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ())) + 
} + + #[test] + fn simple_continuation() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event("world") + ]) + ); + } + + #[test] + fn continuation_with_whitespace() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_custom_event("\r\n"), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert!( + value_impl(b"hello\\\r\r\n world", &mut events).is_err(), + "\\r must be followed by \\n" + ); + } + + #[test] + fn complex_continuation_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events) + .unwrap() + .0, + b" # \"b\t ; c" + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d") + ]) + ); + } + + #[test] + fn quote_split_over_two_lines_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\""), + newline_event(), + value_done_event(";\"") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"a"), + 
newline_custom_event("\r\n"), + value_done_event("b;\"") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#"1\ +"2" a\ +\"3 b\"\ +4 ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_with_surrounding_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#""1\ +"2" a\ +\"3 b\"\ +4 " ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4 \"") + ]) + ); + } +} + +mod value_no_continuation { + use super::value_continuation::value_impl; + use crate::parse::{ + section, + tests::util::{into_events, value_event}, + }; + + #[test] + fn no_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b""); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn windows_newline() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + + events.clear(); + assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + } + + #[test] + fn no_comment_newline() { + let mut events = section::Events::default(); + 
assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn semicolon_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn octothorpe_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_without_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ", &mut events).unwrap().0, + b" " + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_before_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello #world", &mut events).unwrap().0, + b" #world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ;world", &mut events).unwrap().0, + b" ;world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn trans_escaped_comment_marker_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); + assert_eq!(events, into_events(vec![value_event(r##"hello"#"world"##)])); + } + + #[test] + fn complex_test() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh"); + assert_eq!(events, into_events(vec![value_event(r#"value";""#)])); + } + + #[test] + fn garbage_after_continuation_is_err() { + assert!(value_impl(b"hello \\afwjdls", &mut Default::default()).is_err()); + } 
+ + #[test] + fn invalid_escape() { + assert!(value_impl(br#"\x"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_quote() { + assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_escape() { + assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err()); + } +} + +mod key_value_pair { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{into_events, name_event, value_event, whitespace_event}, + Event, + }; + + fn key_value<'a>( + i: &'a [u8], + node: &mut ParseNode, + events: &mut section::Events<'a>, + ) -> nom::IResult<&'a [u8], ()> { + super::key_value_pair(i, node, &mut |e| events.push(e)).map(|t| (t.0, ())) + } + + #[test] + fn nonascii_is_allowed_for_values_but_not_for_keys() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value("你好".as_bytes(), &mut node, &mut vec).is_err()); + assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("你好") + ]) + ); + } + + #[test] + fn whitespace_is_not_ambiguous() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value(b"a =b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_event("b") + ]) + ); + + let mut vec = Default::default(); + assert!(key_value(b"a= b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b") + ]) + ); + } +} + +mod comment { + use super::comment; + use crate::parse::tests::util::{comment as parsed_comment, fully_consumed}; + + #[test] + fn semicolon() { + assert_eq!( + comment(b"; this is a semicolon comment").unwrap(), + 
fully_consumed(parsed_comment(';', " this is a semicolon comment")), + ); + } + + #[test] + fn octothorpe() { + assert_eq!( + comment(b"# this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', " this is an octothorpe comment")), + ); + } + + #[test] + fn multiple_markers() { + assert_eq!( + comment(b"###### this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', "##### this is an octothorpe comment")), + ); + } +} -- cgit v1.2.3