summaryrefslogtreecommitdiffstats
path: root/vendor/gix-config/src/parse/nom
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-config/src/parse/nom')
-rw-r--r--vendor/gix-config/src/parse/nom/mod.rs460
-rw-r--r--vendor/gix-config/src/parse/nom/tests.rs924
2 files changed, 1384 insertions, 0 deletions
diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs
new file mode 100644
index 000000000..11d1dea6b
--- /dev/null
+++ b/vendor/gix-config/src/parse/nom/mod.rs
@@ -0,0 +1,460 @@
+use std::borrow::Cow;
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+use nom::{
+ branch::alt,
+ bytes::complete::{tag, take_till, take_while},
+ character::{
+ complete::{char, one_of},
+ is_space,
+ },
+ combinator::{map, opt},
+ error::{Error as NomError, ErrorKind},
+ multi::{fold_many0, fold_many1},
+ sequence::delimited,
+ IResult,
+};
+
+use crate::parse::{error::ParseNode, section, Comment, Error, Event};
+
+/// Parse `input` as a configuration file without copying, handing every recognized [`Event`] to `dispatch`.
+///
+/// A leading unicode byte-order mark is skipped. Comments, whitespace and newlines in front of the first
+/// section are dispatched first, followed by the events of every section.
+///
+/// # Errors
+///
+/// Fails if not a single section can be parsed from non-empty remaining input, or if input is left over
+/// after the last section. The error carries the newline count accumulated so far, the parser node that
+/// was attempted last, and the input that could not be parsed.
+pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> {
+    let bom = unicode_bom::Bom::from(input);
+    let mut newlines = 0;
+    let (after_frontmatter, _) = fold_many0(
+        alt((
+            map(comment, Event::Comment),
+            map(take_spaces, |spaces| Event::Whitespace(Cow::Borrowed(spaces))),
+            map(take_newlines, |(text, count)| {
+                newlines += count;
+                Event::Newline(Cow::Borrowed(text))
+            }),
+        )),
+        || (),
+        |_acc, event| dispatch(event),
+    )(&input[bom.len()..])
+    // This is not expected to panic: many0 only errors if a child parser succeeds
+    // without consuming input, and alt only succeeds if one of its children does.
+    // As each of its children consumes at least one byte on success, the
+    // "succeeded but consumed nothing" case cannot happen.
+    .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers.");
+
+    if after_frontmatter.is_empty() {
+        return Ok(());
+    }
+
+    let mut node = ParseNode::SectionHeader;
+
+    let sections = fold_many1(
+        |i| section(i, &mut node, &mut dispatch),
+        || (),
+        |_acc, section_newlines| newlines += section_newlines,
+    )(after_frontmatter);
+
+    // The error must be assembled only after all sections were attempted, otherwise
+    // the line number would be off.
+    let unparsed = match sections {
+        Ok((rest, _)) if rest.is_empty() => return Ok(()),
+        // Sections were parsed, but something unparseable follows them.
+        Ok((rest, _)) => rest,
+        // Not even the first section could be parsed.
+        Err(_) => after_frontmatter,
+    };
+
+    Err(Error {
+        line_number: newlines,
+        last_attempted_parser: node,
+        parsed_until: unparsed.as_bstr().into(),
+    })
+}
+
+/// Parse a comment which starts with `;` or `#` and extends up to, but not including, the next `\n`.
+fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> {
+    let (i, marker) = one_of(";#")(i)?;
+    let (rest, text) = take_till(|byte| byte == b'\n')(i)?;
+    let parsed = Comment {
+        tag: marker as u8,
+        text: Cow::Borrowed(text.as_bstr()),
+    };
+    Ok((rest, parsed))
+}
+
+#[cfg(test)]
+mod tests;
+
+/// Parse one section: its header, followed by any mix of whitespace, newlines, key-value pairs and comments.
+///
+/// Events are handed to `dispatch` as soon as they are recognized, and `node` is passed on to the
+/// key-value parser which records the node it attempts. On success, the remaining input and the
+/// number of newlines counted in the section body are returned.
+fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+    let (mut rest, header) = section_header(i)?;
+    dispatch(Event::SectionHeader(header));
+
+    let mut newline_count = 0;
+
+    // Hand-rolled stand-in for many0(alt(...)): each round tries every parser once,
+    // in a fixed order, and the loop stops once a whole round made no progress.
+    loop {
+        let round_start = rest;
+
+        if let Ok((after, spaces)) = take_spaces(rest) {
+            if round_start != after {
+                rest = after;
+                dispatch(Event::Whitespace(Cow::Borrowed(spaces.as_bstr())));
+            }
+        }
+
+        if let Ok((after, (text, count))) = take_newlines(rest) {
+            if round_start != after {
+                rest = after;
+                newline_count += count;
+                dispatch(Event::Newline(Cow::Borrowed(text.as_bstr())));
+            }
+        }
+
+        if let Ok((after, count)) = key_value_pair(rest, node, dispatch) {
+            if round_start != after {
+                rest = after;
+                newline_count += count;
+            }
+        }
+
+        if let Ok((after, parsed_comment)) = comment(rest) {
+            if round_start != after {
+                rest = after;
+                dispatch(Event::Comment(parsed_comment));
+            }
+        }
+
+        if round_start == rest {
+            break;
+        }
+    }
+
+    Ok((rest, newline_count))
+}
+
+/// Parse a section header in either legacy (`[name]`, `[name.sub]`) or modern (`[name "sub"]`) syntax.
+///
+/// On success, returns the input following the closing `]` along with the parsed header.
+///
+/// An empty section name is rejected for both syntaxes. Previously only the legacy path checked for it,
+/// so a header like `[ "sub"]` was accepted with an empty name.
+fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> {
+    let (i, _) = char('[')(i)?;
+    // No spaces must be between section name and section start
+    let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?;
+
+    let name = name.as_bstr();
+    if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) {
+        // Either section does not have a subsection or using deprecated
+        // subsection syntax at this point.
+        let header = match memchr::memrchr(b'.', name.as_bytes()) {
+            // The last dot separates the section name from the legacy subsection name.
+            Some(index) => section::Header {
+                name: section::Name(Cow::Borrowed(name[..index].as_bstr())),
+                separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())),
+                subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())),
+            },
+            None => section::Header {
+                name: section::Name(Cow::Borrowed(name.as_bstr())),
+                separator: None,
+                subsection_name: None,
+            },
+        };
+
+        if header.name.is_empty() {
+            return Err(nom::Err::Error(NomError {
+                input: i,
+                code: ErrorKind::NoneOf,
+            }));
+        }
+        return Ok((i, header));
+    }
+
+    // Section header must be using modern subsection syntax at this point.
+
+    // Reject an empty section name here as well, consistent with the legacy path above.
+    if name.is_empty() {
+        return Err(nom::Err::Error(NomError {
+            input: i,
+            code: ErrorKind::NoneOf,
+        }));
+    }
+
+    let (i, whitespace) = take_spaces(i)?;
+    let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?;
+
+    Ok((
+        i,
+        section::Header {
+            name: section::Name(Cow::Borrowed(name)),
+            separator: Some(Cow::Borrowed(whitespace)),
+            subsection_name,
+        },
+    ))
+}
+
+/// Parse the inside of a quoted subsection name up to, but excluding, the closing `"`.
+///
+/// The name is borrowed from the input unless it contains a backslash escape, in which case the
+/// unescaped bytes are collected into an owned buffer.
+fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> {
+    // Dry run: find out where the name ends and whether unescaping is needed at all.
+    let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?;
+    if !found_escape {
+        return Ok((rest, i[..consumed].as_bstr().into()));
+    }
+
+    // Second pass, this time keeping the unescaped bytes.
+    let mut unescaped = BString::default();
+    let (rest, _) = sub_section_delegate(i, &mut |b| unescaped.push_byte(b))?;
+    Ok((rest, unescaped.into()))
+}
+
+/// Walk `i` up to the closing `"` of a subsection name, passing every byte of the unescaped name to `push_byte`.
+///
+/// A backslash makes the byte following it part of the name verbatim. Newlines (escaped or not), unescaped
+/// null bytes, and input that ends before the closing quote are errors.
+///
+/// On success, returns the input starting at the closing quote, along with whether an escape was seen
+/// and the number of input bytes in front of the closing quote.
+fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> {
+    // Errors point at the input following the last byte that was read.
+    let fail_at = move |pos: usize| {
+        nom::Err::Error(NomError {
+            input: &i[pos..],
+            code: ErrorKind::NonEmpty,
+        })
+    };
+
+    let mut pos = 0;
+    let mut found_escape = false;
+    while pos < i.len() {
+        match i[pos] {
+            b'\n' | 0 => return Err(fail_at(pos + 1)),
+            b'"' => return Ok((&i[pos..], (found_escape, pos))),
+            b'\\' => {
+                let escaped = match i.get(pos + 1) {
+                    Some(byte) => *byte,
+                    // The input ends right after the backslash.
+                    None => return Err(fail_at(pos + 1)),
+                };
+                found_escape = true;
+                if escaped == b'\n' {
+                    return Err(fail_at(pos + 2));
+                }
+                push_byte(escaped);
+                pos += 2;
+            }
+            plain => {
+                push_byte(plain);
+                pos += 1;
+            }
+        }
+    }
+
+    // Ran out of input without seeing the closing quote.
+    Err(fail_at(pos))
+}
+
+/// Parse a key, optional whitespace after it, and its value, dispatching an event for each part.
+///
+/// `node` is set to the part being attempted so a failure can be attributed to it. On success, the
+/// remaining input and the number of newlines reported by the value parser are returned.
+fn key_value_pair<'a>(
+    i: &'a [u8],
+    node: &mut ParseNode,
+    dispatch: &mut impl FnMut(Event<'a>),
+) -> IResult<&'a [u8], usize> {
+    *node = ParseNode::Name;
+    let (i, key) = config_name(i)?;
+    dispatch(Event::SectionKey(section::Key(Cow::Borrowed(key))));
+
+    let (i, spaces) = opt(take_spaces)(i)?;
+    if let Some(spaces) = spaces {
+        dispatch(Event::Whitespace(Cow::Borrowed(spaces)));
+    }
+
+    *node = ParseNode::Value;
+    config_value(i, dispatch)
+}
+
+/// Parse the name of a key-value pair: an ASCII letter followed by any number of ASCII alphanumerics or `-`.
+///
+/// Leading whitespace is not skipped, the caller has to take care of it.
+fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> {
+    let first = match i.first() {
+        Some(first) => first,
+        None => {
+            return Err(nom::Err::Error(NomError {
+                input: i,
+                code: ErrorKind::NonEmpty,
+            }))
+        }
+    };
+
+    if !first.is_ascii_alphabetic() {
+        return Err(nom::Err::Error(NomError {
+            input: i,
+            code: ErrorKind::Alpha,
+        }));
+    }
+
+    let (rest, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?;
+    Ok((rest, name.as_bstr()))
+}
+
+/// Parse what follows a key: either `=`, optional whitespace and a value, or nothing at all.
+///
+/// Without a `=`, an empty `Value` event is dispatched and no input is consumed. Returns the remaining
+/// input and the number of newlines reported by the value parser.
+fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+    let (i, separator) = opt(char('='))(i)?;
+    if separator.is_none() {
+        // This is a special way of denoting 'empty' values which a lot of code depends on.
+        // Rather than changing it everywhere else, it is kept here and dealt with where it matters, namely
+        // when differentiating between a missing key-value separator and one followed by emptiness.
+        dispatch(Event::Value(Cow::Borrowed("".into())));
+        return Ok((i, 0));
+    }
+
+    dispatch(Event::KeyValueSeparator);
+    let (i, spaces) = opt(take_spaces)(i)?;
+    if let Some(spaces) = spaces {
+        dispatch(Event::Whitespace(Cow::Borrowed(spaces)));
+    }
+    value_impl(i, dispatch)
+}
+
+/// Handles parsing of known-to-be values, both single-line ones and those continued onto following lines by a backslash directly before the newline.
+/// Dispatches `Value` for a single-line value, or `ValueNotDone` and `Newline` per continued line followed by a final `ValueDone`; returns the unconsumed input (starting at trailing whitespace, a comment marker or a newline) and the number of continuation newlines.
+fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+ let (i, value_end, newlines, mut dispatch) = { // first pass: scan the bytes and dispatch events for every completed continuation line
+ let new_err = |code| nom::Err::Error(NomError { input: i, code }); // all errors refer to the start of the value
+ let mut value_end = None::<usize>;
+ let mut value_start: usize = 0; // where the value of the current (possibly continued) line starts in `i`
+ let mut newlines = 0;
+
+ let mut prev_char_was_backslash = false;
+ // This is required to ignore comment markers if they're in a quote.
+ let mut is_in_quotes = false;
+ // Used to determine if we return a Value or Value{Not,}Done
+ let mut partial_value_found = false;
+ let mut last_value_index: usize = 0; // index into `i` of the byte examined in the current iteration
+
+ let mut bytes = i.iter();
+ while let Some(mut c) = bytes.next() {
+ if prev_char_was_backslash {
+ prev_char_was_backslash = false;
+ let mut consumed = 1;
+ if *c == b'\r' { // after a backslash, `\r` is only accepted as part of `\r\n`
+ c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?;
+ if *c != b'\n' {
+ return Err(new_err(ErrorKind::Tag));
+ }
+ consumed += 1;
+ }
+
+ match c {
+ b'\n' => { // line continuation: finish the current part and start a new one after the newline
+ partial_value_found = true;
+ let backslash = 1;
+ dispatch(Event::ValueNotDone(Cow::Borrowed(
+ i[value_start..last_value_index - backslash].as_bstr(),
+ )));
+ let nl_end = last_value_index + consumed;
+ dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr())));
+ value_start = nl_end;
+ value_end = None;
+ newlines += 1;
+
+ last_value_index += consumed;
+ }
+ b'n' | b't' | b'\\' | b'b' | b'"' => { // accepted escapes stay in the value as written
+ last_value_index += 1;
+ }
+ _ => {
+ return Err(new_err(ErrorKind::Escaped));
+ }
+ }
+ } else {
+ match c {
+ b'\n' => { // an unescaped newline ends the value and is not consumed
+ value_end = last_value_index.into();
+ break;
+ }
+ b';' | b'#' if !is_in_quotes => { // a comment marker outside of quotes ends the value and is not consumed
+ value_end = last_value_index.into();
+ break;
+ }
+ b'\\' => prev_char_was_backslash = true,
+ b'"' => is_in_quotes = !is_in_quotes,
+ _ => {}
+ }
+ last_value_index += 1;
+ }
+ }
+
+ if prev_char_was_backslash { // the input ended right after a backslash
+ return Err(new_err(ErrorKind::Escaped));
+ }
+
+ if is_in_quotes { // a quote was opened but never closed
+ return Err(new_err(ErrorKind::Tag));
+ }
+
+ let value_end = match value_end {
+ None => { // neither newline nor comment marker was found: the value runs to the end of the input
+ if last_value_index == 0 {
+ dispatch(Event::Value(Cow::Borrowed("".into())));
+ return Ok((&i[0..], newlines));
+ } else {
+ i.len()
+ }
+ }
+ Some(idx) => idx,
+ };
+
+ let dispatch = move |value: &'a [u8]| { // picks `ValueDone` if at least one continuation was seen, `Value` otherwise
+ if partial_value_found {
+ dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr())));
+ } else {
+ dispatch(Event::Value(Cow::Borrowed(value.as_bstr())));
+ }
+ };
+ (&i[value_start..], value_end - value_start, newlines, dispatch) // from here on `i` starts at the last line's value
+ };
+
+ let (i, remainder_value) = { // second pass: cut trailing whitespace off the value, leaving it in the remaining input
+ let value_end_no_trailing_whitespace = i[..value_end]
+ .iter()
+ .enumerate()
+ .rev()
+ .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1))
+ .unwrap_or(0);
+ (
+ &i[value_end_no_trailing_whitespace..],
+ &i[..value_end_no_trailing_whitespace],
+ )
+ };
+
+ dispatch(remainder_value);
+
+ Ok((i, newlines))
+}
+
+/// Consume one or more leading spaces or tabs, failing if the input does not start with either.
+fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> {
+    let (rest, spaces) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?;
+    if spaces.is_empty() {
+        return Err(nom::Err::Error(NomError {
+            input: rest,
+            code: ErrorKind::Eof,
+        }));
+    }
+    Ok((rest, spaces.as_bstr()))
+}
+
+/// Consume a run of line endings, each being `\n` or `\r\n`, returning them along with their count.
+///
+/// Fails if the input does not start with a line ending.
+fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> {
+    let mut rest = i;
+    let mut count = 0;
+    // Peel off one line ending per iteration. A `\r` which isn't directly followed
+    // by `\n` is not part of a line ending and thus ends the run.
+    while let [b'\n', tail @ ..] | [b'\r', b'\n', tail @ ..] = rest {
+        rest = tail;
+        count += 1;
+    }
+
+    let (newlines, rest) = i.split_at(i.len() - rest.len());
+    if newlines.is_empty() {
+        return Err(nom::Err::Error(NomError {
+            input: rest,
+            code: ErrorKind::Eof,
+        }));
+    }
+    Ok((rest, (newlines.as_bstr(), count)))
+}
diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs
new file mode 100644
index 000000000..f6e8c3d92
--- /dev/null
+++ b/vendor/gix-config/src/parse/nom/tests.rs
@@ -0,0 +1,924 @@
+use super::*;
+
+/// Tests for `section_header`, covering legacy and modern subsection syntax as well as invalid input.
+mod section_headers {
+    use super::section_header;
+    use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header};
+
+    #[test]
+    fn no_subsection() {
+        assert_eq!(
+            section_header(b"[hello]").unwrap(),
+            fully_consumed(parsed_section_header("hello", None)),
+        );
+    }
+
+    #[test]
+    fn modern_subsection() {
+        assert_eq!(
+            section_header(br#"[hello "world"]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (" ", "world"))),
+        );
+    }
+
+    #[test]
+    fn escaped_subsection() {
+        assert_eq!(
+            section_header(br#"[hello "foo\\bar\""]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))),
+        );
+    }
+
+    #[test]
+    fn deprecated_subsection() {
+        assert_eq!(
+            section_header(br#"[hello.world]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (".", "world")))
+        );
+        assert_eq!(
+            section_header(br#"[Hello.World]"#).unwrap(),
+            fully_consumed(parsed_section_header("Hello", (".", "World")))
+        );
+    }
+
+    #[test]
+    fn empty_legacy_subsection_name() {
+        assert_eq!(
+            section_header(br#"[hello-world.]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello-world", (".", "")))
+        );
+    }
+
+    #[test]
+    fn empty_modern_subsection_name() {
+        assert_eq!(
+            section_header(br#"[hello ""]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (" ", "")))
+        );
+    }
+
+    // A backslash only makes the following byte literal, hence `\t` yields `t`, not a tab.
+    #[test]
+    fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() {
+        assert_eq!(
+            section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#)))
+        );
+    }
+
+    #[test]
+    fn newline_in_header() {
+        assert!(section_header(b"[hello\n]").is_err());
+    }
+
+    #[test]
+    fn newline_in_sub_section() {
+        assert!(section_header(b"[hello \"hello\n\"]").is_err());
+    }
+
+    #[test]
+    fn null_byte_in_sub_section() {
+        assert!(section_header(b"[hello \"hello\0\"]").is_err());
+    }
+
+    #[test]
+    fn escaped_newline_in_sub_section() {
+        assert!(section_header(b"[hello \"hello\\\n\"]").is_err());
+    }
+
+    #[test]
+    fn eof_after_escape_in_sub_section() {
+        assert!(section_header(b"[hello \"hello\\").is_err());
+    }
+
+    #[test]
+    fn null_byte_in_header() {
+        assert!(section_header(b"[hello\0]").is_err());
+    }
+
+    #[test]
+    fn invalid_characters_in_section() {
+        assert!(section_header(b"[$]").is_err());
+    }
+
+    #[test]
+    fn invalid_characters_in_legacy_sub_section() {
+        assert!(section_header(b"[hello.$]").is_err());
+        assert!(section_header(b"[hello. world]").is_err());
+    }
+
+    // The header only ends at `"]`, so a lone `]` inside the quotes is part of the name.
+    #[test]
+    fn right_brace_in_subsection_name() {
+        assert_eq!(
+            section_header(br#"[hello "]"]"#).unwrap(),
+            fully_consumed(parsed_section_header("hello", (" ", "]")))
+        );
+    }
+}
+
+/// Tests for `sub_section`, focusing on when the parsed name borrows from the input.
+mod sub_section {
+    use std::borrow::Cow;
+
+    use super::sub_section;
+
+    // Without escapes the name is a slice of the input.
+    #[test]
+    fn zero_copy_simple() {
+        let (_rest, name) = sub_section(b"name\"").unwrap();
+        assert_eq!(name.as_ref(), "name");
+        assert!(matches!(name, Cow::Borrowed(_)));
+    }
+
+    // Escapes have to be removed, which requires an owned copy.
+    #[test]
+    fn escapes_need_allocation() {
+        let (_rest, name) = sub_section(br#"\x\t\n\0\\\"""#).unwrap();
+        assert_eq!(name.as_ref(), r#"xtn0\""#);
+        assert!(matches!(name, Cow::Owned(_)));
+    }
+}
+
+/// Tests for `config_name`, the parser for the key of a key-value pair.
+mod config_name {
+    use nom::combinator::all_consuming;
+
+    use super::config_name;
+    use crate::parse::tests::util::fully_consumed;
+
+    #[test]
+    fn just_name() {
+        let parsed = config_name(b"name").unwrap();
+        assert_eq!(parsed, fully_consumed("name".into()));
+    }
+
+    #[test]
+    fn must_start_with_alphabetic() {
+        for input in ["4aaa", "-aaa"] {
+            assert!(config_name(input.as_bytes()).is_err());
+        }
+    }
+
+    // The name parser stops at the first disallowed byte, so consuming everything must fail.
+    #[test]
+    fn only_a_subset_of_characters_is_allowed() {
+        for input in ["Name$_", "other#"] {
+            assert!(all_consuming(config_name)(input.as_bytes()).is_err());
+        }
+    }
+
+    #[test]
+    fn cannot_be_empty() {
+        assert!(config_name(b"").is_err());
+    }
+}
+
+mod section { // tests running `super::section` on whole sections and comparing the exact event stream
+ use crate::parse::{
+ error::ParseNode,
+ section,
+ tests::util::{
+ comment_event, fully_consumed, name_event, newline_custom_event, newline_event,
+ section_header as parsed_section_header, value_done_event, value_event, value_not_done_event,
+ whitespace_event,
+ },
+ Event, Section,
+ };
+
+ fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> { // test helper: collects dispatched events into a `Section`
+ let mut header = None;
+ let mut events = section::Events::default();
+ super::section(i, node, &mut |e| match &header { // the first event becomes the header, all later ones are section events
+ None => {
+ header = Some(e);
+ }
+ Some(_) => events.push(e),
+ })
+ .map(|(i, o)| {
+ (
+ i,
+ (
+ Section {
+ header: match header.expect("header set") {
+ Event::SectionHeader(header) => header,
+ _ => unreachable!("unexpected"),
+ },
+ events,
+ },
+ o,
+ ),
+ )
+ })
+ }
+
+ #[test]
+ fn empty_value_with_windows_newlines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[a] k = \r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event(""),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ }
+
+ #[test]
+ fn simple_value_with_windows_newlines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[a] k = v\r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("v"),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ assert_eq!(
+ section(b"[a] k = \r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event(""),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ }
+
+ #[test]
+ fn empty_section() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[test]", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("test", None),
+ events: Default::default()
+ },
+ 0
+ )),
+ );
+ }
+
+ #[test]
+ fn simple_section() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello]
+ a = b
+ c
+ d = "lol""#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ value_event(""),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("d"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("\"lol\"")
+ ]
+ .into()
+ },
+ 3
+ ))
+ );
+ }
+
+ #[test]
+ fn section_with_empty_value_simplified() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = b"[a] k=";
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ ]
+ .into()
+ },
+ 0
+ ))
+ );
+
+ let section_data = b"[a] k=\n";
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ newline_event(),
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_with_empty_value() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello]
+ a = b
+ c=
+ d = "lol""#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("d"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("\"lol\"")
+ ]
+ .into()
+ },
+ 3
+ ))
+ );
+ }
+
+ #[test]
+ fn section_implicit_value() { // a key without `=` yields an empty value event and no separator event
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[hello] c", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![whitespace_event(" "), name_event("c"), value_event("")].into()
+ },
+ 0
+ ))
+ );
+
+ assert_eq!(
+ section(b"[hello] c\nd", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("c"),
+ value_event(""),
+ newline_event(),
+ name_event("d"),
+ value_event("")
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_very_commented() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello] ; commentA
+ a = b # commentB
+ ; commentC
+ ; commentD
+ c = d"#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ whitespace_event(" "),
+ comment_event(';', " commentA"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ whitespace_event(" "),
+ comment_event('#', " commentB"),
+ newline_event(),
+ whitespace_event(" "),
+ comment_event(';', " commentC"),
+ newline_event(),
+ whitespace_event(" "),
+ comment_event(';', " commentD"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("d"),
+ ]
+ .into()
+ },
+ 4
+ ))
+ );
+ }
+
+ #[test]
+ fn complex_continuation() {
+ let mut node = ParseNode::SectionHeader;
+ // A value continued over three lines, with escaped quotes and a `;` inside quotes, followed by a trailing comment.
+ assert_eq!(
+ section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("section", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_not_done_event(r#"1 "\""#),
+ newline_event(),
+ value_not_done_event(r#"a ; e "\""#),
+ newline_event(),
+ value_done_event("d"),
+ whitespace_event(" "),
+ comment_event('#', " \"b\t ; c"),
+ ]
+ .into()
+ },
+ 2
+ ))
+ );
+ }
+
+ #[test]
+ fn quote_split_over_two_lines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("section", (" ", "a")),
+ events: vec![
+ whitespace_event(" "),
+ name_event("b"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ value_not_done_event("\""),
+ newline_event(),
+ value_done_event(";\""),
+ comment_event(';', "a"),
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_handles_extraneous_whitespace_before_comment() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[s]hello #world", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("s", None),
+ events: vec![
+ name_event("hello"),
+ whitespace_event(" "),
+ value_event(""),
+ comment_event('#', "world"),
+ ]
+ .into()
+ },
+ 0
+ ))
+ );
+ }
+}
+
+mod value_continuation { // tests for `super::value_impl` with values continued over multiple lines
+ use bstr::ByteSlice;
+
+ use crate::parse::{
+ section,
+ tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event},
+ };
+
+ pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> { // test helper: collects events and drops the newline count
+ super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ()))
+ }
+
+ #[test]
+ fn simple_continuation() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_event(),
+ value_done_event("world")
+ ])
+ );
+ }
+
+ #[test]
+ fn continuation_with_whitespace() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_event(),
+ value_done_event(" world")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_custom_event("\r\n"),
+ value_done_event(" world")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert!(
+ value_impl(b"hello\\\r\r\n world", &mut events).is_err(),
+ "\\r must be followed by \\n"
+ );
+ }
+
+ #[test]
+ fn complex_continuation_with_leftover_comment() { // the comment and the whitespace in front of it are left unconsumed
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events)
+ .unwrap()
+ .0,
+ b" # \"b\t ; c"
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event(r#"1 "\""#),
+ newline_event(),
+ value_not_done_event(r#"a ; e "\""#),
+ newline_event(),
+ value_done_event("d")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_two_lines_with_leftover_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\""),
+ newline_event(),
+ value_done_event(";\"")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\"a"),
+ newline_custom_event("\r\n"),
+ value_done_event("b;\"")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(
+ br#"1\
+"2" a\
+\"3 b\"\
+4 ; comment "#,
+ &mut events
+ )
+ .unwrap()
+ .0
+ .as_bstr(),
+ b" ; comment ".as_bstr()
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("1"),
+ newline_event(),
+ value_not_done_event("\"2\" a"),
+ newline_event(),
+ value_not_done_event("\\\"3 b\\\""),
+ newline_event(),
+ value_done_event("4")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_multiple_lines_with_surrounding_quotes() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(
+ br#""1\
+"2" a\
+\"3 b\"\
+4 " ; comment "#,
+ &mut events
+ )
+ .unwrap()
+ .0
+ .as_bstr(),
+ b" ; comment ".as_bstr()
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\"1"),
+ newline_event(),
+ value_not_done_event("\"2\" a"),
+ newline_event(),
+ value_not_done_event("\\\"3 b\\\""),
+ newline_event(),
+ value_done_event("4 \"")
+ ])
+ );
+ }
+}
+
+mod value_no_continuation { // tests for `value_impl` with single-line values, including what is left unconsumed
+ use super::value_continuation::value_impl;
+ use crate::parse::{
+ section,
+ tests::util::{into_events, value_event},
+ };
+
+ #[test]
+ fn no_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b"");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn windows_newline() { // carriage returns in front of the newline are not part of the value
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest");
+ assert_eq!(events, into_events(vec![value_event("hi")]));
+
+ events.clear();
+ assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest");
+ assert_eq!(events, into_events(vec![value_event("hi")]));
+ }
+
+ #[test]
+ fn no_comment_newline() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn semicolon_comment_not_consumed() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn octothorpe_comment_not_consumed() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn values_with_extraneous_whitespace_without_comment() { // trailing whitespace is left in the remaining input
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello ", &mut events).unwrap().0,
+ b" "
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn values_with_extraneous_whitespace_before_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello #world", &mut events).unwrap().0,
+ b" #world"
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello ;world", &mut events).unwrap().0,
+ b" ;world"
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn trans_escaped_comment_marker_not_consumed() { // a `#` inside quotes is part of the value, the `;` after the quotes is not
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a");
+ assert_eq!(events, into_events(vec![value_event(r##"hello"#"world"##)]));
+ }
+
+ #[test]
+ fn complex_test() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh");
+ assert_eq!(events, into_events(vec![value_event(r#"value";""#)]));
+ }
+
+ #[test]
+ fn garbage_after_continuation_is_err() {
+ assert!(value_impl(b"hello \\afwjdls", &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn invalid_escape() {
+ assert!(value_impl(br#"\x"#, &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn incomplete_quote() {
+ assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn incomplete_escape() {
+ assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err());
+ }
+}
+
+/// Tests for `key_value_pair`, checking the events dispatched for a single `key = value` entry.
+mod key_value_pair {
+    use crate::parse::{
+        error::ParseNode,
+        section,
+        tests::util::{into_events, name_event, value_event, whitespace_event},
+        Event,
+    };
+
+    /// Run the parser under test, collecting its events and dropping the newline count.
+    fn key_value<'a>(
+        i: &'a [u8],
+        node: &mut ParseNode,
+        events: &mut section::Events<'a>,
+    ) -> nom::IResult<&'a [u8], ()> {
+        let (rest, _newlines) = super::key_value_pair(i, node, &mut |event| events.push(event))?;
+        Ok((rest, ()))
+    }
+
+    #[test]
+    fn nonascii_is_allowed_for_values_but_not_for_keys() {
+        let mut node = ParseNode::SectionHeader;
+        let mut events = section::Events::default();
+        assert!(key_value("你好".as_bytes(), &mut node, &mut events).is_err());
+        assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut events).is_ok());
+
+        let expected = into_events(vec![
+            name_event("a"),
+            whitespace_event(" "),
+            Event::KeyValueSeparator,
+            whitespace_event(" "),
+            value_event("你好"),
+        ]);
+        assert_eq!(events, expected);
+    }
+
+    #[test]
+    fn whitespace_is_not_ambiguous() {
+        let mut node = ParseNode::SectionHeader;
+
+        // Whitespace only in front of the separator.
+        let mut events = section::Events::default();
+        assert!(key_value(b"a =b", &mut node, &mut events).is_ok());
+        let expected = into_events(vec![
+            name_event("a"),
+            whitespace_event(" "),
+            Event::KeyValueSeparator,
+            value_event("b"),
+        ]);
+        assert_eq!(events, expected);
+
+        // Whitespace only after the separator.
+        let mut events = section::Events::default();
+        assert!(key_value(b"a= b", &mut node, &mut events).is_ok());
+        let expected = into_events(vec![
+            name_event("a"),
+            Event::KeyValueSeparator,
+            whitespace_event(" "),
+            value_event("b"),
+        ]);
+        assert_eq!(events, expected);
+    }
+}
+
+/// Tests for the `comment` parser with both supported comment markers.
+mod comment {
+    use super::comment;
+    use crate::parse::tests::util::{comment as parsed_comment, fully_consumed};
+
+    #[test]
+    fn semicolon() {
+        let expected = fully_consumed(parsed_comment(';', " this is a semicolon comment"));
+        assert_eq!(comment(b"; this is a semicolon comment").unwrap(), expected);
+    }
+
+    #[test]
+    fn octothorpe() {
+        let expected = fully_consumed(parsed_comment('#', " this is an octothorpe comment"));
+        assert_eq!(comment(b"# this is an octothorpe comment").unwrap(), expected);
+    }
+
+    // Only the first marker is the tag, all following ones belong to the comment text.
+    #[test]
+    fn multiple_markers() {
+        let expected = fully_consumed(parsed_comment('#', "##### this is an octothorpe comment"));
+        assert_eq!(comment(b"###### this is an octothorpe comment").unwrap(), expected);
+    }
+}