diff options
Diffstat (limited to 'vendor/toml_edit/src/parser')
-rw-r--r-- | vendor/toml_edit/src/parser/array.rs | 147 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/datetime.rs | 430 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/document.rs | 147 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/errors.rs | 482 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/inline_table.rs | 186 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/key.rs | 104 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/macros.rs | 13 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/mod.rs | 303 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/numbers.rs | 344 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/state.rs | 323 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/strings.rs | 456 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/table.rs | 89 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/trivia.rs | 150 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/value.rs | 157 |
14 files changed, 3331 insertions, 0 deletions
diff --git a/vendor/toml_edit/src/parser/array.rs b/vendor/toml_edit/src/parser/array.rs new file mode 100644 index 000000000..93d7873ec --- /dev/null +++ b/vendor/toml_edit/src/parser/array.rs @@ -0,0 +1,147 @@ +use nom8::combinator::cut; +use nom8::combinator::opt; +use nom8::multi::separated_list1; +use nom8::sequence::delimited; + +use crate::parser::trivia::ws_comment_newline; +use crate::parser::value::value; +use crate::{Array, Item, RawString, Value}; + +use crate::parser::prelude::*; + +// ;; Array + +// array = array-open array-values array-close +pub(crate) fn array( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> { + move |input| { + delimited( + ARRAY_OPEN, + cut(array_values(check)), + cut(ARRAY_CLOSE) + .context(Context::Expression("array")) + .context(Context::Expected(ParserValue::CharLiteral(']'))), + ) + .parse(input) + } +} + +// note: we're omitting ws and newlines here, because +// they should be part of the formatted values +// array-open = %x5B ws-newline ; [ +pub(crate) const ARRAY_OPEN: u8 = b'['; +// array-close = ws-newline %x5D ; ] +const ARRAY_CLOSE: u8 = b']'; +// array-sep = ws %x2C ws ; , Comma +const ARRAY_SEP: u8 = b','; + +// note: this rule is modified +// array-values = [ ( array-value array-sep array-values ) / +// array-value / ws-comment-newline ] +pub(crate) fn array_values( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> { + move |input| { + let check = check.recursing(input)?; + ( + opt(( + separated_list1(ARRAY_SEP, array_value(check)), + opt(ARRAY_SEP), + ) + .map(|(v, trailing): (Vec<Value>, Option<u8>)| { + ( + Array::with_vec(v.into_iter().map(Item::Value).collect()), + trailing.is_some(), + ) + })), + ws_comment_newline.span(), + ) + .map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| { + let (mut array, comma) = array.unwrap_or_default(); + array.set_trailing_comma(comma); + array.set_trailing(RawString::with_span(trailing)); + Ok(array) + }) + .parse(input) + } +} + +pub(crate) fn array_value( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> { + move |input| { + ( + ws_comment_newline.span(), + value(check), + ws_comment_newline.span(), + ) + .map(|(ws1, v, ws2)| v.decorated(RawString::with_span(ws1), RawString::with_span(ws2))) + .parse(input) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn arrays() { + let inputs = [ + r#"[]"#, + r#"[ ]"#, + r#"[ + 1, 2, 3 +]"#, + r#"[ + 1, + 2, # this is ok +]"#, + r#"[# comment +# comment2 + + + ]"#, + r#"[# comment +# comment2 + 1 + +#sd +, +# comment3 + + ]"#, + r#"[1]"#, + r#"[1,]"#, + r#"[ "all", 'strings', """are the same""", '''type''']"#, + r#"[ 100, -2,]"#, + r#"[1, 2, 3]"#, + r#"[1.1, 2.1, 3.1]"#, + r#"["a", "b", "c"]"#, + r#"[ [ 1, 2 ], [3, 4, 5] ]"#, + r#"[ [ 1, 2 ], ["a", "b", "c"] ]"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = array(Default::default()).parse(new_input(input)).finish(); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } + + #[test] + fn invalid_arrays() { + let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = array(Default::default()).parse(new_input(input)).finish(); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert!(parsed.is_err()); + } + } +} diff --git a/vendor/toml_edit/src/parser/datetime.rs b/vendor/toml_edit/src/parser/datetime.rs new file mode 100644 index 000000000..63a761655 --- /dev/null +++ b/vendor/toml_edit/src/parser/datetime.rs @@ -0,0 +1,430 @@ +use std::ops::RangeInclusive; + +use crate::parser::errors::CustomError; +use crate::parser::prelude::*; +use crate::parser::trivia::from_utf8_unchecked; + +use nom8::branch::alt; +use nom8::bytes::one_of; +use nom8::bytes::take_while_m_n; +use nom8::combinator::cut; +use nom8::combinator::opt; +use nom8::sequence::preceded; +use toml_datetime::*; + +// ;; Date and Time (as defined in RFC 3339) + +// date-time = offset-date-time / local-date-time / local-date / local-time +// offset-date-time = full-date time-delim full-time +// local-date-time = full-date time-delim partial-time +// local-date = full-date +// local-time = partial-time +// full-time = partial-time time-offset +pub(crate) fn date_time(input: Input<'_>) -> IResult<Input<'_>, Datetime, ParserError<'_>> { + alt(( + (full_date, opt((time_delim, partial_time, opt(time_offset)))) + .map(|(date, opt)| { + match opt { + // Offset Date-Time + Some((_, time, offset)) => Datetime { + date: Some(date), + time: Some(time), + offset, + }, + // Local Date + None => Datetime { + date: Some(date), + time: None, + offset: None, + }, + } + }) + .context(Context::Expression("date-time")), + partial_time + .map(|t| t.into()) + .context(Context::Expression("time")), + )) + .parse(input) +} + +// full-date = date-fullyear "-" date-month "-" date-mday +pub(crate) fn full_date(input: Input<'_>) -> IResult<Input<'_>, Date, ParserError<'_>> { + (date_fullyear, b'-', cut((date_month, b'-', date_mday))) + .map(|(year, _, (month, _, day))| Date { year, month, day }) + .parse(input) +} + +// partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] +pub(crate) fn partial_time(input: Input<'_>) -> IResult<Input<'_>, Time, ParserError<'_>> { + ( + time_hour, + b':', + cut((time_minute, b':', time_second, opt(time_secfrac))), + ) + .map(|(hour, _, (minute, _, second, nanosecond))| Time { + hour, + minute, + second, + nanosecond: nanosecond.unwrap_or_default(), + }) + .parse(input) +} + +// time-offset = "Z" / time-numoffset +// time-numoffset = ( "+" / "-" ) time-hour ":" time-minute +pub(crate) fn time_offset(input: Input<'_>) -> IResult<Input<'_>, Offset, ParserError<'_>> { + alt(( + one_of((b'Z', b'z')).value(Offset::Z), + (one_of((b'+', b'-')), cut((time_hour, b':', time_minute))) + .map(|(sign, (hours, _, minutes))| { + let sign = match sign { + b'+' => 1, + b'-' => -1, + _ => unreachable!("Parser prevents this"), + }; + sign * (hours as i16 * 60 + minutes as i16) + }) + .verify(|minutes| ((-24 * 60)..=(24 * 60)).contains(minutes)) + .map(|minutes| Offset::Custom { minutes }), + )) + .context(Context::Expression("time offset")) + .parse(input) +} + +// date-fullyear = 4DIGIT +pub(crate) fn date_fullyear(input: Input<'_>) -> IResult<Input<'_>, u16, ParserError<'_>> { + unsigned_digits::<4, 4> + .map(|s: &str| s.parse::<u16>().expect("4DIGIT should match u8")) + .parse(input) +} + +// date-month = 2DIGIT ; 01-12 +pub(crate) fn date_month(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + unsigned_digits::<2, 2> + .map_res(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (1..=12).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse(input) +} + +// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +pub(crate) fn date_mday(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + unsigned_digits::<2, 2> + .map_res(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (1..=31).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse(input) +} + +// time-delim = "T" / %x20 ; T, t, or space +pub(crate) fn time_delim(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + one_of(TIME_DELIM).parse(input) +} + +const TIME_DELIM: (u8, u8, u8) = (b'T', b't', b' '); + +// time-hour = 2DIGIT ; 00-23 +pub(crate) fn time_hour(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + unsigned_digits::<2, 2> + .map_res(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=23).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse(input) +} + +// time-minute = 2DIGIT ; 00-59 +pub(crate) fn time_minute(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + unsigned_digits::<2, 2> + .map_res(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=59).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse(input) +} + +// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules +pub(crate) fn time_second(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + unsigned_digits::<2, 2> + .map_res(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=60).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse(input) +} + +// time-secfrac = "." 1*DIGIT +pub(crate) fn time_secfrac(input: Input<'_>) -> IResult<Input<'_>, u32, ParserError<'_>> { + static SCALE: [u32; 10] = [ + 0, + 100_000_000, + 10_000_000, + 1_000_000, + 100_000, + 10_000, + 1_000, + 100, + 10, + 1, + ]; + const INF: usize = usize::MAX; + preceded(b'.', unsigned_digits::<1, INF>) + .map_res(|mut repr: &str| -> Result<u32, CustomError> { + let max_digits = SCALE.len() - 1; + if max_digits < repr.len() { + // Millisecond precision is required. Further precision of fractional seconds is + // implementation-specific. If the value contains greater precision than the + // implementation can support, the additional precision must be truncated, not rounded. + repr = &repr[0..max_digits]; + } + + let v = repr.parse::<u32>().map_err(|_| CustomError::OutOfRange)?; + let num_digits = repr.len(); + + // scale the number accordingly. + let scale = SCALE.get(num_digits).ok_or(CustomError::OutOfRange)?; + let v = v.checked_mul(*scale).ok_or(CustomError::OutOfRange)?; + Ok(v) + }) + .parse(input) +} + +pub(crate) fn unsigned_digits<const MIN: usize, const MAX: usize>( + input: Input<'_>, +) -> IResult<Input<'_>, &str, ParserError<'_>> { + take_while_m_n(MIN, MAX, DIGIT) + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) + .parse(input) +} + +// DIGIT = %x30-39 ; 0-9 +const DIGIT: RangeInclusive<u8> = b'0'..=b'9'; + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn offset_date_time() { + let inputs = [ + ( + "1979-05-27T07:32:00Z", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Z), + }, + ), + ( + "1979-05-27T00:32:00-07:00", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Custom { minutes: -7 * 60 }), + }, + ), + ( + "1979-05-27T00:32:00-00:36", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Custom { minutes: -36 }), + }, + ), + ( + "1979-05-27T00:32:00.999999", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).finish().unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_date_time() { + let inputs = [ + ( + "1979-05-27T07:32:00", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: None, + }, + ), + ( + "1979-05-27T00:32:00.999999", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).finish().unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_date() { + let inputs = [ + ( + "1979-05-27", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: None, + offset: None, + }, + ), + ( + "2017-07-20", + Datetime { + date: Some(Date { + year: 2017, + month: 7, + day: 20, + }), + time: None, + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).finish().unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_time() { + let inputs = [ + ( + "07:32:00", + Datetime { + date: None, + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: None, + }, + ), + ( + "00:32:00.999999", + Datetime { + date: None, + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).finish().unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn time_fraction_truncated() { + let input = "1987-07-05T17:45:00.123456789012345Z"; + date_time.parse(new_input(input)).finish().unwrap(); + } +} diff --git a/vendor/toml_edit/src/parser/document.rs b/vendor/toml_edit/src/parser/document.rs new file mode 100644 index 000000000..38e9e0eee --- /dev/null +++ b/vendor/toml_edit/src/parser/document.rs @@ -0,0 +1,147 @@ +use std::cell::RefCell; + +use nom8::bytes::any; +use nom8::bytes::one_of; +use nom8::combinator::cut; +use nom8::combinator::eof; +use nom8::combinator::opt; +use nom8::combinator::peek; +use nom8::error::FromExternalError; +use nom8::multi::many0_count; + +use crate::document::Document; +use crate::key::Key; +use crate::parser::inline_table::KEYVAL_SEP; +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::state::ParseState; +use crate::parser::table::table; +use crate::parser::trivia::{comment, line_ending, line_trailing, newline, ws}; +use crate::parser::value::value; +use crate::table::TableKeyValue; +use crate::Item; +use crate::RawString; + +// ;; TOML + +// toml = expression *( newline expression ) + +// expression = ( ( ws comment ) / +// ( ws keyval ws [ comment ] ) / +// ( ws table ws [ comment ] ) / +// ws ) +pub(crate) fn document(input: Input<'_>) -> IResult<Input<'_>, Document, ParserError<'_>> { + let state = RefCell::new(ParseState::default()); + let state_ref = &state; + + let (i, _o) = ( + // Remove BOM if present + opt(b"\xEF\xBB\xBF"), + parse_ws(state_ref), + many0_count(( + dispatch! {peek(any); + crate::parser::trivia::COMMENT_START_SYMBOL => cut(parse_comment(state_ref)), + crate::parser::table::STD_TABLE_OPEN => cut(table(state_ref)), + crate::parser::trivia::LF | + crate::parser::trivia::CR => parse_newline(state_ref), + _ => cut(keyval(state_ref)), + }, + parse_ws(state_ref), + )), + eof, + ) + .parse(input)?; + state + .into_inner() + .into_document() + .map(|document| (i, document)) + .map_err(|err| { + nom8::Err::Error(ParserError::from_external_error( + i, + nom8::error::ErrorKind::MapRes, + err, + )) + }) +} + +pub(crate) fn parse_comment<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'_>> + 's { + move |i| { + (comment, line_ending) + .span() + .map(|span| { + state.borrow_mut().on_comment(span); + }) + .parse(i) + } +} + +pub(crate) fn parse_ws<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + ws.span() + .map(|span| state.borrow_mut().on_ws(span)) + .parse(i) + } +} + +pub(crate) fn parse_newline<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + newline + .span() + .map(|span| state.borrow_mut().on_ws(span)) + .parse(i) + } +} + +pub(crate) fn keyval<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + parse_keyval + .map_res(|(p, kv)| state.borrow_mut().on_keyval(p, kv)) + .parse(i) + } +} + +// keyval = key keyval-sep val +pub(crate) fn parse_keyval( + input: Input<'_>, +) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> { + ( + key, + cut(( + one_of(KEYVAL_SEP) + .context(Context::Expected(ParserValue::CharLiteral('.'))) + .context(Context::Expected(ParserValue::CharLiteral('='))), + ( + ws.span(), + value(RecursionCheck::default()), + line_trailing + .context(Context::Expected(ParserValue::CharLiteral('\n'))) + .context(Context::Expected(ParserValue::CharLiteral('#'))), + ), + )), + ) + .map_res::<_, _, std::str::Utf8Error>(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); + + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + Ok(( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + )) + }) + .parse(input) +} diff --git a/vendor/toml_edit/src/parser/errors.rs b/vendor/toml_edit/src/parser/errors.rs new file mode 100644 index 000000000..0baf6bdd4 --- /dev/null +++ b/vendor/toml_edit/src/parser/errors.rs @@ -0,0 +1,482 @@ +use std::error::Error as StdError; +use std::fmt::{Display, Formatter, Result}; + +use crate::parser::prelude::*; +use crate::Key; + +/// Type representing a TOML parse error +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct TomlError { + message: String, + original: Option<String>, + keys: Vec<String>, + span: Option<std::ops::Range<usize>>, +} + +impl TomlError { + pub(crate) fn new(error: ParserError<'_>, original: Input<'_>) -> Self { + use nom8::input::IntoOutput; + use nom8::input::Offset; + + let offset = original.offset(&error.input); + let span = if offset == original.len() { + offset..offset + } else { + offset..(offset + 1) + }; + + let message = error.to_string(); + + Self { + message, + original: Some( + String::from_utf8(original.into_output().to_owned()) + .expect("original document was utf8"), + ), + keys: Vec::new(), + span: Some(span), + } + } + + #[cfg(feature = "serde")] + pub(crate) fn custom(message: String, span: Option<std::ops::Range<usize>>) -> Self { + Self { + message, + original: None, + keys: Vec::new(), + span, + } + } + + #[cfg(feature = "serde")] + pub(crate) fn add_key(&mut self, key: String) { + self.keys.insert(0, key); + } + + /// What went wrong + pub fn message(&self) -> &str { + &self.message + } + + /// The start/end index into the original document where the error occurred + pub fn span(&self) -> Option<std::ops::Range<usize>> { + self.span.clone() + } + + #[cfg(feature = "serde")] + pub(crate) fn set_span(&mut self, span: Option<std::ops::Range<usize>>) { + self.span = span; + } + + #[cfg(feature = "serde")] + pub(crate) fn set_original(&mut self, original: Option<String>) { + self.original = original; + } +} + +/// Displays a TOML parse error +/// +/// # Example +/// +/// TOML parse error at line 1, column 10 +/// | +/// 1 | 00:32:00.a999999 +/// | ^ +/// Unexpected `a` +/// Expected `digit` +/// While parsing a Time +/// While parsing a Date-Time +impl Display for TomlError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let mut context = false; + if let (Some(original), Some(span)) = (&self.original, self.span()) { + context = true; + + let (line, column) = translate_position(original.as_bytes(), span.start); + let line_num = line + 1; + let col_num = column + 1; + let gutter = line_num.to_string().len(); + let content = original.split('\n').nth(line).expect("valid line number"); + + writeln!( + f, + "TOML parse error at line {}, column {}", + line_num, col_num + )?; + // | + for _ in 0..=gutter { + write!(f, " ")?; + } + writeln!(f, "|")?; + + // 1 | 00:32:00.a999999 + write!(f, "{} | ", line_num)?; + writeln!(f, "{}", content)?; + + // | ^ + for _ in 0..=gutter { + write!(f, " ")?; + } + write!(f, "|")?; + for _ in 0..=column { + write!(f, " ")?; + } + // The span will be empty at eof, so we need to make sure we always print at least + // one `^` + write!(f, "^")?; + for _ in (span.start + 1)..(span.end.min(span.start + content.len())) { + write!(f, "^")?; + } + writeln!(f)?; + } + writeln!(f, "{}", self.message)?; + if !context && !self.keys.is_empty() { + writeln!(f, "in `{}`", self.keys.join("."))?; + } + + Ok(()) + } +} + +impl StdError for TomlError { + fn description(&self) -> &'static str { + "TOML parse error" + } +} + +#[derive(Debug)] +pub(crate) struct ParserError<'b> { + input: Input<'b>, + context: Vec<Context>, + cause: Option<Box<dyn std::error::Error + Send + Sync + 'static>>, +} + +impl<'b> nom8::error::ParseError<Input<'b>> for ParserError<'b> { + fn from_error_kind(input: Input<'b>, _kind: nom8::error::ErrorKind) -> Self { + Self { + input, + context: Default::default(), + cause: Default::default(), + } + } + + fn append(_input: Input<'b>, _kind: nom8::error::ErrorKind, other: Self) -> Self { + other + } + + fn from_char(_input: Input<'b>, _: char) -> Self { + unimplemented!("this shouldn't be called with a binary parser") + } + + fn or(self, other: Self) -> Self { + other + } +} + +impl<'b> nom8::error::ParseError<&'b str> for ParserError<'b> { + fn from_error_kind(input: &'b str, _kind: nom8::error::ErrorKind) -> Self { + Self { + input: Input::new(input.as_bytes()), + context: Default::default(), + cause: Default::default(), + } + } + + fn append(_input: &'b str, _kind: nom8::error::ErrorKind, other: Self) -> Self { + other + } + + fn from_char(_input: &'b str, _: char) -> Self { + unimplemented!("this shouldn't be called with a binary parser") + } + + fn or(self, other: Self) -> Self { + other + } +} + +impl<'b> nom8::error::ContextError<Input<'b>, Context> for ParserError<'b> { + fn add_context(_input: Input<'b>, ctx: Context, mut other: Self) -> Self { + other.context.push(ctx); + other + } +} + +impl<'b, E: std::error::Error + Send + Sync + 'static> nom8::error::FromExternalError<Input<'b>, E> + for ParserError<'b> +{ + fn from_external_error(input: Input<'b>, _kind: nom8::error::ErrorKind, e: E) -> Self { + Self { + input, + context: Default::default(), + cause: Some(Box::new(e)), + } + } +} + +impl<'b, E: std::error::Error + Send + Sync + 'static> nom8::error::FromExternalError<&'b str, E> + for ParserError<'b> +{ + fn from_external_error(input: &'b str, _kind: nom8::error::ErrorKind, e: E) -> Self { + Self { + input: Input::new(input.as_bytes()), + context: Default::default(), + cause: Some(Box::new(e)), + } + } +} + +// For tests +impl<'b> std::cmp::PartialEq for ParserError<'b> { + fn eq(&self, other: &Self) -> bool { + self.input == other.input + && self.context == other.context + && self.cause.as_ref().map(ToString::to_string) + == other.cause.as_ref().map(ToString::to_string) + } +} + +impl<'a> std::fmt::Display for ParserError<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let expression = self.context.iter().find_map(|c| match c { + Context::Expression(c) => Some(c), + _ => None, + }); + let expected = self + .context + .iter() + .filter_map(|c| match c { + Context::Expected(c) => Some(c), + _ => None, + }) + .collect::<Vec<_>>(); + + let mut newline = false; + + if let Some(expression) = expression { + newline = true; + + write!(f, "invalid {}", expression)?; + } + + if !expected.is_empty() { + if newline { + writeln!(f)?; + } + newline = true; + + write!(f, "expected ")?; + for (i, expected) in expected.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "{}", expected)?; + } + } + if let Some(cause) = &self.cause { + if newline { + writeln!(f)?; + } + write!(f, "{}", cause)?; + } + + Ok(()) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum Context { + Expression(&'static str), + Expected(ParserValue), +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum ParserValue { + CharLiteral(char), + StringLiteral(&'static str), + Description(&'static str), +} + +impl std::fmt::Display for ParserValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParserValue::CharLiteral('\n') => "newline".fmt(f), + ParserValue::CharLiteral('`') => "'`'".fmt(f), + ParserValue::CharLiteral(c) if c.is_ascii_control() => { + write!(f, "`{}`", c.escape_debug()) + } + ParserValue::CharLiteral(c) => write!(f, "`{}`", c), + ParserValue::StringLiteral(c) => write!(f, "`{}`", c), + ParserValue::Description(c) => write!(f, "{}", c), + } + } +} + +fn translate_position(input: &[u8], index: usize) -> (usize, usize) { + if input.is_empty() { + return (0, index); + } + + let safe_index = index.min(input.len() - 1); + let column_offset = index - safe_index; + let index = safe_index; + + let nl = input[0..index] + .iter() + .rev() + .enumerate() + .find(|(_, b)| **b == b'\n') + .map(|(nl, _)| index - nl - 1); + let line_start = match nl { + Some(nl) => nl + 1, + None => 0, + }; + let line = input[0..line_start].iter().filter(|b| **b == b'\n').count(); + let line = line; + + let column = std::str::from_utf8(&input[line_start..=index]) + .map(|s| s.chars().count() - 1) + .unwrap_or_else(|_| index - line_start); + let column = column + column_offset; + + (line, column) +} + +#[cfg(test)] +mod test_translate_position { + use super::*; + + #[test] + fn empty() { + let input = b""; + let index = 0; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 0)); + } + + #[test] + fn start() { + let input = b"Hello"; + let index = 0; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 0)); + } + + #[test] + fn end() { + let input = b"Hello"; + let index = input.len() - 1; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len() - 1)); + } + + #[test] + fn after() { + let input = b"Hello"; + let index = input.len(); + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len())); + } + + #[test] + fn first_line() { + let input = b"Hello\nWorld\n"; + let index = 2; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 2)); + } + + #[test] + fn end_of_line() { + let input = b"Hello\nWorld\n"; + let index = 5; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 5)); + } + + #[test] + fn start_of_second_line() { + let input = b"Hello\nWorld\n"; + let index = 6; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 0)); + } + + #[test] + fn second_line() { + let input = b"Hello\nWorld\n"; + let index = 8; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 2)); + } +} + +#[derive(Debug, Clone)] +pub(crate) enum CustomError { + DuplicateKey { + key: String, + table: Option<Vec<Key>>, + }, + DottedKeyExtendWrongType { + key: Vec<Key>, + actual: &'static str, + }, + OutOfRange, + #[cfg_attr(feature = "unbounded", allow(dead_code))] + RecursionLimitExceeded, +} + +impl CustomError { + pub(crate) fn duplicate_key(path: &[Key], i: usize) -> Self { + assert!(i < path.len()); + let key = &path[i]; + let repr = key.display_repr(); + Self::DuplicateKey { + key: repr.into(), + table: Some(path[..i].to_vec()), + } + } + + pub(crate) fn extend_wrong_type(path: &[Key], i: usize, actual: &'static str) -> Self { + assert!(i < path.len()); + Self::DottedKeyExtendWrongType { + key: path[..=i].to_vec(), + actual, + } + } +} + +impl StdError for CustomError { + fn description(&self) -> &'static str { + "TOML parse error" + } +} + +impl Display for CustomError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + CustomError::DuplicateKey { key, table } => { + if let Some(table) = table { + if table.is_empty() { + write!(f, "duplicate key `{}` in document root", key) + } else { + let path = table.iter().map(|k| k.get()).collect::<Vec<_>>().join("."); + write!(f, "duplicate key `{}` in table `{}`", key, path) + } + } else { + write!(f, "duplicate key `{}`", key) + } + } + CustomError::DottedKeyExtendWrongType { key, actual } => { + let path = key.iter().map(|k| k.get()).collect::<Vec<_>>().join("."); + write!( + f, + "dotted key `{}` attempted to extend non-table type ({})", + path, actual + ) + } + CustomError::OutOfRange => write!(f, "value is out of range"), + CustomError::RecursionLimitExceeded => write!(f, "recursion limit exceded"), + } + } +} diff --git a/vendor/toml_edit/src/parser/inline_table.rs b/vendor/toml_edit/src/parser/inline_table.rs new file mode 100644 index 000000000..e8d38f4a9 --- /dev/null +++ b/vendor/toml_edit/src/parser/inline_table.rs @@ -0,0 +1,186 @@ +use nom8::bytes::one_of; +use nom8::combinator::cut; +use nom8::multi::separated_list0; +use nom8::sequence::delimited; + +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::trivia::ws; +use crate::parser::value::value; +use crate::table::TableKeyValue; +use crate::{InlineTable, InternalString, Item, RawString, Value}; + +use indexmap::map::Entry; + +// ;; Inline Table + +// inline-table = inline-table-open inline-table-keyvals inline-table-close +pub(crate) fn inline_table( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> { + move |input| { + delimited( + INLINE_TABLE_OPEN, + cut(inline_table_keyvals(check).map_res(|(kv, p)| table_from_pairs(kv, p))), + cut(INLINE_TABLE_CLOSE) + .context(Context::Expression("inline table")) + .context(Context::Expected(ParserValue::CharLiteral('}'))), + ) + .parse(input) + } +} + +fn table_from_pairs( + v: Vec<(Vec<Key>, TableKeyValue)>, + preamble: RawString, +) -> Result<InlineTable, CustomError> { + let mut root = InlineTable::new(); + root.set_preamble(preamble); + // Assuming almost all pairs will be directly in `root` + root.items.reserve(v.len()); + + for (path, kv) in v { + let table = descend_path(&mut root, &path)?; + let key: InternalString = kv.key.get_internal().into(); + match table.items.entry(key) { + Entry::Vacant(o) => { + o.insert(kv); + } + Entry::Occupied(o) => { + return Err(CustomError::DuplicateKey { + key: o.key().as_str().into(), + table: None, + }); + } + } + } + Ok(root) +} + +fn descend_path<'a>( + mut table: &'a mut InlineTable, + path: &'a [Key], +) -> Result<&'a mut InlineTable, CustomError> { + for (i, key) in path.iter().enumerate() { + let entry = table.entry_format(key).or_insert_with(|| { + let mut new_table = InlineTable::new(); + new_table.set_dotted(true); + + Value::InlineTable(new_table) + }); + match *entry { + Value::InlineTable(ref mut sweet_child_of_mine) => { + table = sweet_child_of_mine; + } + ref v => { + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); + } + } + } + Ok(table) +} + +// inline-table-open = %x7B ws ; { +pub(crate) const INLINE_TABLE_OPEN: u8 = b'{'; +// inline-table-close = ws %x7D ; } +const INLINE_TABLE_CLOSE: u8 = b'}'; +// inline-table-sep = ws %x2C ws ; , Comma +const INLINE_TABLE_SEP: u8 = b','; +// keyval-sep = ws %x3D ws ; = +pub(crate) const KEYVAL_SEP: u8 = b'='; + +// inline-table-keyvals = [ inline-table-keyvals-non-empty ] +// inline-table-keyvals-non-empty = +// ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / +// ( key keyval-sep val ) + +fn inline_table_keyvals( + check: RecursionCheck, +) -> impl FnMut( + Input<'_>, +) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, RawString), ParserError<'_>> { + move |input| { + let check = check.recursing(input)?; + ( + separated_list0(INLINE_TABLE_SEP, keyval(check)), + ws.span().map(RawString::with_span), + ) + .parse(input) + } +} + +fn keyval( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> { + move |input| { + ( + key, + cut(( + one_of(KEYVAL_SEP) + .context(Context::Expected(ParserValue::CharLiteral('.'))) + .context(Context::Expected(ParserValue::CharLiteral('='))), + (ws.span(), value(check), ws.span()), + )), + ) + .map(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); + + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + ( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + ) + }) + .parse(input) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn inline_tables() { + let inputs = [ + r#"{}"#, + r#"{ }"#, + r#"{a = 1e165}"#, + r#"{ hello = "world", a = 1}"#, + r#"{ hello.world = "a" }"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = inline_table(Default::default()) + .parse(new_input(input)) + .finish(); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } + + #[test] + fn invalid_inline_tables() { + let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = inline_table(Default::default()) + .parse(new_input(input)) + .finish(); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert!(parsed.is_err()); + } + } +} diff --git a/vendor/toml_edit/src/parser/key.rs b/vendor/toml_edit/src/parser/key.rs new file mode 100644 index 000000000..82424916f --- /dev/null +++ b/vendor/toml_edit/src/parser/key.rs @@ -0,0 +1,104 @@ +use std::ops::RangeInclusive; + +use nom8::bytes::any; +use nom8::bytes::take_while1; +use nom8::combinator::peek; +use nom8::multi::separated_list1; + +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::parser::prelude::*; +use crate::parser::strings::{basic_string, literal_string}; +use crate::parser::trivia::{from_utf8_unchecked, ws}; +use crate::repr::{Decor, Repr}; +use crate::InternalString; +use crate::RawString; + +// key = simple-key / dotted-key +// dotted-key = simple-key 1*( dot-sep simple-key ) +pub(crate) fn key(input: Input<'_>) -> IResult<Input<'_>, Vec<Key>, ParserError<'_>> { + separated_list1( + DOT_SEP, + (ws.span(), simple_key, ws.span()).map(|(pre, (raw, key), suffix)| { + Key::new(key) + .with_repr_unchecked(Repr::new_unchecked(raw)) + .with_decor(Decor::new( + RawString::with_span(pre), + RawString::with_span(suffix), + )) + }), + ) + .context(Context::Expression("key")) + .map_res(|k| { + // Inserting the key will require recursion down the line + RecursionCheck::check_depth(k.len())?; + Ok::<_, CustomError>(k) + }) + .parse(input) +} + +// simple-key = quoted-key / unquoted-key +// quoted-key = basic-string / literal-string +pub(crate) fn simple_key( + input: Input<'_>, +) -> IResult<Input<'_>, (RawString, InternalString), ParserError<'_>> { + dispatch! {peek(any); + crate::parser::strings::QUOTATION_MARK => basic_string + .map(|s: std::borrow::Cow<'_, str>| s.as_ref().into()), + crate::parser::strings::APOSTROPHE => literal_string.map(|s: &str| s.into()), + _ => unquoted_key.map(|s: &str| s.into()), + } + .with_span() + .map(|(k, span)| { + let raw = RawString::with_span(span); + (raw, k) + }) + .parse(input) +} + +// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ +fn unquoted_key(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + take_while1(UNQUOTED_CHAR) + .map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }) + .parse(input) +} + +pub(crate) fn is_unquoted_char(c: u8) -> bool { + use nom8::input::FindToken; + UNQUOTED_CHAR.find_token(c) +} + +const UNQUOTED_CHAR: ( + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, + u8, + u8, +) = (b'A'..=b'Z', b'a'..=b'z', b'0'..=b'9', b'-', b'_'); + +// dot-sep = ws %x2E ws ; . Period +const DOT_SEP: u8 = b'.'; + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn keys() { + let cases = [ + ("a", "a"), + (r#""hello\n ""#, "hello\n "), + (r#"'hello\n '"#, "hello\\n "), + ]; + + for (input, expected) in cases { + dbg!(input); + let parsed = simple_key.parse(new_input(input)).finish(); + assert_eq!( + parsed, + Ok((RawString::with_span(0..(input.len())), expected.into())), + "Parsing {input:?}" + ); + } + } +} diff --git a/vendor/toml_edit/src/parser/macros.rs b/vendor/toml_edit/src/parser/macros.rs new file mode 100644 index 000000000..591b2f7b1 --- /dev/null +++ b/vendor/toml_edit/src/parser/macros.rs @@ -0,0 +1,13 @@ +macro_rules! dispatch { + ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { + move |i| + { + let (i, initial) = $match_parser.parse(i)?; + match initial { + $( + $pat $(if $pred)? => $expr.parse(i), + )* + } + } + } +} diff --git a/vendor/toml_edit/src/parser/mod.rs b/vendor/toml_edit/src/parser/mod.rs new file mode 100644 index 000000000..2c7751ee0 --- /dev/null +++ b/vendor/toml_edit/src/parser/mod.rs @@ -0,0 +1,303 @@ +#![allow(clippy::type_complexity)] + +#[macro_use] +pub(crate) mod macros; + +pub(crate) mod array; +pub(crate) mod datetime; +pub(crate) mod document; +pub(crate) mod errors; +pub(crate) mod inline_table; +pub(crate) mod key; +pub(crate) mod numbers; +pub(crate) mod state; +pub(crate) mod strings; +pub(crate) mod table; +pub(crate) mod trivia; +pub(crate) mod value; + +pub use errors::TomlError; + +pub(crate) fn parse_document(raw: &str) -> Result<crate::Document, TomlError> { + use prelude::*; + + let b = new_input(raw); + let mut doc = document::document + .parse(b) + .finish() + .map_err(|e| TomlError::new(e, b))?; + doc.span = Some(0..(raw.len())); + doc.original = Some(raw.to_owned()); + Ok(doc) +} + +pub(crate) fn parse_key(raw: &str) -> Result<crate::Key, TomlError> { + use prelude::*; + + let b = new_input(raw); + let result = key::simple_key.parse(b).finish(); + match result { + Ok((raw, key)) => { + Ok(crate::Key::new(key).with_repr_unchecked(crate::Repr::new_unchecked(raw))) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) fn parse_key_path(raw: &str) -> Result<Vec<crate::Key>, TomlError> { + use prelude::*; + + let b = new_input(raw); + let result = key::key.parse(b).finish(); + match result { + Ok(mut keys) => { + for key in &mut keys { + key.despan(raw); + } + Ok(keys) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) fn parse_value(raw: &str) -> Result<crate::Value, TomlError> { + use prelude::*; + + let b = new_input(raw); + let parsed = value::value(RecursionCheck::default()).parse(b).finish(); + match parsed { + Ok(mut value) => { + // Only take the repr and not decor, as its probably not intended + value.decor_mut().clear(); + value.despan(raw); + Ok(value) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) mod prelude { + pub(crate) use super::errors::Context; + pub(crate) use super::errors::ParserError; + pub(crate) use super::errors::ParserValue; + pub(crate) use nom8::IResult; + pub(crate) use nom8::Parser as _; + + pub(crate) use nom8::FinishIResult as _; + + pub(crate) type Input<'b> = nom8::input::Located<&'b [u8]>; + + pub(crate) fn new_input(s: &str) -> Input<'_> { + nom8::input::Located::new(s.as_bytes()) + } + + pub(crate) fn ok_error<I, O, E>(res: IResult<I, O, E>) -> Result<Option<(I, O)>, nom8::Err<E>> { + match res { + Ok(ok) => Ok(Some(ok)), + Err(nom8::Err::Error(_)) => Ok(None), + Err(err) => Err(err), + } + } + + #[allow(dead_code)] + pub(crate) fn trace<I: std::fmt::Debug, O: std::fmt::Debug, E: std::fmt::Debug>( + context: impl std::fmt::Display, + mut parser: impl nom8::Parser<I, O, E>, + ) -> impl FnMut(I) -> IResult<I, O, E> { + static DEPTH: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); + move |input: I| { + let depth = DEPTH.fetch_add(1, std::sync::atomic::Ordering::SeqCst) * 2; + eprintln!("{:depth$}--> {} {:?}", "", context, input); + match parser.parse(input) { + Ok((i, o)) => { + DEPTH.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); + eprintln!("{:depth$}<-- {} {:?}", "", context, i); + Ok((i, o)) + } + Err(err) => { + DEPTH.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); + eprintln!("{:depth$}<-- {} {:?}", "", context, err); + Err(err) + } + } + } + } + + #[cfg(not(feature = "unbounded"))] + #[derive(Copy, Clone, Debug, Default)] + pub(crate) struct RecursionCheck { + current: usize, + } + + #[cfg(not(feature = "unbounded"))] + impl RecursionCheck { + pub(crate) fn check_depth(depth: usize) -> Result<(), super::errors::CustomError> { + if depth < 128 { + Ok(()) + } else { + Err(super::errors::CustomError::RecursionLimitExceeded) + } + } + + pub(crate) fn recursing( + mut self, + input: Input<'_>, + ) -> Result<Self, nom8::Err<ParserError<'_>>> { + self.current += 1; + if self.current < 128 { + Ok(self) + } else { + Err(nom8::Err::Error( + nom8::error::FromExternalError::from_external_error( + input, + nom8::error::ErrorKind::Eof, + super::errors::CustomError::RecursionLimitExceeded, + ), + )) + } + } + } + + #[cfg(feature = "unbounded")] + #[derive(Copy, Clone, Debug, Default)] + pub(crate) struct RecursionCheck {} + + #[cfg(feature = "unbounded")] + impl RecursionCheck { + pub(crate) fn check_depth(_depth: usize) -> Result<(), super::errors::CustomError> { + Ok(()) + } + + pub(crate) fn recursing( + self, + _input: Input<'_>, + ) -> Result<Self, nom8::Err<ParserError<'_>>> { + Ok(self) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn documents() { + let documents = [ + "", + r#" +# This is a TOML document. + +title = "TOML Example" + + [owner] + name = "Tom Preston-Werner" + dob = 1979-05-27T07:32:00-08:00 # First class dates + + [database] + server = "192.168.1.1" + ports = [ 8001, 8001, 8002 ] + connection_max = 5000 + enabled = true + + [servers] + + # Indentation (tabs and/or spaces) is allowed but not required +[servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + + [clients] + data = [ ["gamma", "delta"], [1, 2] ] + + # Line breaks are OK when inside arrays +hosts = [ + "alpha", + "omega" +] + + 'some.wierd .stuff' = """ + like + that + # """ # this broke my sintax highlighting + " also. like " = ''' +that +''' + double = 2e39 # this number looks familiar +# trailing comment"#, + r#""#, + r#" "#, + r#" hello = 'darkness' # my old friend +"#, + r#"[parent . child] +key = "value" +"#, + r#"hello.world = "a" +"#, + r#"foo = 1979-05-27 # Comment +"#, + ]; + for input in documents { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + let doc = match parsed { + Ok(doc) => doc, + Err(err) => { + panic!( + "Parse error: {:?}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } + }; + + snapbox::assert_eq(input, doc.to_string()); + } + } + + #[test] + fn documents_parse_only() { + let parse_only = ["\u{FEFF} +[package] +name = \"foo\" +version = \"0.0.1\" +authors = [] +"]; + for input in parse_only { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + match parsed { + Ok(_) => (), + Err(err) => { + panic!( + "Parse error: {:?}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } + } + } + } + + #[test] + fn invalid_documents() { + let invalid_inputs = [r#" hello = 'darkness' # my old friend +$"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + assert!(parsed.is_err(), "Input: {:?}", input); + } + } +} diff --git a/vendor/toml_edit/src/parser/numbers.rs b/vendor/toml_edit/src/parser/numbers.rs new file mode 100644 index 000000000..5a2a931f7 --- /dev/null +++ b/vendor/toml_edit/src/parser/numbers.rs @@ -0,0 +1,344 @@ +use std::ops::RangeInclusive; + +use nom8::branch::alt; +use nom8::bytes::one_of; +use nom8::bytes::tag; +use nom8::bytes::take; +use nom8::combinator::cut; +use nom8::combinator::opt; +use nom8::combinator::peek; +use nom8::combinator::rest; +use nom8::multi::many0_count; +use nom8::sequence::preceded; + +use crate::parser::prelude::*; +use crate::parser::trivia::from_utf8_unchecked; + +// ;; Boolean + +// boolean = true / false +#[allow(dead_code)] // directly define in `fn value` +pub(crate) fn boolean(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { + alt((true_, false_)).parse(input) +} + +pub(crate) fn true_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { + (peek(TRUE[0]), cut(TRUE)).value(true).parse(input) +} +const TRUE: &[u8] = b"true"; + +pub(crate) fn false_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { + (peek(FALSE[0]), cut(FALSE)).value(false).parse(input) +} +const FALSE: &[u8] = b"false"; + +// ;; Integer + +// integer = dec-int / hex-int / oct-int / bin-int +pub(crate) fn integer(input: Input<'_>) -> IResult<Input<'_>, i64, ParserError<'_>> { + dispatch! {peek(opt::<_, &[u8], _, _>(take(2usize))); + Some(b"0x") => cut(hex_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 16))), + Some(b"0o") => cut(oct_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 8))), + Some(b"0b") => cut(bin_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 2))), + _ => dec_int.and_then(cut(rest + .map_res(|s: &str| s.replace('_', "").parse()))) + } + .parse(input) +} + +// dec-int = [ minus / plus ] unsigned-dec-int +// unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) +pub(crate) fn dec_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + ( + opt(one_of((b'+', b'-'))), + alt(( + ( + one_of(DIGIT1_9), + many0_count(alt(( + digit.value(()), + ( + one_of(b'_'), + cut(digit).context(Context::Expected(ParserValue::Description("digit"))), + ) + .value(()), + ))), + ) + .value(()), + digit.value(()), + )), + ) + .recognize() + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) + .context(Context::Expression("integer")) + .parse(input) +} +const DIGIT1_9: RangeInclusive<u8> = b'1'..=b'9'; + +// hex-prefix = %x30.78 ; 0x +// hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) +pub(crate) fn hex_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + preceded( + HEX_PREFIX, + cut(( + hexdig, + many0_count(alt(( + hexdig.value(()), + ( + one_of(b'_'), + cut(hexdig).context(Context::Expected(ParserValue::Description("digit"))), + ) + .value(()), + ))), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") }) + .context(Context::Expression("hexadecimal integer")) + .parse(input) +} +const HEX_PREFIX: &[u8] = b"0x"; + +// oct-prefix = %x30.6F ; 0o +// oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) +pub(crate) fn oct_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + preceded( + OCT_PREFIX, + cut(( + one_of(DIGIT0_7), + many0_count(alt(( + one_of(DIGIT0_7).value(()), + ( + one_of(b'_'), + cut(one_of(DIGIT0_7)) + .context(Context::Expected(ParserValue::Description("digit"))), + ) + .value(()), + ))), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") }) + .context(Context::Expression("octal integer")) + .parse(input) +} +const OCT_PREFIX: &[u8] = b"0o"; +const DIGIT0_7: RangeInclusive<u8> = b'0'..=b'7'; + +// bin-prefix = %x30.62 ; 0b +// bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) +pub(crate) fn bin_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + preceded( + BIN_PREFIX, + cut(( + one_of(DIGIT0_1), + many0_count(alt(( + one_of(DIGIT0_1).value(()), + ( + one_of(b'_'), + cut(one_of(DIGIT0_1)) + .context(Context::Expected(ParserValue::Description("digit"))), + ) + .value(()), + ))), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") }) + .context(Context::Expression("binary integer")) + .parse(input) +} +const BIN_PREFIX: &[u8] = b"0b"; +const DIGIT0_1: RangeInclusive<u8> = b'0'..=b'1'; + +// ;; Float + +// float = float-int-part ( exp / frac [ exp ] ) +// float =/ special-float +// float-int-part = dec-int +pub(crate) fn float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { + alt(( + float_.and_then(cut(rest + .map_res(|s: &str| s.replace('_', "").parse()) + .verify(|f: &f64| *f != f64::INFINITY))), + special_float, + )) + .context(Context::Expression("floating-point number")) + .parse(input) +} + +pub(crate) fn float_(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + (dec_int, alt((exp, (frac, opt(exp)).map(|_| "")))) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + "`dec_int`, `one_of`, `exp`, and `frac` filter out non-ASCII", + ) + }) + .parse(input) +} + +// frac = decimal-point zero-prefixable-int +// decimal-point = %x2E ; . +pub(crate) fn frac(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + ( + b'.', + cut(zero_prefixable_int).context(Context::Expected(ParserValue::Description("digit"))), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + "`.` and `parse_zero_prefixable_int` filter out non-ASCII", + ) + }) + .parse(input) +} + +// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) +pub(crate) fn zero_prefixable_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + ( + digit, + many0_count(alt(( + digit.value(()), + ( + one_of(b'_'), + cut(digit).context(Context::Expected(ParserValue::Description("digit"))), + ) + .value(()), + ))), + ) + .recognize() + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) + .parse(input) +} + +// exp = "e" float-exp-part +// float-exp-part = [ minus / plus ] zero-prefixable-int +pub(crate) fn exp(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + ( + one_of((b'e', b'E')), + opt(one_of([b'+', b'-'])), + cut(zero_prefixable_int), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + "`one_of` and `parse_zero_prefixable_int` filter out non-ASCII", + ) + }) + .parse(input) +} + +// special-float = [ minus / plus ] ( inf / nan ) +pub(crate) fn special_float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { + (opt(one_of((b'+', b'-'))), alt((inf, nan))) + .map(|(s, f)| match s { + Some(b'+') | None => f, + Some(b'-') => -f, + _ => unreachable!("one_of should prevent this"), + }) + .parse(input) +} +// inf = %x69.6e.66 ; inf +pub(crate) fn inf(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { + tag(INF).value(f64::INFINITY).parse(input) +} +const INF: &[u8] = b"inf"; +// nan = %x6e.61.6e ; nan +pub(crate) fn nan(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { + tag(NAN).value(f64::NAN).parse(input) +} +const NAN: &[u8] = b"nan"; + +// DIGIT = %x30-39 ; 0-9 +pub(crate) fn digit(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + one_of(DIGIT).parse(input) +} +const DIGIT: RangeInclusive<u8> = b'0'..=b'9'; + +// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +pub(crate) fn hexdig(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + one_of(HEXDIG).parse(input) +} +pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = + (DIGIT, b'A'..=b'F', b'a'..=b'f'); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn integers() { + let cases = [ + ("+99", 99), + ("42", 42), + ("0", 0), + ("-17", -17), + ("1_000", 1_000), + ("5_349_221", 5_349_221), + ("1_2_3_4_5", 1_2_3_4_5), + ("0xF", 15), + ("0o0_755", 493), + ("0b1_0_1", 5), + (&std::i64::MIN.to_string()[..], std::i64::MIN), + (&std::i64::MAX.to_string()[..], std::i64::MAX), + ]; + for &(input, expected) in &cases { + dbg!(input); + let parsed = integer.parse(new_input(input)).finish(); + assert_eq!(parsed, Ok(expected), "Parsing {input:?}"); + } + + let overflow = "1000000000000000000000000000000000"; + let parsed = integer.parse(new_input(overflow)).finish(); + assert!(parsed.is_err()); + } + + #[track_caller] + fn assert_float_eq(actual: f64, expected: f64) { + if expected.is_nan() { + assert!(actual.is_nan()); + } else if expected.is_infinite() { + assert!(actual.is_infinite()); + assert_eq!(expected.is_sign_positive(), actual.is_sign_positive()); + } else { + dbg!(expected); + dbg!(actual); + assert!((expected - actual).abs() < std::f64::EPSILON); + } + } + + #[test] + fn floats() { + let cases = [ + ("+1.0", 1.0), + ("3.1419", 3.1419), + ("-0.01", -0.01), + ("5e+22", 5e+22), + ("1e6", 1e6), + ("-2E-2", -2E-2), + ("6.626e-34", 6.626e-34), + ("9_224_617.445_991_228_313", 9_224_617.445_991_227), + ("-1.7976931348623157e+308", std::f64::MIN), + ("1.7976931348623157e+308", std::f64::MAX), + ("nan", f64::NAN), + ("+nan", f64::NAN), + ("-nan", f64::NAN), + ("inf", f64::INFINITY), + ("+inf", f64::INFINITY), + ("-inf", f64::NEG_INFINITY), + // ("1e+400", std::f64::INFINITY), + ]; + for &(input, expected) in &cases { + dbg!(input); + let parsed = float.parse(new_input(input)).finish().unwrap(); + assert_float_eq(parsed, expected); + + let overflow = "9e99999"; + let parsed = float.parse(new_input(overflow)).finish(); + assert!(parsed.is_err(), "{:?}", parsed); + } + } +} diff --git a/vendor/toml_edit/src/parser/state.rs b/vendor/toml_edit/src/parser/state.rs new file mode 100644 index 000000000..efa884d2f --- /dev/null +++ b/vendor/toml_edit/src/parser/state.rs @@ -0,0 +1,323 @@ +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::repr::Decor; +use crate::table::TableKeyValue; +use crate::{ArrayOfTables, Document, InternalString, Item, RawString, Table}; + +pub(crate) struct ParseState { + document: Document, + trailing: Option<std::ops::Range<usize>>, + current_table_position: usize, + current_table: Table, + current_is_array: bool, + current_table_path: Vec<Key>, +} + +impl ParseState { + pub(crate) fn into_document(mut self) -> Result<Document, CustomError> { + self.finalize_table()?; + let trailing = self.trailing.map(RawString::with_span); + self.document.trailing = trailing.unwrap_or_default(); + Ok(self.document) + } + + pub(crate) fn on_ws(&mut self, span: std::ops::Range<usize>) { + if let Some(old) = self.trailing.take() { + self.trailing = Some(old.start..span.end); + } else { + self.trailing = Some(span); + } + } + + pub(crate) fn on_comment(&mut self, span: std::ops::Range<usize>) { + if let Some(old) = self.trailing.take() { + self.trailing = Some(old.start..span.end); + } else { + self.trailing = Some(span); + } + } + + pub(crate) fn on_keyval( + &mut self, + mut path: Vec<Key>, + mut kv: TableKeyValue, + ) -> Result<(), CustomError> { + { + let mut prefix = self.trailing.take(); + let first_key = if path.is_empty() { + &mut kv.key + } else { + &mut path[0] + }; + let prefix = match ( + prefix.take(), + first_key.decor.prefix().and_then(|d| d.span()), + ) { + (Some(p), Some(k)) => Some(p.start..k.end), + (Some(p), None) | (None, Some(p)) => Some(p), + (None, None) => None, + }; + first_key + .decor + .set_prefix(prefix.map(RawString::with_span).unwrap_or_default()); + } + + if let (Some(existing), Some(value)) = (self.current_table.span(), kv.value.span()) { + self.current_table.span = Some((existing.start)..(value.end)); + } + let table = &mut self.current_table; + let table = Self::descend_path(table, &path, true)?; + + // "Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed" + let mixed_table_types = table.is_dotted() == path.is_empty(); + if mixed_table_types { + return Err(CustomError::DuplicateKey { + key: kv.key.get().into(), + table: None, + }); + } + + let key: InternalString = kv.key.get_internal().into(); + match table.items.entry(key) { + indexmap::map::Entry::Vacant(o) => { + o.insert(kv); + } + indexmap::map::Entry::Occupied(o) => { + // "Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed" + return Err(CustomError::DuplicateKey { + key: o.key().as_str().into(), + table: Some(self.current_table_path.clone()), + }); + } + } + + Ok(()) + } + + pub(crate) fn start_aray_table( + &mut self, + path: Vec<Key>, + decor: Decor, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // Look up the table on start to ensure the duplicate_key error points to the right line + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + entry + .as_array_of_tables() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_implicit(false); + self.current_table.set_dotted(false); + self.current_table.set_position(self.current_table_position); + self.current_table.span = Some(span); + self.current_is_array = true; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn start_table( + &mut self, + path: Vec<Key>, + decor: Decor, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // 1. Look up the table on start to ensure the duplicate_key error points to the right line + // 2. Ensure any child tables from an implicit table are preserved + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + if let Some(entry) = parent_table.remove(key.get()) { + match entry { + Item::Table(t) if t.implicit && !t.is_dotted() => { + self.current_table = t; + } + // Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed. Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed. + _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_implicit(false); + self.current_table.set_dotted(false); + self.current_table.set_position(self.current_table_position); + self.current_table.span = Some(span); + self.current_is_array = false; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn finalize_table(&mut self) -> Result<(), CustomError> { + let mut table = std::mem::take(&mut self.current_table); + let path = std::mem::take(&mut self.current_table_path); + + let root = self.document.as_table_mut(); + if path.is_empty() { + assert!(root.is_empty()); + std::mem::swap(&mut table, root); + } else if self.current_is_array { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + let array = entry + .as_array_of_tables_mut() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + array.push(table); + let span = if let (Some(first), Some(last)) = ( + array.values.first().and_then(|t| t.span()), + array.values.last().and_then(|t| t.span()), + ) { + Some((first.start)..(last.end)) + } else { + None + }; + array.span = span; + } else { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table.entry_format(key); + match entry { + crate::Entry::Occupied(entry) => { + match entry.into_mut() { + // if [a.b.c] header preceded [a.b] + Item::Table(ref mut t) if t.implicit => { + std::mem::swap(t, &mut table); + } + _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + crate::Entry::Vacant(entry) => { + let item = Item::Table(table); + entry.insert(item); + } + } + } + + Ok(()) + } + + pub(crate) fn descend_path<'t, 'k>( + mut table: &'t mut Table, + path: &'k [Key], + dotted: bool, + ) -> Result<&'t mut Table, CustomError> { + for (i, key) in path.iter().enumerate() { + let entry = table.entry_format(key).or_insert_with(|| { + let mut new_table = Table::new(); + new_table.set_implicit(true); + new_table.set_dotted(dotted); + + Item::Table(new_table) + }); + match *entry { + Item::Value(ref v) => { + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); + } + Item::ArrayOfTables(ref mut array) => { + debug_assert!(!array.is_empty()); + + let index = array.len() - 1; + let last_child = array.get_mut(index).unwrap(); + + table = last_child; + } + Item::Table(ref mut sweet_child_of_mine) => { + // Since tables cannot be defined more than once, redefining such tables using a + // [table] header is not allowed. Likewise, using dotted keys to redefine tables + // already defined in [table] form is not allowed. + if dotted && !sweet_child_of_mine.is_implicit() { + return Err(CustomError::DuplicateKey { + key: key.get().into(), + table: None, + }); + } + table = sweet_child_of_mine; + } + _ => unreachable!(), + } + } + Ok(table) + } + + pub(crate) fn on_std_header( + &mut self, + path: Vec<Key>, + trailing: std::ops::Range<usize>, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = self + .trailing + .take() + .map(RawString::with_span) + .unwrap_or_default(); + self.start_table( + path, + Decor::new(leading, RawString::with_span(trailing)), + span, + )?; + + Ok(()) + } + + pub(crate) fn on_array_header( + &mut self, + path: Vec<Key>, + trailing: std::ops::Range<usize>, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = self + .trailing + .take() + .map(RawString::with_span) + .unwrap_or_default(); + self.start_aray_table( + path, + Decor::new(leading, RawString::with_span(trailing)), + span, + )?; + + Ok(()) + } +} + +impl Default for ParseState { + fn default() -> Self { + let mut root = Table::new(); + root.span = Some(0..0); + Self { + document: Document::new(), + trailing: None, + current_table_position: 0, + current_table: root, + current_is_array: false, + current_table_path: Vec::new(), + } + } +} diff --git a/vendor/toml_edit/src/parser/strings.rs b/vendor/toml_edit/src/parser/strings.rs new file mode 100644 index 000000000..d31c43887 --- /dev/null +++ b/vendor/toml_edit/src/parser/strings.rs @@ -0,0 +1,456 @@ +use std::borrow::Cow; +use std::char; +use std::ops::RangeInclusive; + +use nom8::branch::alt; +use nom8::bytes::any; +use nom8::bytes::none_of; +use nom8::bytes::one_of; +use nom8::bytes::tag; +use nom8::bytes::take_while; +use nom8::bytes::take_while1; +use nom8::bytes::take_while_m_n; +use nom8::combinator::cut; +use nom8::combinator::fail; +use nom8::combinator::opt; +use nom8::combinator::peek; +use nom8::combinator::success; +use nom8::multi::many0_count; +use nom8::multi::many1_count; +use nom8::sequence::delimited; +use nom8::sequence::preceded; +use nom8::sequence::terminated; + +use crate::parser::errors::CustomError; +use crate::parser::numbers::HEXDIG; +use crate::parser::prelude::*; +use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_ASCII, WSCHAR}; + +// ;; String + +// string = ml-basic-string / basic-string / ml-literal-string / literal-string +pub(crate) fn string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + alt(( + ml_basic_string, + basic_string, + ml_literal_string, + literal_string.map(Cow::Borrowed), + )) + .parse(input) +} + +// ;; Basic String + +// basic-string = quotation-mark *basic-char quotation-mark +pub(crate) fn basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + let (mut input, _) = one_of(QUOTATION_MARK).parse(input)?; + + let mut c = Cow::Borrowed(""); + if let Some((i, ci)) = ok_error(basic_chars.parse(input))? { + input = i; + c = ci; + } + while let Some((i, ci)) = ok_error(basic_chars.parse(input))? { + input = i; + c.to_mut().push_str(&ci); + } + + let (input, _) = cut(one_of(QUOTATION_MARK)) + .context(Context::Expression("basic string")) + .parse(input)?; + + Ok((input, c)) +} + +// quotation-mark = %x22 ; " +pub(crate) const QUOTATION_MARK: u8 = b'"'; + +// basic-char = basic-unescaped / escaped +fn basic_chars(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + alt(( + // Deviate from the official grammar by batching the unescaped chars so we build a string a + // chunk at a time, rather than a `char` at a time. + take_while1(BASIC_UNESCAPED) + .map_res(std::str::from_utf8) + .map(Cow::Borrowed), + escaped.map(|c| Cow::Owned(String::from(c))), + )) + .parse(input) +} + +// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii +pub(crate) const BASIC_UNESCAPED: ( + (u8, u8), + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); + +// escaped = escape escape-seq-char +fn escaped(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> { + preceded(ESCAPE, escape_seq_char).parse(input) +} + +// escape = %x5C ; \ +pub(crate) const ESCAPE: u8 = b'\\'; + +// escape-seq-char = %x22 ; " quotation mark U+0022 +// escape-seq-char =/ %x5C ; \ reverse solidus U+005C +// escape-seq-char =/ %x62 ; b backspace U+0008 +// escape-seq-char =/ %x66 ; f form feed U+000C +// escape-seq-char =/ %x6E ; n line feed U+000A +// escape-seq-char =/ %x72 ; r carriage return U+000D +// escape-seq-char =/ %x74 ; t tab U+0009 +// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX +// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX +fn escape_seq_char(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> { + dispatch! {any; + b'b' => success('\u{8}'), + b'f' => success('\u{c}'), + b'n' => success('\n'), + b'r' => success('\r'), + b't' => success('\t'), + b'u' => cut(hexescape::<4>).context(Context::Expression("unicode 4-digit hex code")), + b'U' => cut(hexescape::<8>).context(Context::Expression("unicode 8-digit hex code")), + b'\\' => success('\\'), + b'"' => success('"'), + _ => { + cut(fail::<_, char, _>) + .context(Context::Expression("escape sequence")) + .context(Context::Expected(ParserValue::CharLiteral('b'))) + .context(Context::Expected(ParserValue::CharLiteral('f'))) + .context(Context::Expected(ParserValue::CharLiteral('n'))) + .context(Context::Expected(ParserValue::CharLiteral('r'))) + .context(Context::Expected(ParserValue::CharLiteral('t'))) + .context(Context::Expected(ParserValue::CharLiteral('u'))) + .context(Context::Expected(ParserValue::CharLiteral('U'))) + .context(Context::Expected(ParserValue::CharLiteral('\\'))) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + } + } + .parse(input) +} + +pub(crate) fn hexescape<const N: usize>( + input: Input<'_>, +) -> IResult<Input<'_>, char, ParserError<'_>> { + take_while_m_n(0, N, HEXDIG) + .verify(|b: &[u8]| b.len() == N) + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) + .map_opt(|s| u32::from_str_radix(s, 16).ok()) + .map_res(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) + .parse(input) +} + +// ;; Multiline Basic String + +// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body +// ml-basic-string-delim +fn ml_basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + delimited( + ML_BASIC_STRING_DELIM, + preceded(opt(newline), cut(ml_basic_body)), + cut(ML_BASIC_STRING_DELIM), + ) + .context(Context::Expression("multiline basic string")) + .parse(input) +} + +// ml-basic-string-delim = 3quotation-mark +pub(crate) const ML_BASIC_STRING_DELIM: &[u8] = b"\"\"\""; + +// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] +fn ml_basic_body(mut input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + let mut c = Cow::Borrowed(""); + if let Some((i, ci)) = ok_error(mlb_content.parse(input))? { + input = i; + c = ci; + } + while let Some((i, ci)) = ok_error(mlb_content.parse(input))? { + input = i; + c.to_mut().push_str(&ci); + } + + while let Some((i, qi)) = ok_error(mlb_quotes(none_of(b'\"').value(())).parse(input))? { + if let Some((i, ci)) = ok_error(mlb_content.parse(i))? { + input = i; + c.to_mut().push_str(qi); + c.to_mut().push_str(&ci); + while let Some((i, ci)) = ok_error(mlb_content.parse(input))? { + input = i; + c.to_mut().push_str(&ci); + } + } else { + break; + } + } + + if let Some((i, qi)) = ok_error(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(())).parse(input))? + { + input = i; + c.to_mut().push_str(qi); + } + + Ok((input, c)) +} + +// mlb-content = mlb-char / newline / mlb-escaped-nl +// mlb-char = mlb-unescaped / escaped +fn mlb_content(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + alt(( + // Deviate from the official grammar by batching the unescaped chars so we build a string a + // chunk at a time, rather than a `char` at a time. + take_while1(MLB_UNESCAPED) + .map_res(std::str::from_utf8) + .map(Cow::Borrowed), + // Order changed fromg grammar so `escaped` can more easily `cut` on bad escape sequences + mlb_escaped_nl.map(|_| Cow::Borrowed("")), + escaped.map(|c| Cow::Owned(String::from(c))), + newline.map(|_| Cow::Borrowed("\n")), + )) + .parse(input) +} + +// mlb-quotes = 1*2quotation-mark +fn mlb_quotes<'i>( + mut term: impl nom8::Parser<Input<'i>, (), ParserError<'i>>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> { + move |input| { + let res = terminated(b"\"\"", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse(input); + + match res { + Err(nom8::Err::Error(_)) => terminated(b"\"", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse(input), + res => res, + } + } +} + +// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii +pub(crate) const MLB_UNESCAPED: ( + (u8, u8), + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); + +// mlb-escaped-nl = escape ws newline *( wschar / newline +// When the last non-whitespace character on a line is a \, +// it will be trimmed along with all whitespace +// (including newlines) up to the next non-whitespace +// character or closing delimiter. +fn mlb_escaped_nl(input: Input<'_>) -> IResult<Input<'_>, (), ParserError<'_>> { + many1_count((ESCAPE, ws, ws_newlines)) + .value(()) + .parse(input) +} + +// ;; Literal String + +// literal-string = apostrophe *literal-char apostrophe +pub(crate) fn literal_string(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + delimited(APOSTROPHE, cut(take_while(LITERAL_CHAR)), cut(APOSTROPHE)) + .map_res(std::str::from_utf8) + .context(Context::Expression("literal string")) + .parse(input) +} + +// apostrophe = %x27 ; ' apostrophe +pub(crate) const APOSTROPHE: u8 = b'\''; + +// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii +pub(crate) const LITERAL_CHAR: ( + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); + +// ;; Multiline Literal String + +// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body +// ml-literal-string-delim +fn ml_literal_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { + delimited( + (ML_LITERAL_STRING_DELIM, opt(newline)), + cut(ml_literal_body.map(|t| { + if t.contains("\r\n") { + Cow::Owned(t.replace("\r\n", "\n")) + } else { + Cow::Borrowed(t) + } + })), + cut(ML_LITERAL_STRING_DELIM), + ) + .context(Context::Expression("multiline literal string")) + .parse(input) +} + +// ml-literal-string-delim = 3apostrophe +pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''"; + +// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] +fn ml_literal_body(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + ( + many0_count(mll_content), + many0_count(( + mll_quotes(none_of(APOSTROPHE).value(())), + many1_count(mll_content), + )), + opt(mll_quotes(tag(ML_LITERAL_STRING_DELIM).value(()))), + ) + .recognize() + .map_res(std::str::from_utf8) + .parse(input) +} + +// mll-content = mll-char / newline +fn mll_content(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + alt((one_of(MLL_CHAR), newline)).parse(input) +} + +// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii +const MLL_CHAR: ( + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); + +// mll-quotes = 1*2apostrophe +fn mll_quotes<'i>( + mut term: impl nom8::Parser<Input<'i>, (), ParserError<'i>>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> { + move |input| { + let res = terminated(b"''", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse(input); + + match res { + Err(nom8::Err::Error(_)) => terminated(b"'", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse(input), + res => res, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn basic_string() { + let input = + r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#; + let expected = "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}"; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + #[test] + fn ml_basic_string() { + let cases = [ + ( + r#"""" +Roses are red +Violets are blue""""#, + r#"Roses are red +Violets are blue"#, + ), + (r#"""" \""" """"#, " \"\"\" "), + (r#"""" \\""""#, " \\"), + ]; + + for &(input, expected) in &cases { + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + let invalid_cases = [r#"""" """#, r#"""" \""""#]; + + for input in &invalid_cases { + let parsed = string.parse(new_input(input)).finish(); + assert!(parsed.is_err()); + } + } + + #[test] + fn ml_basic_string_escape_ws() { + let inputs = [ + r#"""" +The quick brown \ + + + fox jumps over \ + the lazy dog.""""#, + r#""""\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """"#, + ]; + for input in &inputs { + let expected = "The quick brown fox jumps over the lazy dog."; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + let empties = [ + r#""""\ + """"#, + r#"""" +\ + \ +""""#, + ]; + for input in &empties { + let expected = ""; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + } + + #[test] + fn literal_string() { + let inputs = [ + r#"'C:\Users\nodejs\templates'"#, + r#"'\\ServerX\admin$\system32\'"#, + r#"'Tom "Dubs" Preston-Werner'"#, + r#"'<\i\c*\s*>'"#, + ]; + + for input in &inputs { + let expected = &input[1..input.len() - 1]; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + } + + #[test] + fn ml_literal_string() { + let inputs = [ + r#"'''I [dw]on't need \d{2} apples'''"#, + r#"''''one_quote''''"#, + ]; + for input in &inputs { + let expected = &input[3..input.len() - 3]; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + let input = r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#; + let expected = &input[4..input.len() - 3]; + let parsed = string.parse(new_input(input)).finish(); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } +} diff --git a/vendor/toml_edit/src/parser/table.rs b/vendor/toml_edit/src/parser/table.rs new file mode 100644 index 000000000..1c76ed19b --- /dev/null +++ b/vendor/toml_edit/src/parser/table.rs @@ -0,0 +1,89 @@ +use std::cell::RefCell; +#[allow(unused_imports)] +use std::ops::DerefMut; + +use nom8::bytes::take; +use nom8::combinator::cut; +use nom8::combinator::peek; +use nom8::sequence::delimited; + +// https://github.com/rust-lang/rust/issues/41358 +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::state::ParseState; +use crate::parser::trivia::line_trailing; + +// std-table-open = %x5B ws ; [ Left square bracket +pub(crate) const STD_TABLE_OPEN: u8 = b'['; +// std-table-close = ws %x5D ; ] Right square bracket +const STD_TABLE_CLOSE: u8 = b']'; +// array-table-open = %x5B.5B ws ; [[ Double left square bracket +const ARRAY_TABLE_OPEN: &[u8] = b"[["; +// array-table-close = ws %x5D.5D ; ]] Double right quare bracket +const ARRAY_TABLE_CLOSE: &[u8] = b"]]"; + +// ;; Standard Table + +// std-table = std-table-open key *( table-key-sep key) std-table-close +pub(crate) fn std_table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + ( + delimited( + STD_TABLE_OPEN, + cut(key), + cut(STD_TABLE_CLOSE) + .context(Context::Expected(ParserValue::CharLiteral('.'))) + .context(Context::Expected(ParserValue::StringLiteral("]"))), + ) + .with_span(), + cut(line_trailing) + .context(Context::Expected(ParserValue::CharLiteral('\n'))) + .context(Context::Expected(ParserValue::CharLiteral('#'))), + ) + .map_res(|((h, span), t)| state.borrow_mut().deref_mut().on_std_header(h, t, span)) + .parse(i) + } +} + +// ;; Array Table + +// array-table = array-table-open key *( table-key-sep key) array-table-close +pub(crate) fn array_table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + ( + delimited( + ARRAY_TABLE_OPEN, + cut(key), + cut(ARRAY_TABLE_CLOSE) + .context(Context::Expected(ParserValue::CharLiteral('.'))) + .context(Context::Expected(ParserValue::StringLiteral("]]"))), + ) + .with_span(), + cut(line_trailing) + .context(Context::Expected(ParserValue::CharLiteral('\n'))) + .context(Context::Expected(ParserValue::CharLiteral('#'))), + ) + .map_res(|((h, span), t)| state.borrow_mut().deref_mut().on_array_header(h, t, span)) + .parse(i) + } +} + +// ;; Table + +// table = std-table / array-table +pub(crate) fn table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { + move |i| { + dispatch!(peek::<_, &[u8],_,_>(take(2usize)); + b"[[" => array_table(state), + _ => std_table(state), + ) + .context(Context::Expression("table header")) + .parse(i) + } +} diff --git a/vendor/toml_edit/src/parser/trivia.rs b/vendor/toml_edit/src/parser/trivia.rs new file mode 100644 index 000000000..981c7b23d --- /dev/null +++ b/vendor/toml_edit/src/parser/trivia.rs @@ -0,0 +1,150 @@ +use std::ops::RangeInclusive; + +use nom8::branch::alt; +use nom8::bytes::one_of; +use nom8::bytes::take_while; +use nom8::bytes::take_while1; +use nom8::combinator::eof; +use nom8::combinator::opt; +use nom8::multi::many0_count; +use nom8::multi::many1_count; +use nom8::prelude::*; +use nom8::sequence::terminated; + +use crate::parser::prelude::*; + +pub(crate) unsafe fn from_utf8_unchecked<'b>( + bytes: &'b [u8], + safety_justification: &'static str, +) -> &'b str { + if cfg!(debug_assertions) { + // Catch problems more quickly when testing + std::str::from_utf8(bytes).expect(safety_justification) + } else { + std::str::from_utf8_unchecked(bytes) + } +} + +// wschar = ( %x20 / ; Space +// %x09 ) ; Horizontal tab +pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t'); + +// ws = *wschar +pub(crate) fn ws(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + take_while(WSCHAR) + .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") }) + .parse(input) +} + +// non-ascii = %x80-D7FF / %xE000-10FFFF +// - ASCII is 0xxxxxxx +// - First byte for UTF-8 is 11xxxxxx +// - Subsequent UTF-8 bytes are 10xxxxxx +pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff; + +// non-eol = %x09 / %x20-7E / non-ascii +pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) = + (0x09, 0x20..=0x7E, NON_ASCII); + +// comment-start-symbol = %x23 ; # +pub(crate) const COMMENT_START_SYMBOL: u8 = b'#'; + +// comment = comment-start-symbol *non-eol +pub(crate) fn comment(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> { + (COMMENT_START_SYMBOL, take_while(NON_EOL)) + .recognize() + .parse(input) +} + +// newline = ( %x0A / ; LF +// %x0D.0A ) ; CRLF +pub(crate) fn newline(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { + alt(( + one_of(LF).value(b'\n'), + (one_of(CR), one_of(LF)).value(b'\n'), + )) + .parse(input) +} +pub(crate) const LF: u8 = b'\n'; +pub(crate) const CR: u8 = b'\r'; + +// ws-newline = *( wschar / newline ) +pub(crate) fn ws_newline(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + many0_count(alt((newline.value(&b"\n"[..]), take_while1(WSCHAR)))) + .recognize() + .map(|b| unsafe { + from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") + }) + .parse(input) +} + +// ws-newlines = newline *( wschar / newline ) +pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + (newline, ws_newline) + .recognize() + .map(|b| unsafe { + from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") + }) + .parse(input) +} + +// note: this rule is not present in the original grammar +// ws-comment-newline = *( ws-newline-nonempty / comment ) +pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> { + many0_count(alt(( + many1_count(alt((take_while1(WSCHAR), newline.value(&b"\n"[..])))).value(()), + comment.value(()), + ))) + .recognize() + .parse(input) +} + +// note: this rule is not present in the original grammar +// line-ending = newline / eof +pub(crate) fn line_ending(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { + alt((newline.value("\n"), eof.value(""))).parse(input) +} + +// note: this rule is not present in the original grammar +// line-trailing = ws [comment] skip-line-ending +pub(crate) fn line_trailing( + input: Input<'_>, +) -> IResult<Input<'_>, std::ops::Range<usize>, ParserError<'_>> { + terminated((ws, opt(comment)).span(), line_ending).parse(input) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn trivia() { + let inputs = [ + "", + r#" "#, + r#" +"#, + r#" +# comment + +# comment2 + + +"#, + r#" + "#, + r#"# comment +# comment2 + + + "#, + ]; + for input in inputs { + dbg!(input); + let parsed = ws_comment_newline.parse(new_input(input)).finish(); + assert!(parsed.is_ok(), "{:?}", parsed); + let parsed = parsed.unwrap(); + assert_eq!(parsed, input.as_bytes()); + } + } +} diff --git a/vendor/toml_edit/src/parser/value.rs b/vendor/toml_edit/src/parser/value.rs new file mode 100644 index 000000000..15ba4478e --- /dev/null +++ b/vendor/toml_edit/src/parser/value.rs @@ -0,0 +1,157 @@ +use nom8::branch::alt; +use nom8::bytes::any; +use nom8::combinator::fail; +use nom8::combinator::peek; + +use crate::parser::array::array; +use crate::parser::datetime::date_time; +use crate::parser::inline_table::inline_table; +use crate::parser::numbers::{float, integer}; +use crate::parser::prelude::*; +use crate::parser::strings::string; +use crate::repr::{Formatted, Repr}; +use crate::value as v; +use crate::RawString; +use crate::Value; + +// val = string / boolean / array / inline-table / date-time / float / integer +pub(crate) fn value( + check: RecursionCheck, +) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> { + move |input| { + dispatch!{peek(any); + crate::parser::strings::QUOTATION_MARK | + crate::parser::strings::APOSTROPHE => string.map(|s| { + v::Value::String(Formatted::new( + s.into_owned() + )) + }), + crate::parser::array::ARRAY_OPEN => array(check).map(v::Value::Array), + crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(v::Value::InlineTable), + // Date/number starts + b'+' | b'-' | b'0'..=b'9' => { + // Uncommon enough not to be worth optimizing at this time + alt(( + date_time + .map(v::Value::from), + float + .map(v::Value::from), + integer + .map(v::Value::from), + )) + }, + // Report as if they were numbers because its most likely a typo + b'_' => { + integer + .map(v::Value::from) + .context(Context::Expected(ParserValue::Description("leading digit"))) + }, + // Report as if they were numbers because its most likely a typo + b'.' => { + float + .map(v::Value::from) + .context(Context::Expected(ParserValue::Description("leading digit"))) + }, + b't' => { + crate::parser::numbers::true_.map(v::Value::from) + .context(Context::Expression("string")) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(Context::Expected(ParserValue::CharLiteral('\''))) + }, + b'f' => { + crate::parser::numbers::false_.map(v::Value::from) + .context(Context::Expression("string")) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(Context::Expected(ParserValue::CharLiteral('\''))) + }, + b'i' => { + crate::parser::numbers::inf.map(v::Value::from) + .context(Context::Expression("string")) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(Context::Expected(ParserValue::CharLiteral('\''))) + }, + b'n' => { + crate::parser::numbers::nan.map(v::Value::from) + .context(Context::Expression("string")) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(Context::Expected(ParserValue::CharLiteral('\''))) + }, + _ => { + fail + .context(Context::Expression("string")) + .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(Context::Expected(ParserValue::CharLiteral('\''))) + }, + } + .with_span() + .map_res(|(value, span)| apply_raw(value, span)) + .parse(input) + } +} + +fn apply_raw(mut val: Value, span: std::ops::Range<usize>) -> Result<Value, std::str::Utf8Error> { + match val { + Value::String(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Integer(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Float(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Boolean(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Datetime(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Array(ref mut arr) => { + arr.span = Some(span); + } + Value::InlineTable(ref mut table) => { + table.span = Some(span); + } + }; + val.decorate("", ""); + Ok(val) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn values() { + let inputs = [ + "1979-05-27T00:32:00.999999", + "-239", + "1e200", + "9_224_617.445_991_228_313", + r#"'''I [dw]on't need \d{2} apples'''"#, + r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#, + r#""Jos\u00E9\n""#, + r#""\\\"\b/\f\n\r\t\u00E9\U000A0000""#, + r#"{ hello = "world", a = 1}"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = value(Default::default()).parse(new_input(input)).finish(); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } +} |