diff options
Diffstat (limited to 'vendor/toml_edit/src/parser')
-rw-r--r-- | vendor/toml_edit/src/parser/array.rs | 146 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/datetime.rs | 446 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/document.rs | 141 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/errors.rs | 316 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/inline_table.rs | 181 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/key.rs | 112 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/mod.rs | 265 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/numbers.rs | 397 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/state.rs | 323 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/strings.rs | 478 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/table.rs | 89 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/trivia.rs | 156 | ||||
-rw-r--r-- | vendor/toml_edit/src/parser/value.rs | 155 |
13 files changed, 3205 insertions, 0 deletions
diff --git a/vendor/toml_edit/src/parser/array.rs b/vendor/toml_edit/src/parser/array.rs new file mode 100644 index 0000000..e3b1f3f --- /dev/null +++ b/vendor/toml_edit/src/parser/array.rs @@ -0,0 +1,146 @@ +use winnow::combinator::cut_err; +use winnow::combinator::delimited; +use winnow::combinator::opt; +use winnow::combinator::separated1; +use winnow::trace::trace; + +use crate::parser::trivia::ws_comment_newline; +use crate::parser::value::value; +use crate::{Array, Item, RawString, Value}; + +use crate::parser::prelude::*; + +// ;; Array + +// array = array-open array-values array-close +pub(crate) fn array<'i>(check: RecursionCheck) -> impl Parser<Input<'i>, Array, ContextError> { + trace("array", move |input: &mut Input<'i>| { + delimited( + ARRAY_OPEN, + cut_err(array_values(check)), + cut_err(ARRAY_CLOSE) + .context(StrContext::Label("array")) + .context(StrContext::Expected(StrContextValue::CharLiteral(']'))), + ) + .parse_next(input) + }) +} + +// note: we're omitting ws and newlines here, because +// they should be part of the formatted values +// array-open = %x5B ws-newline ; [ +pub(crate) const ARRAY_OPEN: u8 = b'['; +// array-close = ws-newline %x5D ; ] +const ARRAY_CLOSE: u8 = b']'; +// array-sep = ws %x2C ws ; , Comma +const ARRAY_SEP: u8 = b','; + +// note: this rule is modified +// array-values = [ ( array-value array-sep array-values ) / +// array-value / ws-comment-newline ] +pub(crate) fn array_values<'i>( + check: RecursionCheck, +) -> impl Parser<Input<'i>, Array, ContextError> { + move |input: &mut Input<'i>| { + let check = check.recursing(input)?; + ( + opt( + (separated1(array_value(check), ARRAY_SEP), opt(ARRAY_SEP)).map( + |(v, trailing): (Vec<Value>, Option<u8>)| { + ( + Array::with_vec(v.into_iter().map(Item::Value).collect()), + trailing.is_some(), + ) + }, + ), + ), + ws_comment_newline.span(), + ) + .try_map::<_, _, std::str::Utf8Error>(|(array, trailing)| { + let (mut array, comma) = array.unwrap_or_default(); + 
array.set_trailing_comma(comma); + array.set_trailing(RawString::with_span(trailing)); + Ok(array) + }) + .parse_next(input) + } +} + +pub(crate) fn array_value<'i>( + check: RecursionCheck, +) -> impl Parser<Input<'i>, Value, ContextError> { + move |input: &mut Input<'i>| { + ( + ws_comment_newline.span(), + value(check), + ws_comment_newline.span(), + ) + .map(|(ws1, v, ws2)| v.decorated(RawString::with_span(ws1), RawString::with_span(ws2))) + .parse_next(input) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn arrays() { + let inputs = [ + r#"[]"#, + r#"[ ]"#, + r#"[ + 1, 2, 3 +]"#, + r#"[ + 1, + 2, # this is ok +]"#, + r#"[# comment +# comment2 + + + ]"#, + r#"[# comment +# comment2 + 1 + +#sd +, +# comment3 + + ]"#, + r#"[1]"#, + r#"[1,]"#, + r#"[ "all", 'strings', """are the same""", '''type''']"#, + r#"[ 100, -2,]"#, + r#"[1, 2, 3]"#, + r#"[1.1, 2.1, 3.1]"#, + r#"["a", "b", "c"]"#, + r#"[ [ 1, 2 ], [3, 4, 5] ]"#, + r#"[ [ 1, 2 ], ["a", "b", "c"] ]"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = array(Default::default()).parse(new_input(input)); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } + + #[test] + fn invalid_arrays() { + let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = array(Default::default()).parse(new_input(input)); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert!(parsed.is_err()); + } + } +} diff --git a/vendor/toml_edit/src/parser/datetime.rs b/vendor/toml_edit/src/parser/datetime.rs new file mode 100644 index 0000000..6e89b97 --- /dev/null +++ b/vendor/toml_edit/src/parser/datetime.rs @@ -0,0 +1,446 @@ +use std::ops::RangeInclusive; + +use crate::parser::errors::CustomError; +use crate::parser::prelude::*; +use crate::parser::trivia::from_utf8_unchecked; 
+ +use toml_datetime::*; +use winnow::combinator::alt; +use winnow::combinator::cut_err; +use winnow::combinator::opt; +use winnow::combinator::preceded; +use winnow::token::one_of; +use winnow::token::take_while; +use winnow::trace::trace; + +// ;; Date and Time (as defined in RFC 3339) + +// date-time = offset-date-time / local-date-time / local-date / local-time +// offset-date-time = full-date time-delim full-time +// local-date-time = full-date time-delim partial-time +// local-date = full-date +// local-time = partial-time +// full-time = partial-time time-offset +pub(crate) fn date_time(input: &mut Input<'_>) -> PResult<Datetime> { + trace( + "date-time", + alt(( + (full_date, opt((time_delim, partial_time, opt(time_offset)))) + .map(|(date, opt)| { + match opt { + // Offset Date-Time + Some((_, time, offset)) => Datetime { + date: Some(date), + time: Some(time), + offset, + }, + // Local Date + None => Datetime { + date: Some(date), + time: None, + offset: None, + }, + } + }) + .context(StrContext::Label("date-time")), + partial_time + .map(|t| t.into()) + .context(StrContext::Label("time")), + )), + ) + .parse_next(input) +} + +// full-date = date-fullyear "-" date-month "-" date-mday +pub(crate) fn full_date(input: &mut Input<'_>) -> PResult<Date> { + trace( + "full-date", + (date_fullyear, b'-', cut_err((date_month, b'-', date_mday))) + .map(|(year, _, (month, _, day))| Date { year, month, day }), + ) + .parse_next(input) +} + +// partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] +pub(crate) fn partial_time(input: &mut Input<'_>) -> PResult<Time> { + trace( + "partial-time", + ( + time_hour, + b':', + cut_err((time_minute, b':', time_second, opt(time_secfrac))), + ) + .map(|(hour, _, (minute, _, second, nanosecond))| Time { + hour, + minute, + second, + nanosecond: nanosecond.unwrap_or_default(), + }), + ) + .parse_next(input) +} + +// time-offset = "Z" / time-numoffset +// time-numoffset = ( "+" / "-" ) time-hour ":" time-minute 
+pub(crate) fn time_offset(input: &mut Input<'_>) -> PResult<Offset> { + trace( + "time-offset", + alt(( + one_of((b'Z', b'z')).value(Offset::Z), + ( + one_of((b'+', b'-')), + cut_err((time_hour, b':', time_minute)), + ) + .map(|(sign, (hours, _, minutes))| { + let sign = match sign { + b'+' => 1, + b'-' => -1, + _ => unreachable!("Parser prevents this"), + }; + sign * (hours as i16 * 60 + minutes as i16) + }) + .verify(|minutes| ((-24 * 60)..=(24 * 60)).contains(minutes)) + .map(|minutes| Offset::Custom { minutes }), + )) + .context(StrContext::Label("time offset")), + ) + .parse_next(input) +} + +// date-fullyear = 4DIGIT +pub(crate) fn date_fullyear(input: &mut Input<'_>) -> PResult<u16> { + unsigned_digits::<4, 4> + .map(|s: &str| s.parse::<u16>().expect("4DIGIT should match u8")) + .parse_next(input) +} + +// date-month = 2DIGIT ; 01-12 +pub(crate) fn date_month(input: &mut Input<'_>) -> PResult<u8> { + unsigned_digits::<2, 2> + .try_map(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (1..=12).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse_next(input) +} + +// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +pub(crate) fn date_mday(input: &mut Input<'_>) -> PResult<u8> { + unsigned_digits::<2, 2> + .try_map(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (1..=31).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse_next(input) +} + +// time-delim = "T" / %x20 ; T, t, or space +pub(crate) fn time_delim(input: &mut Input<'_>) -> PResult<u8> { + one_of(TIME_DELIM).parse_next(input) +} + +const TIME_DELIM: (u8, u8, u8) = (b'T', b't', b' '); + +// time-hour = 2DIGIT ; 00-23 +pub(crate) fn time_hour(input: &mut Input<'_>) -> PResult<u8> { + unsigned_digits::<2, 2> + .try_map(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=23).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + 
} + }) + .parse_next(input) +} + +// time-minute = 2DIGIT ; 00-59 +pub(crate) fn time_minute(input: &mut Input<'_>) -> PResult<u8> { + unsigned_digits::<2, 2> + .try_map(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=59).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse_next(input) +} + +// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules +pub(crate) fn time_second(input: &mut Input<'_>) -> PResult<u8> { + unsigned_digits::<2, 2> + .try_map(|s: &str| { + let d = s.parse::<u8>().expect("2DIGIT should match u8"); + if (0..=60).contains(&d) { + Ok(d) + } else { + Err(CustomError::OutOfRange) + } + }) + .parse_next(input) +} + +// time-secfrac = "." 1*DIGIT +pub(crate) fn time_secfrac(input: &mut Input<'_>) -> PResult<u32> { + static SCALE: [u32; 10] = [ + 0, + 100_000_000, + 10_000_000, + 1_000_000, + 100_000, + 10_000, + 1_000, + 100, + 10, + 1, + ]; + const INF: usize = usize::MAX; + preceded(b'.', unsigned_digits::<1, INF>) + .try_map(|mut repr: &str| -> Result<u32, CustomError> { + let max_digits = SCALE.len() - 1; + if max_digits < repr.len() { + // Millisecond precision is required. Further precision of fractional seconds is + // implementation-specific. If the value contains greater precision than the + // implementation can support, the additional precision must be truncated, not rounded. + repr = &repr[0..max_digits]; + } + + let v = repr.parse::<u32>().map_err(|_| CustomError::OutOfRange)?; + let num_digits = repr.len(); + + // scale the number accordingly. 
+ let scale = SCALE.get(num_digits).ok_or(CustomError::OutOfRange)?; + let v = v.checked_mul(*scale).ok_or(CustomError::OutOfRange)?; + Ok(v) + }) + .parse_next(input) +} + +pub(crate) fn unsigned_digits<'i, const MIN: usize, const MAX: usize>( + input: &mut Input<'i>, +) -> PResult<&'i str> { + take_while(MIN..=MAX, DIGIT) + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) + .parse_next(input) +} + +// DIGIT = %x30-39 ; 0-9 +const DIGIT: RangeInclusive<u8> = b'0'..=b'9'; + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn offset_date_time() { + let inputs = [ + ( + "1979-05-27T07:32:00Z", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Z), + }, + ), + ( + "1979-05-27T00:32:00-07:00", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Custom { minutes: -7 * 60 }), + }, + ), + ( + "1979-05-27T00:32:00-00:36", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: Some(Offset::Custom { minutes: -36 }), + }, + ), + ( + "1979-05-27T00:32:00.999999", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_date_time() { + let inputs = [ + ( + "1979-05-27T07:32:00", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: None, + }, + ), + 
( + "1979-05-27T00:32:00.999999", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_date() { + let inputs = [ + ( + "1979-05-27", + Datetime { + date: Some(Date { + year: 1979, + month: 5, + day: 27, + }), + time: None, + offset: None, + }, + ), + ( + "2017-07-20", + Datetime { + date: Some(Date { + year: 2017, + month: 7, + day: 20, + }), + time: None, + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn local_time() { + let inputs = [ + ( + "07:32:00", + Datetime { + date: None, + time: Some(Time { + hour: 7, + minute: 32, + second: 0, + nanosecond: 0, + }), + offset: None, + }, + ), + ( + "00:32:00.999999", + Datetime { + date: None, + time: Some(Time { + hour: 0, + minute: 32, + second: 0, + nanosecond: 999999000, + }), + offset: None, + }, + ), + ]; + for (input, expected) in inputs { + dbg!(input); + let actual = date_time.parse(new_input(input)).unwrap(); + assert_eq!(expected, actual); + } + } + + #[test] + fn time_fraction_truncated() { + let input = "1987-07-05T17:45:00.123456789012345Z"; + date_time.parse(new_input(input)).unwrap(); + } +} diff --git a/vendor/toml_edit/src/parser/document.rs b/vendor/toml_edit/src/parser/document.rs new file mode 100644 index 0000000..aa8fb11 --- /dev/null +++ b/vendor/toml_edit/src/parser/document.rs @@ -0,0 +1,141 @@ +use std::cell::RefCell; + +use winnow::combinator::cut_err; +use winnow::combinator::eof; +use winnow::combinator::opt; +use winnow::combinator::peek; +use winnow::combinator::repeat; +use winnow::token::any; +use winnow::token::one_of; +use 
winnow::trace::trace; + +use crate::document::Document; +use crate::key::Key; +use crate::parser::inline_table::KEYVAL_SEP; +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::state::ParseState; +use crate::parser::table::table; +use crate::parser::trivia::{comment, line_ending, line_trailing, newline, ws}; +use crate::parser::value::value; +use crate::table::TableKeyValue; +use crate::Item; +use crate::RawString; + +// ;; TOML + +// toml = expression *( newline expression ) + +// expression = ( ( ws comment ) / +// ( ws keyval ws [ comment ] ) / +// ( ws table ws [ comment ] ) / +// ws ) +pub(crate) fn document(input: &mut Input<'_>) -> PResult<Document> { + let state = RefCell::new(ParseState::default()); + let state_ref = &state; + + let _o = ( + // Remove BOM if present + opt(b"\xEF\xBB\xBF"), + parse_ws(state_ref), + repeat(0.., ( + dispatch! {peek(any); + crate::parser::trivia::COMMENT_START_SYMBOL => cut_err(parse_comment(state_ref)), + crate::parser::table::STD_TABLE_OPEN => cut_err(table(state_ref)), + crate::parser::trivia::LF | + crate::parser::trivia::CR => parse_newline(state_ref), + _ => cut_err(keyval(state_ref)), + }, + parse_ws(state_ref), + )) + .map(|()| ()), + eof, + ) + .parse_next(input)?; + state.into_inner().into_document().map_err(|err| { + winnow::error::ErrMode::from_external_error(input, winnow::error::ErrorKind::Verify, err) + }) +} + +pub(crate) fn parse_comment<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + (comment, line_ending) + .span() + .map(|span| { + state.borrow_mut().on_comment(span); + }) + .parse_next(i) + } +} + +pub(crate) fn parse_ws<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + ws.span() + .map(|span| state.borrow_mut().on_ws(span)) + .parse_next(i) + } +} + +pub(crate) fn parse_newline<'s, 'i>( + state: &'s RefCell<ParseState>, 
+) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + newline + .span() + .map(|span| state.borrow_mut().on_ws(span)) + .parse_next(i) + } +} + +pub(crate) fn keyval<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + parse_keyval + .try_map(|(p, kv)| state.borrow_mut().on_keyval(p, kv)) + .parse_next(i) + } +} + +// keyval = key keyval-sep val +pub(crate) fn parse_keyval(input: &mut Input<'_>) -> PResult<(Vec<Key>, TableKeyValue)> { + trace( + "keyval", + ( + key, + cut_err(( + one_of(KEYVAL_SEP) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('='))), + ( + ws.span(), + value(RecursionCheck::default()), + line_trailing + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('#'))), + ), + )), + ) + .try_map::<_, _, std::str::Utf8Error>(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); + + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + Ok(( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + )) + }), + ) + .parse_next(input) +} diff --git a/vendor/toml_edit/src/parser/errors.rs b/vendor/toml_edit/src/parser/errors.rs new file mode 100644 index 0000000..96ad886 --- /dev/null +++ b/vendor/toml_edit/src/parser/errors.rs @@ -0,0 +1,316 @@ +use std::error::Error as StdError; +use std::fmt::{Display, Formatter, Result}; + +use crate::parser::prelude::*; +use crate::Key; + +use winnow::error::ContextError; +use winnow::error::ParseError; + +/// Type representing a TOML parse error +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct TomlError { + message: String, + original: Option<String>, + keys: Vec<String>, + span: Option<std::ops::Range<usize>>, +} + 
+impl TomlError { + pub(crate) fn new(error: ParseError<Input<'_>, ContextError>, mut original: Input<'_>) -> Self { + use winnow::stream::Stream; + + let offset = error.offset(); + let span = if offset == original.len() { + offset..offset + } else { + offset..(offset + 1) + }; + + let message = error.inner().to_string(); + let original = original.finish(); + + Self { + message, + original: Some( + String::from_utf8(original.to_owned()).expect("original document was utf8"), + ), + keys: Vec::new(), + span: Some(span), + } + } + + #[cfg(feature = "serde")] + pub(crate) fn custom(message: String, span: Option<std::ops::Range<usize>>) -> Self { + Self { + message, + original: None, + keys: Vec::new(), + span, + } + } + + #[cfg(feature = "serde")] + pub(crate) fn add_key(&mut self, key: String) { + self.keys.insert(0, key); + } + + /// What went wrong + pub fn message(&self) -> &str { + &self.message + } + + /// The start/end index into the original document where the error occurred + pub fn span(&self) -> Option<std::ops::Range<usize>> { + self.span.clone() + } + + #[cfg(feature = "serde")] + pub(crate) fn set_span(&mut self, span: Option<std::ops::Range<usize>>) { + self.span = span; + } + + #[cfg(feature = "serde")] + pub(crate) fn set_original(&mut self, original: Option<String>) { + self.original = original; + } +} + +/// Displays a TOML parse error +/// +/// # Example +/// +/// TOML parse error at line 1, column 10 +/// | +/// 1 | 00:32:00.a999999 +/// | ^ +/// Unexpected `a` +/// Expected `digit` +/// While parsing a Time +/// While parsing a Date-Time +impl Display for TomlError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let mut context = false; + if let (Some(original), Some(span)) = (&self.original, self.span()) { + context = true; + + let (line, column) = translate_position(original.as_bytes(), span.start); + let line_num = line + 1; + let col_num = column + 1; + let gutter = line_num.to_string().len(); + let content = 
original.split('\n').nth(line).expect("valid line number"); + + writeln!( + f, + "TOML parse error at line {}, column {}", + line_num, col_num + )?; + // | + for _ in 0..=gutter { + write!(f, " ")?; + } + writeln!(f, "|")?; + + // 1 | 00:32:00.a999999 + write!(f, "{} | ", line_num)?; + writeln!(f, "{}", content)?; + + // | ^ + for _ in 0..=gutter { + write!(f, " ")?; + } + write!(f, "|")?; + for _ in 0..=column { + write!(f, " ")?; + } + // The span will be empty at eof, so we need to make sure we always print at least + // one `^` + write!(f, "^")?; + for _ in (span.start + 1)..(span.end.min(span.start + content.len())) { + write!(f, "^")?; + } + writeln!(f)?; + } + writeln!(f, "{}", self.message)?; + if !context && !self.keys.is_empty() { + writeln!(f, "in `{}`", self.keys.join("."))?; + } + + Ok(()) + } +} + +impl StdError for TomlError { + fn description(&self) -> &'static str { + "TOML parse error" + } +} + +fn translate_position(input: &[u8], index: usize) -> (usize, usize) { + if input.is_empty() { + return (0, index); + } + + let safe_index = index.min(input.len() - 1); + let column_offset = index - safe_index; + let index = safe_index; + + let nl = input[0..index] + .iter() + .rev() + .enumerate() + .find(|(_, b)| **b == b'\n') + .map(|(nl, _)| index - nl - 1); + let line_start = match nl { + Some(nl) => nl + 1, + None => 0, + }; + let line = input[0..line_start].iter().filter(|b| **b == b'\n').count(); + let line = line; + + let column = std::str::from_utf8(&input[line_start..=index]) + .map(|s| s.chars().count() - 1) + .unwrap_or_else(|_| index - line_start); + let column = column + column_offset; + + (line, column) +} + +#[cfg(test)] +mod test_translate_position { + use super::*; + + #[test] + fn empty() { + let input = b""; + let index = 0; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 0)); + } + + #[test] + fn start() { + let input = b"Hello"; + let index = 0; + let position = translate_position(&input[..], index); 
+ assert_eq!(position, (0, 0)); + } + + #[test] + fn end() { + let input = b"Hello"; + let index = input.len() - 1; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len() - 1)); + } + + #[test] + fn after() { + let input = b"Hello"; + let index = input.len(); + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len())); + } + + #[test] + fn first_line() { + let input = b"Hello\nWorld\n"; + let index = 2; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 2)); + } + + #[test] + fn end_of_line() { + let input = b"Hello\nWorld\n"; + let index = 5; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 5)); + } + + #[test] + fn start_of_second_line() { + let input = b"Hello\nWorld\n"; + let index = 6; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 0)); + } + + #[test] + fn second_line() { + let input = b"Hello\nWorld\n"; + let index = 8; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 2)); + } +} + +#[derive(Debug, Clone)] +pub(crate) enum CustomError { + DuplicateKey { + key: String, + table: Option<Vec<Key>>, + }, + DottedKeyExtendWrongType { + key: Vec<Key>, + actual: &'static str, + }, + OutOfRange, + #[cfg_attr(feature = "unbounded", allow(dead_code))] + RecursionLimitExceeded, +} + +impl CustomError { + pub(crate) fn duplicate_key(path: &[Key], i: usize) -> Self { + assert!(i < path.len()); + let key = &path[i]; + let repr = key.display_repr(); + Self::DuplicateKey { + key: repr.into(), + table: Some(path[..i].to_vec()), + } + } + + pub(crate) fn extend_wrong_type(path: &[Key], i: usize, actual: &'static str) -> Self { + assert!(i < path.len()); + Self::DottedKeyExtendWrongType { + key: path[..=i].to_vec(), + actual, + } + } +} + +impl StdError for CustomError { + fn description(&self) -> &'static str { + "TOML parse error" + } +} + +impl Display for 
CustomError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + CustomError::DuplicateKey { key, table } => { + if let Some(table) = table { + if table.is_empty() { + write!(f, "duplicate key `{}` in document root", key) + } else { + let path = table.iter().map(|k| k.get()).collect::<Vec<_>>().join("."); + write!(f, "duplicate key `{}` in table `{}`", key, path) + } + } else { + write!(f, "duplicate key `{}`", key) + } + } + CustomError::DottedKeyExtendWrongType { key, actual } => { + let path = key.iter().map(|k| k.get()).collect::<Vec<_>>().join("."); + write!( + f, + "dotted key `{}` attempted to extend non-table type ({})", + path, actual + ) + } + CustomError::OutOfRange => write!(f, "value is out of range"), + CustomError::RecursionLimitExceeded => write!(f, "recursion limit exceeded"), + } + } +} diff --git a/vendor/toml_edit/src/parser/inline_table.rs b/vendor/toml_edit/src/parser/inline_table.rs new file mode 100644 index 0000000..994e003 --- /dev/null +++ b/vendor/toml_edit/src/parser/inline_table.rs @@ -0,0 +1,181 @@ +use winnow::combinator::cut_err; +use winnow::combinator::delimited; +use winnow::combinator::separated0; +use winnow::token::one_of; +use winnow::trace::trace; + +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::trivia::ws; +use crate::parser::value::value; +use crate::table::TableKeyValue; +use crate::{InlineTable, InternalString, Item, RawString, Value}; + +use indexmap::map::Entry; + +// ;; Inline Table + +// inline-table = inline-table-open inline-table-keyvals inline-table-close +pub(crate) fn inline_table<'i>( + check: RecursionCheck, +) -> impl Parser<Input<'i>, InlineTable, ContextError> { + trace("inline-table", move |input: &mut Input<'i>| { + delimited( + INLINE_TABLE_OPEN, + cut_err(inline_table_keyvals(check).try_map(|(kv, p)| table_from_pairs(kv, p))), + cut_err(INLINE_TABLE_CLOSE) + 
.context(StrContext::Label("inline table")) + .context(StrContext::Expected(StrContextValue::CharLiteral('}'))), + ) + .parse_next(input) + }) +} + +fn table_from_pairs( + v: Vec<(Vec<Key>, TableKeyValue)>, + preamble: RawString, +) -> Result<InlineTable, CustomError> { + let mut root = InlineTable::new(); + root.set_preamble(preamble); + // Assuming almost all pairs will be directly in `root` + root.items.reserve(v.len()); + + for (path, kv) in v { + let table = descend_path(&mut root, &path)?; + let key: InternalString = kv.key.get_internal().into(); + match table.items.entry(key) { + Entry::Vacant(o) => { + o.insert(kv); + } + Entry::Occupied(o) => { + return Err(CustomError::DuplicateKey { + key: o.key().as_str().into(), + table: None, + }); + } + } + } + Ok(root) +} + +fn descend_path<'a>( + mut table: &'a mut InlineTable, + path: &'a [Key], +) -> Result<&'a mut InlineTable, CustomError> { + for (i, key) in path.iter().enumerate() { + let entry = table.entry_format(key).or_insert_with(|| { + let mut new_table = InlineTable::new(); + new_table.set_dotted(true); + + Value::InlineTable(new_table) + }); + match *entry { + Value::InlineTable(ref mut sweet_child_of_mine) => { + table = sweet_child_of_mine; + } + ref v => { + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); + } + } + } + Ok(table) +} + +// inline-table-open = %x7B ws ; { +pub(crate) const INLINE_TABLE_OPEN: u8 = b'{'; +// inline-table-close = ws %x7D ; } +const INLINE_TABLE_CLOSE: u8 = b'}'; +// inline-table-sep = ws %x2C ws ; , Comma +const INLINE_TABLE_SEP: u8 = b','; +// keyval-sep = ws %x3D ws ; = +pub(crate) const KEYVAL_SEP: u8 = b'='; + +// inline-table-keyvals = [ inline-table-keyvals-non-empty ] +// inline-table-keyvals-non-empty = +// ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / +// ( key keyval-sep val ) + +fn inline_table_keyvals<'i>( + check: RecursionCheck, +) -> impl Parser<Input<'i>, (Vec<(Vec<Key>, TableKeyValue)>, RawString), 
ContextError> { + move |input: &mut Input<'i>| { + let check = check.recursing(input)?; + ( + separated0(keyval(check), INLINE_TABLE_SEP), + ws.span().map(RawString::with_span), + ) + .parse_next(input) + } +} + +fn keyval<'i>( + check: RecursionCheck, +) -> impl Parser<Input<'i>, (Vec<Key>, TableKeyValue), ContextError> { + move |input: &mut Input<'i>| { + ( + key, + cut_err(( + one_of(KEYVAL_SEP) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('='))), + (ws.span(), value(check), ws.span()), + )), + ) + .map(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); + + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + ( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + ) + }) + .parse_next(input) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn inline_tables() { + let inputs = [ + r#"{}"#, + r#"{ }"#, + r#"{a = 1e165}"#, + r#"{ hello = "world", a = 1}"#, + r#"{ hello.world = "a" }"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = inline_table(Default::default()).parse(new_input(input)); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } + + #[test] + fn invalid_inline_tables() { + let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = inline_table(Default::default()).parse(new_input(input)); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert!(parsed.is_err()); + } + } +} diff --git a/vendor/toml_edit/src/parser/key.rs b/vendor/toml_edit/src/parser/key.rs new file mode 100644 index 0000000..12715da --- /dev/null +++ b/vendor/toml_edit/src/parser/key.rs @@ -0,0 +1,112 @@ +use std::ops::RangeInclusive; + +use 
winnow::combinator::peek; +use winnow::combinator::separated1; +use winnow::token::any; +use winnow::token::take_while; +use winnow::trace::trace; + +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::parser::prelude::*; +use crate::parser::strings::{basic_string, literal_string}; +use crate::parser::trivia::{from_utf8_unchecked, ws}; +use crate::repr::{Decor, Repr}; +use crate::InternalString; +use crate::RawString; + +// key = simple-key / dotted-key +// dotted-key = simple-key 1*( dot-sep simple-key ) +pub(crate) fn key(input: &mut Input<'_>) -> PResult<Vec<Key>> { + trace( + "dotted-key", + separated1( + (ws.span(), simple_key, ws.span()).map(|(pre, (raw, key), suffix)| { + Key::new(key) + .with_repr_unchecked(Repr::new_unchecked(raw)) + .with_decor(Decor::new( + RawString::with_span(pre), + RawString::with_span(suffix), + )) + }), + DOT_SEP, + ) + .context(StrContext::Label("key")) + .try_map(|k: Vec<_>| { + // Inserting the key will require recursion down the line + RecursionCheck::check_depth(k.len())?; + Ok::<_, CustomError>(k) + }), + ) + .parse_next(input) +} + +// simple-key = quoted-key / unquoted-key +// quoted-key = basic-string / literal-string +pub(crate) fn simple_key(input: &mut Input<'_>) -> PResult<(RawString, InternalString)> { + trace( + "simple-key", + dispatch! 
{peek(any); + crate::parser::strings::QUOTATION_MARK => basic_string + .map(|s: std::borrow::Cow<'_, str>| s.as_ref().into()), + crate::parser::strings::APOSTROPHE => literal_string.map(|s: &str| s.into()), + _ => unquoted_key.map(|s: &str| s.into()), + } + .with_span() + .map(|(k, span)| { + let raw = RawString::with_span(span); + (raw, k) + }), + ) + .parse_next(input) +} + +// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ +fn unquoted_key<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "unquoted-key", + take_while(1.., UNQUOTED_CHAR) + .map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }), + ) + .parse_next(input) +} + +pub(crate) fn is_unquoted_char(c: u8) -> bool { + use winnow::stream::ContainsToken; + UNQUOTED_CHAR.contains_token(c) +} + +const UNQUOTED_CHAR: ( + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, + u8, + u8, +) = (b'A'..=b'Z', b'a'..=b'z', b'0'..=b'9', b'-', b'_'); + +// dot-sep = ws %x2E ws ; . 
Period +const DOT_SEP: u8 = b'.'; + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn keys() { + let cases = [ + ("a", "a"), + (r#""hello\n ""#, "hello\n "), + (r#"'hello\n '"#, "hello\\n "), + ]; + + for (input, expected) in cases { + dbg!(input); + let parsed = simple_key.parse(new_input(input)); + assert_eq!( + parsed, + Ok((RawString::with_span(0..(input.len())), expected.into())), + "Parsing {input:?}" + ); + } + } +} diff --git a/vendor/toml_edit/src/parser/mod.rs b/vendor/toml_edit/src/parser/mod.rs new file mode 100644 index 0000000..eb47550 --- /dev/null +++ b/vendor/toml_edit/src/parser/mod.rs @@ -0,0 +1,265 @@ +#![allow(clippy::type_complexity)] + +pub(crate) mod array; +pub(crate) mod datetime; +pub(crate) mod document; +pub(crate) mod errors; +pub(crate) mod inline_table; +pub(crate) mod key; +pub(crate) mod numbers; +pub(crate) mod state; +pub(crate) mod strings; +pub(crate) mod table; +pub(crate) mod trivia; +pub(crate) mod value; + +pub use errors::TomlError; + +pub(crate) fn parse_document(raw: &str) -> Result<crate::Document, TomlError> { + use prelude::*; + + let b = new_input(raw); + let mut doc = document::document + .parse(b) + .map_err(|e| TomlError::new(e, b))?; + doc.span = Some(0..(raw.len())); + doc.original = Some(raw.to_owned()); + Ok(doc) +} + +pub(crate) fn parse_key(raw: &str) -> Result<crate::Key, TomlError> { + use prelude::*; + + let b = new_input(raw); + let result = key::simple_key.parse(b); + match result { + Ok((raw, key)) => { + Ok(crate::Key::new(key).with_repr_unchecked(crate::Repr::new_unchecked(raw))) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) fn parse_key_path(raw: &str) -> Result<Vec<crate::Key>, TomlError> { + use prelude::*; + + let b = new_input(raw); + let result = key::key.parse(b); + match result { + Ok(mut keys) => { + for key in &mut keys { + key.despan(raw); + } + Ok(keys) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) fn parse_value(raw: &str) -> 
Result<crate::Value, TomlError> { + use prelude::*; + + let b = new_input(raw); + let parsed = value::value(RecursionCheck::default()).parse(b); + match parsed { + Ok(mut value) => { + // Only take the repr and not decor, as its probably not intended + value.decor_mut().clear(); + value.despan(raw); + Ok(value) + } + Err(e) => Err(TomlError::new(e, b)), + } +} + +pub(crate) mod prelude { + pub(crate) use winnow::combinator::dispatch; + pub(crate) use winnow::error::ContextError; + pub(crate) use winnow::error::FromExternalError; + pub(crate) use winnow::error::StrContext; + pub(crate) use winnow::error::StrContextValue; + pub(crate) use winnow::PResult; + pub(crate) use winnow::Parser; + + pub(crate) type Input<'b> = winnow::Located<&'b winnow::BStr>; + + pub(crate) fn new_input(s: &str) -> Input<'_> { + winnow::Located::new(winnow::BStr::new(s)) + } + + #[cfg(not(feature = "unbounded"))] + #[derive(Copy, Clone, Debug, Default)] + pub(crate) struct RecursionCheck { + current: usize, + } + + #[cfg(not(feature = "unbounded"))] + impl RecursionCheck { + pub(crate) fn check_depth(depth: usize) -> Result<(), super::errors::CustomError> { + if depth < 128 { + Ok(()) + } else { + Err(super::errors::CustomError::RecursionLimitExceeded) + } + } + + pub(crate) fn recursing( + mut self, + input: &mut Input<'_>, + ) -> Result<Self, winnow::error::ErrMode<ContextError>> { + self.current += 1; + if self.current < 128 { + Ok(self) + } else { + Err(winnow::error::ErrMode::from_external_error( + input, + winnow::error::ErrorKind::Eof, + super::errors::CustomError::RecursionLimitExceeded, + )) + } + } + } + + #[cfg(feature = "unbounded")] + #[derive(Copy, Clone, Debug, Default)] + pub(crate) struct RecursionCheck {} + + #[cfg(feature = "unbounded")] + impl RecursionCheck { + pub(crate) fn check_depth(_depth: usize) -> Result<(), super::errors::CustomError> { + Ok(()) + } + + pub(crate) fn recursing( + self, + _input: &mut Input<'_>, + ) -> Result<Self, 
winnow::error::ErrMode<ContextError>> { + Ok(self) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn documents() { + let documents = [ + "", + r#" +# This is a TOML document. + +title = "TOML Example" + + [owner] + name = "Tom Preston-Werner" + dob = 1979-05-27T07:32:00-08:00 # First class dates + + [database] + server = "192.168.1.1" + ports = [ 8001, 8001, 8002 ] + connection_max = 5000 + enabled = true + + [servers] + + # Indentation (tabs and/or spaces) is allowed but not required +[servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + + [clients] + data = [ ["gamma", "delta"], [1, 2] ] + + # Line breaks are OK when inside arrays +hosts = [ + "alpha", + "omega" +] + + 'some.weird .stuff' = """ + like + that + # """ # this broke my syntax highlighting + " also. like " = ''' +that +''' + double = 2e39 # this number looks familiar +# trailing comment"#, + r#""#, + r#" "#, + r#" hello = 'darkness' # my old friend +"#, + r#"[parent . 
child] +key = "value" +"#, + r#"hello.world = "a" +"#, + r#"foo = 1979-05-27 # Comment +"#, + ]; + for input in documents { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + let doc = match parsed { + Ok(doc) => doc, + Err(err) => { + panic!( + "Parse error: {:?}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } + }; + + snapbox::assert_eq(input, doc.to_string()); + } + } + + #[test] + fn documents_parse_only() { + let parse_only = ["\u{FEFF} +[package] +name = \"foo\" +version = \"0.0.1\" +authors = [] +"]; + for input in parse_only { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + match parsed { + Ok(_) => (), + Err(err) => { + panic!( + "Parse error: {:?}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } + } + } + } + + #[test] + fn invalid_documents() { + let invalid_inputs = [r#" hello = 'darkness' # my old friend +$"#]; + for input in invalid_inputs { + dbg!(input); + let mut parsed = parse_document(input); + if let Ok(parsed) = &mut parsed { + parsed.despan(); + } + assert!(parsed.is_err(), "Input: {:?}", input); + } + } +} diff --git a/vendor/toml_edit/src/parser/numbers.rs b/vendor/toml_edit/src/parser/numbers.rs new file mode 100644 index 0000000..6e4757f --- /dev/null +++ b/vendor/toml_edit/src/parser/numbers.rs @@ -0,0 +1,397 @@ +use std::ops::RangeInclusive; + +use winnow::combinator::alt; +use winnow::combinator::cut_err; +use winnow::combinator::opt; +use winnow::combinator::peek; +use winnow::combinator::preceded; +use winnow::combinator::repeat; +use winnow::combinator::rest; +use winnow::token::one_of; +use winnow::token::tag; +use winnow::token::take; +use winnow::trace::trace; + +use crate::parser::prelude::*; +use crate::parser::trivia::from_utf8_unchecked; + +// ;; Boolean + +// boolean = true / false +#[allow(dead_code)] // directly define in `fn value` +pub(crate) fn boolean(input: &mut 
Input<'_>) -> PResult<bool> { + trace("boolean", alt((true_, false_))).parse_next(input) +} + +pub(crate) fn true_(input: &mut Input<'_>) -> PResult<bool> { + (peek(TRUE[0]), cut_err(TRUE)).value(true).parse_next(input) +} +const TRUE: &[u8] = b"true"; + +pub(crate) fn false_(input: &mut Input<'_>) -> PResult<bool> { + (peek(FALSE[0]), cut_err(FALSE)) + .value(false) + .parse_next(input) +} +const FALSE: &[u8] = b"false"; + +// ;; Integer + +// integer = dec-int / hex-int / oct-int / bin-int +pub(crate) fn integer(input: &mut Input<'_>) -> PResult<i64> { + trace("integer", + dispatch! {peek(opt::<_, &[u8], _, _>(take(2usize))); + Some(b"0x") => cut_err(hex_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 16))), + Some(b"0o") => cut_err(oct_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 8))), + Some(b"0b") => cut_err(bin_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 2))), + _ => dec_int.and_then(cut_err(rest + .try_map(|s: &str| s.replace('_', "").parse()))) + }) + .parse_next(input) +} + +// dec-int = [ minus / plus ] unsigned-dec-int +// unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) +pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "dec-int", + ( + opt(one_of((b'+', b'-'))), + alt(( + ( + one_of(DIGIT1_9), + repeat( + 0.., + alt(( + digit.value(()), + ( + one_of(b'_'), + cut_err(digit).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), + ) + .map(|()| ()), + ) + .value(()), + digit.value(()), + )), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") + }) + .context(StrContext::Label("integer")), + ) + .parse_next(input) +} +const DIGIT1_9: RangeInclusive<u8> = b'1'..=b'9'; + +// hex-prefix = %x30.78 ; 0x +// hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) +pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "hex-int", + preceded( + 
HEX_PREFIX, + cut_err(( + hexdig, + repeat( + 0.., + alt(( + hexdig.value(()), + ( + one_of(b'_'), + cut_err(hexdig).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), + ) + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") }) + .context(StrContext::Label("hexadecimal integer")), + ) + .parse_next(input) +} +const HEX_PREFIX: &[u8] = b"0x"; + +// oct-prefix = %x30.6F ; 0o +// oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) +pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "oct-int", + preceded( + OCT_PREFIX, + cut_err(( + one_of(DIGIT0_7), + repeat( + 0.., + alt(( + one_of(DIGIT0_7).value(()), + ( + one_of(b'_'), + cut_err(one_of(DIGIT0_7)).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), + ) + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") }) + .context(StrContext::Label("octal integer")), + ) + .parse_next(input) +} +const OCT_PREFIX: &[u8] = b"0o"; +const DIGIT0_7: RangeInclusive<u8> = b'0'..=b'7'; + +// bin-prefix = %x30.62 ; 0b +// bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) +pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "bin-int", + preceded( + BIN_PREFIX, + cut_err(( + one_of(DIGIT0_1), + repeat( + 0.., + alt(( + one_of(DIGIT0_1).value(()), + ( + one_of(b'_'), + cut_err(one_of(DIGIT0_1)).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), + ) + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") }) + .context(StrContext::Label("binary integer")), + ) + .parse_next(input) +} +const BIN_PREFIX: &[u8] = b"0b"; +const DIGIT0_1: RangeInclusive<u8> = b'0'..=b'1'; + +// ;; Float + +// 
float = float-int-part ( exp / frac [ exp ] ) +// float =/ special-float +// float-int-part = dec-int +pub(crate) fn float(input: &mut Input<'_>) -> PResult<f64> { + trace( + "float", + alt(( + float_.and_then(cut_err( + rest.try_map(|s: &str| s.replace('_', "").parse()) + .verify(|f: &f64| *f != f64::INFINITY), + )), + special_float, + )) + .context(StrContext::Label("floating-point number")), + ) + .parse_next(input) +} + +pub(crate) fn float_<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + dec_int, + alt((exp.void(), (frac.void(), opt(exp.void())).void())), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + "`dec_int`, `one_of`, `exp`, and `frac` filter out non-ASCII", + ) + }) + .parse_next(input) +} + +// frac = decimal-point zero-prefixable-int +// decimal-point = %x2E ; . +pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + b'.', + cut_err(zero_prefixable_int) + .context(StrContext::Expected(StrContextValue::Description("digit"))), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + "`.` and `parse_zero_prefixable_int` filter out non-ASCII", + ) + }) + .parse_next(input) +} + +// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) +pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + digit, + repeat( + 0.., + alt(( + digit.value(()), + ( + one_of(b'_'), + cut_err(digit) + .context(StrContext::Expected(StrContextValue::Description("digit"))), + ) + .value(()), + )), + ) + .map(|()| ()), + ) + .recognize() + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) + .parse_next(input) +} + +// exp = "e" float-exp-part +// float-exp-part = [ minus / plus ] zero-prefixable-int +pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + one_of((b'e', b'E')), + opt(one_of([b'+', b'-'])), + cut_err(zero_prefixable_int), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked( + b, + 
"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII", + ) + }) + .parse_next(input) +} + +// special-float = [ minus / plus ] ( inf / nan ) +pub(crate) fn special_float(input: &mut Input<'_>) -> PResult<f64> { + (opt(one_of((b'+', b'-'))), alt((inf, nan))) + .map(|(s, f)| match s { + Some(b'+') | None => f, + Some(b'-') => -f, + _ => unreachable!("one_of should prevent this"), + }) + .parse_next(input) +} +// inf = %x69.6e.66 ; inf +pub(crate) fn inf(input: &mut Input<'_>) -> PResult<f64> { + tag(INF).value(f64::INFINITY).parse_next(input) +} +const INF: &[u8] = b"inf"; +// nan = %x6e.61.6e ; nan +pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> { + tag(NAN).value(f64::NAN).parse_next(input) +} +const NAN: &[u8] = b"nan"; + +// DIGIT = %x30-39 ; 0-9 +pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> { + one_of(DIGIT).parse_next(input) +} +const DIGIT: RangeInclusive<u8> = b'0'..=b'9'; + +// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> { + one_of(HEXDIG).parse_next(input) +} +pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = + (DIGIT, b'A'..=b'F', b'a'..=b'f'); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn integers() { + let cases = [ + ("+99", 99), + ("42", 42), + ("0", 0), + ("-17", -17), + ("1_000", 1_000), + ("5_349_221", 5_349_221), + ("1_2_3_4_5", 1_2_3_4_5), + ("0xF", 15), + ("0o0_755", 493), + ("0b1_0_1", 5), + (&std::i64::MIN.to_string()[..], std::i64::MIN), + (&std::i64::MAX.to_string()[..], std::i64::MAX), + ]; + for &(input, expected) in &cases { + dbg!(input); + let parsed = integer.parse(new_input(input)); + assert_eq!(parsed, Ok(expected), "Parsing {input:?}"); + } + + let overflow = "1000000000000000000000000000000000"; + let parsed = integer.parse(new_input(overflow)); + assert!(parsed.is_err()); + } + + #[track_caller] + fn assert_float_eq(actual: f64, expected: f64) { + if expected.is_nan() { + 
assert!(actual.is_nan()); + } else if expected.is_infinite() { + assert!(actual.is_infinite()); + assert_eq!(expected.is_sign_positive(), actual.is_sign_positive()); + } else { + dbg!(expected); + dbg!(actual); + assert!((expected - actual).abs() < std::f64::EPSILON); + } + } + + #[test] + fn floats() { + let cases = [ + ("+1.0", 1.0), + ("3.1419", 3.1419), + ("-0.01", -0.01), + ("5e+22", 5e+22), + ("1e6", 1e6), + ("-2E-2", -2E-2), + ("6.626e-34", 6.626e-34), + ("9_224_617.445_991_228_313", 9_224_617.445_991_227), + ("-1.7976931348623157e+308", std::f64::MIN), + ("1.7976931348623157e+308", std::f64::MAX), + ("nan", f64::NAN), + ("+nan", f64::NAN), + ("-nan", f64::NAN), + ("inf", f64::INFINITY), + ("+inf", f64::INFINITY), + ("-inf", f64::NEG_INFINITY), + // ("1e+400", std::f64::INFINITY), + ]; + for &(input, expected) in &cases { + dbg!(input); + let parsed = float.parse(new_input(input)).unwrap(); + assert_float_eq(parsed, expected); + + let overflow = "9e99999"; + let parsed = float.parse(new_input(overflow)); + assert!(parsed.is_err(), "{:?}", parsed); + } + } +} diff --git a/vendor/toml_edit/src/parser/state.rs b/vendor/toml_edit/src/parser/state.rs new file mode 100644 index 0000000..b30ee3f --- /dev/null +++ b/vendor/toml_edit/src/parser/state.rs @@ -0,0 +1,323 @@ +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::repr::Decor; +use crate::table::TableKeyValue; +use crate::{ArrayOfTables, Document, InternalString, Item, RawString, Table}; + +pub(crate) struct ParseState { + document: Document, + trailing: Option<std::ops::Range<usize>>, + current_table_position: usize, + current_table: Table, + current_is_array: bool, + current_table_path: Vec<Key>, +} + +impl ParseState { + pub(crate) fn into_document(mut self) -> Result<Document, CustomError> { + self.finalize_table()?; + let trailing = self.trailing.map(RawString::with_span); + self.document.trailing = trailing.unwrap_or_default(); + Ok(self.document) + } + + pub(crate) fn on_ws(&mut 
self, span: std::ops::Range<usize>) { + if let Some(old) = self.trailing.take() { + self.trailing = Some(old.start..span.end); + } else { + self.trailing = Some(span); + } + } + + pub(crate) fn on_comment(&mut self, span: std::ops::Range<usize>) { + if let Some(old) = self.trailing.take() { + self.trailing = Some(old.start..span.end); + } else { + self.trailing = Some(span); + } + } + + pub(crate) fn on_keyval( + &mut self, + mut path: Vec<Key>, + mut kv: TableKeyValue, + ) -> Result<(), CustomError> { + { + let mut prefix = self.trailing.take(); + let first_key = if path.is_empty() { + &mut kv.key + } else { + &mut path[0] + }; + let prefix = match ( + prefix.take(), + first_key.decor.prefix().and_then(|d| d.span()), + ) { + (Some(p), Some(k)) => Some(p.start..k.end), + (Some(p), None) | (None, Some(p)) => Some(p), + (None, None) => None, + }; + first_key + .decor + .set_prefix(prefix.map(RawString::with_span).unwrap_or_default()); + } + + if let (Some(existing), Some(value)) = (self.current_table.span(), kv.value.span()) { + self.current_table.span = Some((existing.start)..(value.end)); + } + let table = &mut self.current_table; + let table = Self::descend_path(table, &path, true)?; + + // "Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed" + let mixed_table_types = table.is_dotted() == path.is_empty(); + if mixed_table_types { + return Err(CustomError::DuplicateKey { + key: kv.key.get().into(), + table: None, + }); + } + + let key: InternalString = kv.key.get_internal().into(); + match table.items.entry(key) { + indexmap::map::Entry::Vacant(o) => { + o.insert(kv); + } + indexmap::map::Entry::Occupied(o) => { + // "Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed" + return Err(CustomError::DuplicateKey { + key: o.key().as_str().into(), + table: Some(self.current_table_path.clone()), + }); + } + } + + Ok(()) + } + + pub(crate) fn start_array_table( + &mut self, + 
path: Vec<Key>, + decor: Decor, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // Look up the table on start to ensure the duplicate_key error points to the right line + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + entry + .as_array_of_tables() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_implicit(false); + self.current_table.set_dotted(false); + self.current_table.set_position(self.current_table_position); + self.current_table.span = Some(span); + self.current_is_array = true; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn start_table( + &mut self, + path: Vec<Key>, + decor: Decor, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // 1. Look up the table on start to ensure the duplicate_key error points to the right line + // 2. Ensure any child tables from an implicit table are preserved + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + if let Some(entry) = parent_table.remove(key.get()) { + match entry { + Item::Table(t) if t.implicit && !t.is_dotted() => { + self.current_table = t; + } + // Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed. Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed. 
+ _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_implicit(false); + self.current_table.set_dotted(false); + self.current_table.set_position(self.current_table_position); + self.current_table.span = Some(span); + self.current_is_array = false; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn finalize_table(&mut self) -> Result<(), CustomError> { + let mut table = std::mem::take(&mut self.current_table); + let path = std::mem::take(&mut self.current_table_path); + + let root = self.document.as_table_mut(); + if path.is_empty() { + assert!(root.is_empty()); + std::mem::swap(&mut table, root); + } else if self.current_is_array { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + let array = entry + .as_array_of_tables_mut() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + array.push(table); + let span = if let (Some(first), Some(last)) = ( + array.values.first().and_then(|t| t.span()), + array.values.last().and_then(|t| t.span()), + ) { + Some((first.start)..(last.end)) + } else { + None + }; + array.span = span; + } else { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table.entry_format(key); + match entry { + crate::Entry::Occupied(entry) => { + match entry.into_mut() { + // if [a.b.c] header preceded [a.b] + Item::Table(ref mut t) if t.implicit => { + std::mem::swap(t, &mut table); + } + _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + crate::Entry::Vacant(entry) => { + let item = Item::Table(table); + entry.insert(item); + } + } + } + + Ok(()) + } + + pub(crate) fn descend_path<'t, 'k>( + mut table: &'t mut 
Table, + path: &'k [Key], + dotted: bool, + ) -> Result<&'t mut Table, CustomError> { + for (i, key) in path.iter().enumerate() { + let entry = table.entry_format(key).or_insert_with(|| { + let mut new_table = Table::new(); + new_table.set_implicit(true); + new_table.set_dotted(dotted); + + Item::Table(new_table) + }); + match *entry { + Item::Value(ref v) => { + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); + } + Item::ArrayOfTables(ref mut array) => { + debug_assert!(!array.is_empty()); + + let index = array.len() - 1; + let last_child = array.get_mut(index).unwrap(); + + table = last_child; + } + Item::Table(ref mut sweet_child_of_mine) => { + // Since tables cannot be defined more than once, redefining such tables using a + // [table] header is not allowed. Likewise, using dotted keys to redefine tables + // already defined in [table] form is not allowed. + if dotted && !sweet_child_of_mine.is_implicit() { + return Err(CustomError::DuplicateKey { + key: key.get().into(), + table: None, + }); + } + table = sweet_child_of_mine; + } + _ => unreachable!(), + } + } + Ok(table) + } + + pub(crate) fn on_std_header( + &mut self, + path: Vec<Key>, + trailing: std::ops::Range<usize>, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = self + .trailing + .take() + .map(RawString::with_span) + .unwrap_or_default(); + self.start_table( + path, + Decor::new(leading, RawString::with_span(trailing)), + span, + )?; + + Ok(()) + } + + pub(crate) fn on_array_header( + &mut self, + path: Vec<Key>, + trailing: std::ops::Range<usize>, + span: std::ops::Range<usize>, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = self + .trailing + .take() + .map(RawString::with_span) + .unwrap_or_default(); + self.start_array_table( + path, + Decor::new(leading, RawString::with_span(trailing)), + span, + )?; + + Ok(()) + } 
+} + +impl Default for ParseState { + fn default() -> Self { + let mut root = Table::new(); + root.span = Some(0..0); + Self { + document: Document::new(), + trailing: None, + current_table_position: 0, + current_table: root, + current_is_array: false, + current_table_path: Vec::new(), + } + } +} diff --git a/vendor/toml_edit/src/parser/strings.rs b/vendor/toml_edit/src/parser/strings.rs new file mode 100644 index 0000000..26f9cc2 --- /dev/null +++ b/vendor/toml_edit/src/parser/strings.rs @@ -0,0 +1,478 @@ +use std::borrow::Cow; +use std::char; +use std::ops::RangeInclusive; + +use winnow::combinator::alt; +use winnow::combinator::cut_err; +use winnow::combinator::delimited; +use winnow::combinator::fail; +use winnow::combinator::opt; +use winnow::combinator::peek; +use winnow::combinator::preceded; +use winnow::combinator::repeat; +use winnow::combinator::success; +use winnow::combinator::terminated; +use winnow::prelude::*; +use winnow::stream::Stream; +use winnow::token::any; +use winnow::token::none_of; +use winnow::token::one_of; +use winnow::token::tag; +use winnow::token::take_while; +use winnow::trace::trace; + +use crate::parser::errors::CustomError; +use crate::parser::numbers::HEXDIG; +use crate::parser::prelude::*; +use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_ASCII, WSCHAR}; + +// ;; String + +// string = ml-basic-string / basic-string / ml-literal-string / literal-string +pub(crate) fn string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "string", + alt(( + ml_basic_string, + basic_string, + ml_literal_string, + literal_string.map(Cow::Borrowed), + )), + ) + .parse_next(input) +} + +// ;; Basic String + +// basic-string = quotation-mark *basic-char quotation-mark +pub(crate) fn basic_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace("basic-string", |input: &mut Input<'i>| { + let _ = one_of(QUOTATION_MARK).parse_next(input)?; + + let mut c = Cow::Borrowed(""); + if let Some(ci) = 
opt(basic_chars).parse_next(input)? { + c = ci; + } + while let Some(ci) = opt(basic_chars).parse_next(input)? { + c.to_mut().push_str(&ci); + } + + let _ = cut_err(one_of(QUOTATION_MARK)) + .context(StrContext::Label("basic string")) + .parse_next(input)?; + + Ok(c) + }) + .parse_next(input) +} + +// quotation-mark = %x22 ; " +pub(crate) const QUOTATION_MARK: u8 = b'"'; + +// basic-char = basic-unescaped / escaped +fn basic_chars<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + alt(( + // Deviate from the official grammar by batching the unescaped chars so we build a string a + // chunk at a time, rather than a `char` at a time. + take_while(1.., BASIC_UNESCAPED) + .try_map(std::str::from_utf8) + .map(Cow::Borrowed), + escaped.map(|c| Cow::Owned(String::from(c))), + )) + .parse_next(input) +} + +// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii +pub(crate) const BASIC_UNESCAPED: ( + (u8, u8), + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); + +// escaped = escape escape-seq-char +fn escaped(input: &mut Input<'_>) -> PResult<char> { + preceded(ESCAPE, escape_seq_char).parse_next(input) +} + +// escape = %x5C ; \ +pub(crate) const ESCAPE: u8 = b'\\'; + +// escape-seq-char = %x22 ; " quotation mark U+0022 +// escape-seq-char =/ %x5C ; \ reverse solidus U+005C +// escape-seq-char =/ %x62 ; b backspace U+0008 +// escape-seq-char =/ %x66 ; f form feed U+000C +// escape-seq-char =/ %x6E ; n line feed U+000A +// escape-seq-char =/ %x72 ; r carriage return U+000D +// escape-seq-char =/ %x74 ; t tab U+0009 +// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX +// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX +fn escape_seq_char(input: &mut Input<'_>) -> PResult<char> { + dispatch! 
{any; + b'b' => success('\u{8}'), + b'f' => success('\u{c}'), + b'n' => success('\n'), + b'r' => success('\r'), + b't' => success('\t'), + b'u' => cut_err(hexescape::<4>).context(StrContext::Label("unicode 4-digit hex code")), + b'U' => cut_err(hexescape::<8>).context(StrContext::Label("unicode 8-digit hex code")), + b'\\' => success('\\'), + b'"' => success('"'), + _ => { + cut_err(fail::<_, char, _>) + .context(StrContext::Label("escape sequence")) + .context(StrContext::Expected(StrContextValue::CharLiteral('b'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('f'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('r'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('t'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('u'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('U'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\\'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + } + } + .parse_next(input) +} + +pub(crate) fn hexescape<const N: usize>(input: &mut Input<'_>) -> PResult<char> { + take_while(0..=N, HEXDIG) + .verify(|b: &[u8]| b.len() == N) + .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) + .verify_map(|s| u32::from_str_radix(s, 16).ok()) + .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) + .parse_next(input) +} + +// ;; Multiline Basic String + +// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body +// ml-basic-string-delim +fn ml_basic_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "ml-basic-string", + delimited( + ML_BASIC_STRING_DELIM, + preceded(opt(newline), cut_err(ml_basic_body)), + cut_err(ML_BASIC_STRING_DELIM), + ) + .context(StrContext::Label("multiline basic string")), + ) + .parse_next(input) +} + +// ml-basic-string-delim = 3quotation-mark +pub(crate) const 
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
/// Parses the body of a multi-line basic string, i.e. the text between the
/// opening and closing delimiters.
///
/// The first content chunk is used as-is; every later chunk, and every run of
/// quotes that belongs to the body, is appended to it.
fn ml_basic_body<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> {
    let mut c = Cow::Borrowed("");
    // `*mlb-content`: take the first chunk without copying it ...
    if let Some(ci) = opt(mlb_content).parse_next(input)? {
        c = ci;
    }
    // ... and append any chunks that follow.
    while let Some(ci) = opt(mlb_content).parse_next(input)? {
        c.to_mut().push_str(&ci);
    }

    // `*( mlb-quotes 1*mlb-content )`: one or two quotes whose next byte is not
    // another quote, followed by at least one content chunk.
    while let Some(qi) = opt(mlb_quotes(none_of(b'\"').value(()))).parse_next(input)? {
        if let Some(ci) = opt(mlb_content).parse_next(input)? {
            c.to_mut().push_str(qi);
            c.to_mut().push_str(&ci);
            while let Some(ci) = opt(mlb_content).parse_next(input)? {
                c.to_mut().push_str(&ci);
            }
        } else {
            // Quotes with no content after them: stop without appending them.
            break;
        }
    }

    // `[ mlb-quotes ]`: one or two quotes sitting directly in front of the
    // closing delimiter are part of the body.
    if let Some(qi) = opt(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(()))).parse_next(input)? {
        c.to_mut().push_str(qi);
    }

    Ok(c)
}
// mlb-quotes = 1*2quotation-mark
/// Builds a parser for one or two quotation marks that must be followed by
/// whatever `term` accepts.
///
/// `term` is only peeked at, so it is not consumed. Two quotes are tried first;
/// the parser falls back to a single quote only when that attempt backtracks.
fn mlb_quotes<'i>(
    mut term: impl winnow::Parser<Input<'i>, (), ContextError>,
) -> impl Parser<Input<'i>, &'i str, ContextError> {
    move |input: &mut Input<'i>| {
        // Remember where we started so the two-quote attempt can be undone.
        let start = input.checkpoint();
        let res = terminated(b"\"\"", peek(term.by_ref()))
            .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
            .parse_next(input);

        match res {
            // Recoverable failure: rewind and try a single quote instead.
            Err(winnow::error::ErrMode::Backtrack(_)) => {
                input.reset(start);
                terminated(b"\"", peek(term.by_ref()))
                    .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
                    .parse_next(input)
            }
            // Success, or an error that is not a backtrack: pass it through unchanged.
            res => res,
        }
    }
}
+fn mlb_escaped_nl(input: &mut Input<'_>) -> PResult<()> { + repeat(1.., (ESCAPE, ws, ws_newlines)) + .map(|()| ()) + .value(()) + .parse_next(input) +} + +// ;; Literal String + +// literal-string = apostrophe *literal-char apostrophe +pub(crate) fn literal_string<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "literal-string", + delimited( + APOSTROPHE, + cut_err(take_while(0.., LITERAL_CHAR)), + cut_err(APOSTROPHE), + ) + .try_map(std::str::from_utf8) + .context(StrContext::Label("literal string")), + ) + .parse_next(input) +} + +// apostrophe = %x27 ; ' apostrophe +pub(crate) const APOSTROPHE: u8 = b'\''; + +// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii +pub(crate) const LITERAL_CHAR: ( + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); + +// ;; Multiline Literal String + +// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body +// ml-literal-string-delim +fn ml_literal_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "ml-literal-string", + delimited( + (ML_LITERAL_STRING_DELIM, opt(newline)), + cut_err(ml_literal_body.map(|t| { + if t.contains("\r\n") { + Cow::Owned(t.replace("\r\n", "\n")) + } else { + Cow::Borrowed(t) + } + })), + cut_err(ML_LITERAL_STRING_DELIM), + ) + .context(StrContext::Label("multiline literal string")), + ) + .parse_next(input) +} + +// ml-literal-string-delim = 3apostrophe +pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''"; + +// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] +fn ml_literal_body<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + repeat(0.., mll_content).map(|()| ()), + repeat( + 0.., + ( + mll_quotes(none_of(APOSTROPHE).value(())), + repeat(1.., mll_content).map(|()| ()), + ), + ) + .map(|()| ()), + opt(mll_quotes(tag(ML_LITERAL_STRING_DELIM).value(()))), + ) + .recognize() + .try_map(std::str::from_utf8) + .parse_next(input) +} + +// mll-content 
= mll-char / newline +fn mll_content(input: &mut Input<'_>) -> PResult<u8> { + alt((one_of(MLL_CHAR), newline)).parse_next(input) +} + +// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii +const MLL_CHAR: ( + u8, + RangeInclusive<u8>, + RangeInclusive<u8>, + RangeInclusive<u8>, +) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); + +// mll-quotes = 1*2apostrophe +fn mll_quotes<'i>( + mut term: impl winnow::Parser<Input<'i>, (), ContextError>, +) -> impl Parser<Input<'i>, &'i str, ContextError> { + move |input: &mut Input<'i>| { + let start = input.checkpoint(); + let res = terminated(b"''", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse_next(input); + + match res { + Err(winnow::error::ErrMode::Backtrack(_)) => { + input.reset(start); + terminated(b"'", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse_next(input) + } + res => res, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn basic_string() { + let input = + r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#; + let expected = "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. 
\u{2070E}"; + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + #[test] + fn ml_basic_string() { + let cases = [ + ( + r#"""" +Roses are red +Violets are blue""""#, + r#"Roses are red +Violets are blue"#, + ), + (r#"""" \""" """"#, " \"\"\" "), + (r#"""" \\""""#, " \\"), + ]; + + for &(input, expected) in &cases { + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + let invalid_cases = [r#"""" """#, r#"""" \""""#]; + + for input in &invalid_cases { + let parsed = string.parse(new_input(input)); + assert!(parsed.is_err()); + } + } + + #[test] + fn ml_basic_string_escape_ws() { + let inputs = [ + r#"""" +The quick brown \ + + + fox jumps over \ + the lazy dog.""""#, + r#""""\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """"#, + ]; + for input in &inputs { + let expected = "The quick brown fox jumps over the lazy dog."; + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + let empties = [ + r#""""\ + """"#, + r#"""" +\ + \ +""""#, + ]; + for input in &empties { + let expected = ""; + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + } + + #[test] + fn literal_string() { + let inputs = [ + r#"'C:\Users\nodejs\templates'"#, + r#"'\\ServerX\admin$\system32\'"#, + r#"'Tom "Dubs" Preston-Werner'"#, + r#"'<\i\c*\s*>'"#, + ]; + + for input in &inputs { + let expected = &input[1..input.len() - 1]; + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + } + + #[test] + fn ml_literal_string() { + let inputs = [ + r#"'''I [dw]on't need \d{2} apples'''"#, + r#"''''one_quote''''"#, + ]; + for input in &inputs { + let expected = &input[3..input.len() - 3]; + let parsed = string.parse(new_input(input)); + 
assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } + + let input = r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#; + let expected = &input[4..input.len() - 3]; + let parsed = string.parse(new_input(input)); + assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}"); + } +} diff --git a/vendor/toml_edit/src/parser/table.rs b/vendor/toml_edit/src/parser/table.rs new file mode 100644 index 0000000..0ace0c7 --- /dev/null +++ b/vendor/toml_edit/src/parser/table.rs @@ -0,0 +1,89 @@ +use std::cell::RefCell; +#[allow(unused_imports)] +use std::ops::DerefMut; + +use winnow::combinator::cut_err; +use winnow::combinator::delimited; +use winnow::combinator::peek; +use winnow::token::take; + +// https://github.com/rust-lang/rust/issues/41358 +use crate::parser::key::key; +use crate::parser::prelude::*; +use crate::parser::state::ParseState; +use crate::parser::trivia::line_trailing; + +// std-table-open = %x5B ws ; [ Left square bracket +pub(crate) const STD_TABLE_OPEN: u8 = b'['; +// std-table-close = ws %x5D ; ] Right square bracket +const STD_TABLE_CLOSE: u8 = b']'; +// array-table-open = %x5B.5B ws ; [[ Double left square bracket +const ARRAY_TABLE_OPEN: &[u8] = b"[["; +// array-table-close = ws %x5D.5D ; ]] Double right quare bracket +const ARRAY_TABLE_CLOSE: &[u8] = b"]]"; + +// ;; Standard Table + +// std-table = std-table-open key *( table-key-sep key) std-table-close +pub(crate) fn std_table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + ( + delimited( + STD_TABLE_OPEN, + cut_err(key), + cut_err(STD_TABLE_CLOSE) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::StringLiteral("]"))), + ) + .with_span(), + cut_err(line_trailing) + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + 
.context(StrContext::Expected(StrContextValue::CharLiteral('#'))), + ) + .try_map(|((h, span), t)| state.borrow_mut().deref_mut().on_std_header(h, t, span)) + .parse_next(i) + } +} + +// ;; Array Table + +// array-table = array-table-open key *( table-key-sep key) array-table-close +pub(crate) fn array_table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + ( + delimited( + ARRAY_TABLE_OPEN, + cut_err(key), + cut_err(ARRAY_TABLE_CLOSE) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::StringLiteral("]]"))), + ) + .with_span(), + cut_err(line_trailing) + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('#'))), + ) + .try_map(|((h, span), t)| state.borrow_mut().deref_mut().on_array_header(h, t, span)) + .parse_next(i) + } +} + +// ;; Table + +// table = std-table / array-table +pub(crate) fn table<'s, 'i>( + state: &'s RefCell<ParseState>, +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { + dispatch!(peek::<_, &[u8],_,_>(take(2usize)); + b"[[" => array_table(state), + _ => std_table(state), + ) + .context(StrContext::Label("table header")) + .parse_next(i) + } +} diff --git a/vendor/toml_edit/src/parser/trivia.rs b/vendor/toml_edit/src/parser/trivia.rs new file mode 100644 index 0000000..a359805 --- /dev/null +++ b/vendor/toml_edit/src/parser/trivia.rs @@ -0,0 +1,156 @@ +use std::ops::RangeInclusive; + +use winnow::combinator::alt; +use winnow::combinator::eof; +use winnow::combinator::opt; +use winnow::combinator::repeat; +use winnow::combinator::terminated; +use winnow::prelude::*; +use winnow::token::one_of; +use winnow::token::take_while; + +use crate::parser::prelude::*; + +pub(crate) unsafe fn from_utf8_unchecked<'b>( + bytes: &'b [u8], + safety_justification: &'static str, +) -> &'b str { + if 
cfg!(debug_assertions) { + // Catch problems more quickly when testing + std::str::from_utf8(bytes).expect(safety_justification) + } else { + std::str::from_utf8_unchecked(bytes) + } +} + +// wschar = ( %x20 / ; Space +// %x09 ) ; Horizontal tab +pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t'); + +// ws = *wschar +pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + take_while(0.., WSCHAR) + .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") }) + .parse_next(input) +} + +// non-ascii = %x80-D7FF / %xE000-10FFFF +// - ASCII is 0xxxxxxx +// - First byte for UTF-8 is 11xxxxxx +// - Subsequent UTF-8 bytes are 10xxxxxx +pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff; + +// non-eol = %x09 / %x20-7E / non-ascii +pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) = + (0x09, 0x20..=0x7E, NON_ASCII); + +// comment-start-symbol = %x23 ; # +pub(crate) const COMMENT_START_SYMBOL: u8 = b'#'; + +// comment = comment-start-symbol *non-eol +pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { + (COMMENT_START_SYMBOL, take_while(0.., NON_EOL)) + .recognize() + .parse_next(input) +} + +// newline = ( %x0A / ; LF +// %x0D.0A ) ; CRLF +pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> { + alt(( + one_of(LF).value(b'\n'), + (one_of(CR), one_of(LF)).value(b'\n'), + )) + .parse_next(input) +} +pub(crate) const LF: u8 = b'\n'; +pub(crate) const CR: u8 = b'\r'; + +// ws-newline = *( wschar / newline ) +pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + repeat( + 0.., + alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))), + ) + .map(|()| ()) + .recognize() + .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") }) + .parse_next(input) +} + +// ws-newlines = newline *( wschar / newline ) +pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + (newline, ws_newline) + .recognize() + .map(|b| unsafe { + 
from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") + }) + .parse_next(input) +} + +// note: this rule is not present in the original grammar +// ws-comment-newline = *( ws-newline-nonempty / comment ) +pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { + repeat( + 0.., + alt(( + repeat( + 1.., + alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))), + ) + .map(|()| ()), + comment.value(()), + )), + ) + .map(|()| ()) + .recognize() + .parse_next(input) +} + +// note: this rule is not present in the original grammar +// line-ending = newline / eof +pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + alt((newline.value("\n"), eof.value(""))).parse_next(input) +} + +// note: this rule is not present in the original grammar +// line-trailing = ws [comment] skip-line-ending +pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> { + terminated((ws, opt(comment)).span(), line_ending).parse_next(input) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn trivia() { + let inputs = [ + "", + r#" "#, + r#" +"#, + r#" +# comment + +# comment2 + + +"#, + r#" + "#, + r#"# comment +# comment2 + + + "#, + ]; + for input in inputs { + dbg!(input); + let parsed = ws_comment_newline.parse(new_input(input)); + assert!(parsed.is_ok(), "{:?}", parsed); + let parsed = parsed.unwrap(); + assert_eq!(parsed, input.as_bytes()); + } + } +} diff --git a/vendor/toml_edit/src/parser/value.rs b/vendor/toml_edit/src/parser/value.rs new file mode 100644 index 0000000..14cd951 --- /dev/null +++ b/vendor/toml_edit/src/parser/value.rs @@ -0,0 +1,155 @@ +use winnow::combinator::alt; +use winnow::combinator::fail; +use winnow::combinator::peek; +use winnow::token::any; + +use crate::parser::array::array; +use crate::parser::datetime::date_time; +use crate::parser::inline_table::inline_table; +use crate::parser::numbers::{float, integer}; +use crate::parser::prelude::*; +use 
crate::parser::strings::string; +use crate::repr::{Formatted, Repr}; +use crate::value as v; +use crate::RawString; +use crate::Value; + +// val = string / boolean / array / inline-table / date-time / float / integer +pub(crate) fn value<'i>(check: RecursionCheck) -> impl Parser<Input<'i>, v::Value, ContextError> { + move |input: &mut Input<'i>| { + dispatch!{peek(any); + crate::parser::strings::QUOTATION_MARK | + crate::parser::strings::APOSTROPHE => string.map(|s| { + v::Value::String(Formatted::new( + s.into_owned() + )) + }), + crate::parser::array::ARRAY_OPEN => array(check).map(v::Value::Array), + crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(v::Value::InlineTable), + // Date/number starts + b'+' | b'-' | b'0'..=b'9' => { + // Uncommon enough not to be worth optimizing at this time + alt(( + date_time + .map(v::Value::from), + float + .map(v::Value::from), + integer + .map(v::Value::from), + )) + }, + // Report as if they were numbers because its most likely a typo + b'_' => { + integer + .map(v::Value::from) + .context(StrContext::Expected(StrContextValue::Description("leading digit"))) + }, + // Report as if they were numbers because its most likely a typo + b'.' 
=> { + float + .map(v::Value::from) + .context(StrContext::Expected(StrContextValue::Description("leading digit"))) + }, + b't' => { + crate::parser::numbers::true_.map(v::Value::from) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) + }, + b'f' => { + crate::parser::numbers::false_.map(v::Value::from) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) + }, + b'i' => { + crate::parser::numbers::inf.map(v::Value::from) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) + }, + b'n' => { + crate::parser::numbers::nan.map(v::Value::from) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) + }, + _ => { + fail + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) + }, + } + .with_span() + .try_map(|(value, span)| apply_raw(value, span)) + .parse_next(input) + } +} + +fn apply_raw(mut val: Value, span: std::ops::Range<usize>) -> Result<Value, std::str::Utf8Error> { + match val { + Value::String(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Integer(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Float(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Boolean(ref mut f) => { + let raw = RawString::with_span(span); + 
f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Datetime(ref mut f) => { + let raw = RawString::with_span(span); + f.set_repr_unchecked(Repr::new_unchecked(raw)); + } + Value::Array(ref mut arr) => { + arr.span = Some(span); + } + Value::InlineTable(ref mut table) => { + table.span = Some(span); + } + }; + val.decorate("", ""); + Ok(val) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn values() { + let inputs = [ + "1979-05-27T00:32:00.999999", + "-239", + "1e200", + "9_224_617.445_991_228_313", + r#"'''I [dw]on't need \d{2} apples'''"#, + r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#, + r#""Jos\u00E9\n""#, + r#""\\\"\b/\f\n\r\t\u00E9\U000A0000""#, + r#"{ hello = "world", a = 1}"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + dbg!(input); + let mut parsed = value(Default::default()).parse(new_input(input)); + if let Ok(parsed) = &mut parsed { + parsed.despan(input); + } + assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned())); + } + } +} |