summaryrefslogtreecommitdiffstats
path: root/vendor/toml_edit-0.19.11/src/parser/strings.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-07 05:48:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-07 05:48:48 +0000
commitef24de24a82fe681581cc130f342363c47c0969a (patch)
tree0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/toml_edit-0.19.11/src/parser/strings.rs
parentReleasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
downloadrustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz
rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip
Merging upstream version 1.75.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/toml_edit-0.19.11/src/parser/strings.rs')
-rw-r--r--vendor/toml_edit-0.19.11/src/parser/strings.rs464
1 files changed, 464 insertions, 0 deletions
diff --git a/vendor/toml_edit-0.19.11/src/parser/strings.rs b/vendor/toml_edit-0.19.11/src/parser/strings.rs
new file mode 100644
index 000000000..2ce160506
--- /dev/null
+++ b/vendor/toml_edit-0.19.11/src/parser/strings.rs
@@ -0,0 +1,464 @@
+use std::borrow::Cow;
+use std::char;
+use std::ops::RangeInclusive;
+
+use winnow::combinator::alt;
+use winnow::combinator::cut_err;
+use winnow::combinator::delimited;
+use winnow::combinator::fail;
+use winnow::combinator::opt;
+use winnow::combinator::peek;
+use winnow::combinator::preceded;
+use winnow::combinator::repeat;
+use winnow::combinator::success;
+use winnow::combinator::terminated;
+use winnow::prelude::*;
+use winnow::token::any;
+use winnow::token::none_of;
+use winnow::token::one_of;
+use winnow::token::tag;
+use winnow::token::take_while;
+
+use crate::parser::errors::CustomError;
+use crate::parser::numbers::HEXDIG;
+use crate::parser::prelude::*;
+use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_ASCII, WSCHAR};
+
+// ;; String
+
+// string = ml-basic-string / basic-string / ml-literal-string / literal-string
+pub(crate) fn string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ alt((
+ ml_basic_string,
+ basic_string,
+ ml_literal_string,
+ literal_string.map(Cow::Borrowed),
+ ))
+ .parse_next(input)
+}
+
+// ;; Basic String
+
+// basic-string = quotation-mark *basic-char quotation-mark
+pub(crate) fn basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ let (mut input, _) = one_of(QUOTATION_MARK).parse_next(input)?;
+
+ let mut c = Cow::Borrowed("");
+ if let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? {
+ input = i;
+ c = ci;
+ }
+ while let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? {
+ input = i;
+ c.to_mut().push_str(&ci);
+ }
+
+ let (input, _) = cut_err(one_of(QUOTATION_MARK))
+ .context(Context::Expression("basic string"))
+ .parse_next(input)?;
+
+ Ok((input, c))
+}
+
+// quotation-mark = %x22 ; "
+pub(crate) const QUOTATION_MARK: u8 = b'"';
+
+// basic-char = basic-unescaped / escaped
+fn basic_chars(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ alt((
+ // Deviate from the official grammar by batching the unescaped chars so we build a string a
+ // chunk at a time, rather than a `char` at a time.
+ take_while(1.., BASIC_UNESCAPED)
+ .try_map(std::str::from_utf8)
+ .map(Cow::Borrowed),
+ escaped.map(|c| Cow::Owned(String::from(c))),
+ ))
+ .parse_next(input)
+}
+
+// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+pub(crate) const BASIC_UNESCAPED: (
+ (u8, u8),
+ u8,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
+
+// escaped = escape escape-seq-char
+fn escaped(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> {
+ preceded(ESCAPE, escape_seq_char).parse_next(input)
+}
+
+// escape = %x5C ; \
+pub(crate) const ESCAPE: u8 = b'\\';
+
+// escape-seq-char = %x22 ; " quotation mark U+0022
+// escape-seq-char =/ %x5C ; \ reverse solidus U+005C
+// escape-seq-char =/ %x62 ; b backspace U+0008
+// escape-seq-char =/ %x66 ; f form feed U+000C
+// escape-seq-char =/ %x6E ; n line feed U+000A
+// escape-seq-char =/ %x72 ; r carriage return U+000D
+// escape-seq-char =/ %x74 ; t tab U+0009
+// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
+// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
+fn escape_seq_char(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> {
+ dispatch! {any;
+ b'b' => success('\u{8}'),
+ b'f' => success('\u{c}'),
+ b'n' => success('\n'),
+ b'r' => success('\r'),
+ b't' => success('\t'),
+ b'u' => cut_err(hexescape::<4>).context(Context::Expression("unicode 4-digit hex code")),
+ b'U' => cut_err(hexescape::<8>).context(Context::Expression("unicode 8-digit hex code")),
+ b'\\' => success('\\'),
+ b'"' => success('"'),
+ _ => {
+ cut_err(fail::<_, char, _>)
+ .context(Context::Expression("escape sequence"))
+ .context(Context::Expected(ParserValue::CharLiteral('b')))
+ .context(Context::Expected(ParserValue::CharLiteral('f')))
+ .context(Context::Expected(ParserValue::CharLiteral('n')))
+ .context(Context::Expected(ParserValue::CharLiteral('r')))
+ .context(Context::Expected(ParserValue::CharLiteral('t')))
+ .context(Context::Expected(ParserValue::CharLiteral('u')))
+ .context(Context::Expected(ParserValue::CharLiteral('U')))
+ .context(Context::Expected(ParserValue::CharLiteral('\\')))
+ .context(Context::Expected(ParserValue::CharLiteral('"')))
+ }
+ }
+ .parse_next(input)
+}
+
+pub(crate) fn hexescape<const N: usize>(
+ input: Input<'_>,
+) -> IResult<Input<'_>, char, ParserError<'_>> {
+ take_while(0..=N, HEXDIG)
+ .verify(|b: &[u8]| b.len() == N)
+ .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
+ .verify_map(|s| u32::from_str_radix(s, 16).ok())
+ .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange))
+ .parse_next(input)
+}
+
+// ;; Multiline Basic String
+
+// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
+// ml-basic-string-delim
+fn ml_basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ delimited(
+ ML_BASIC_STRING_DELIM,
+ preceded(opt(newline), cut_err(ml_basic_body)),
+ cut_err(ML_BASIC_STRING_DELIM),
+ )
+ .context(Context::Expression("multiline basic string"))
+ .parse_next(input)
+}
+
+// ml-basic-string-delim = 3quotation-mark
+pub(crate) const ML_BASIC_STRING_DELIM: &[u8] = b"\"\"\"";
+
+// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
+fn ml_basic_body(mut input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ let mut c = Cow::Borrowed("");
+ if let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? {
+ input = i;
+ c = ci;
+ }
+ while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? {
+ input = i;
+ c.to_mut().push_str(&ci);
+ }
+
+ while let Some((i, qi)) = ok_error(mlb_quotes(none_of(b'\"').value(())).parse_next(input))? {
+ if let Some((i, ci)) = ok_error(mlb_content.parse_next(i))? {
+ input = i;
+ c.to_mut().push_str(qi);
+ c.to_mut().push_str(&ci);
+ while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? {
+ input = i;
+ c.to_mut().push_str(&ci);
+ }
+ } else {
+ break;
+ }
+ }
+
+ if let Some((i, qi)) =
+ ok_error(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(())).parse_next(input))?
+ {
+ input = i;
+ c.to_mut().push_str(qi);
+ }
+
+ Ok((input, c))
+}
+
+// mlb-content = mlb-char / newline / mlb-escaped-nl
+// mlb-char = mlb-unescaped / escaped
+fn mlb_content(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ alt((
+ // Deviate from the official grammar by batching the unescaped chars so we build a string a
+ // chunk at a time, rather than a `char` at a time.
+ take_while(1.., MLB_UNESCAPED)
+ .try_map(std::str::from_utf8)
+ .map(Cow::Borrowed),
+ // Order changed fromg grammar so `escaped` can more easily `cut_err` on bad escape sequences
+ mlb_escaped_nl.map(|_| Cow::Borrowed("")),
+ escaped.map(|c| Cow::Owned(String::from(c))),
+ newline.map(|_| Cow::Borrowed("\n")),
+ ))
+ .parse_next(input)
+}
+
+// mlb-quotes = 1*2quotation-mark
+fn mlb_quotes<'i>(
+ mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>,
+) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> {
+ move |input| {
+ let res = terminated(b"\"\"", peek(term.by_ref()))
+ .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+ .parse_next(input);
+
+ match res {
+ Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"\"", peek(term.by_ref()))
+ .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+ .parse_next(input),
+ res => res,
+ }
+ }
+}
+
+// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+pub(crate) const MLB_UNESCAPED: (
+ (u8, u8),
+ u8,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
+
+// mlb-escaped-nl = escape ws newline *( wschar / newline
+// When the last non-whitespace character on a line is a \,
+// it will be trimmed along with all whitespace
+// (including newlines) up to the next non-whitespace
+// character or closing delimiter.
+fn mlb_escaped_nl(input: Input<'_>) -> IResult<Input<'_>, (), ParserError<'_>> {
+ repeat(1.., (ESCAPE, ws, ws_newlines))
+ .map(|()| ())
+ .value(())
+ .parse_next(input)
+}
+
+// ;; Literal String
+
+// literal-string = apostrophe *literal-char apostrophe
+pub(crate) fn literal_string(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
+ delimited(
+ APOSTROPHE,
+ cut_err(take_while(0.., LITERAL_CHAR)),
+ cut_err(APOSTROPHE),
+ )
+ .try_map(std::str::from_utf8)
+ .context(Context::Expression("literal string"))
+ .parse_next(input)
+}
+
+// apostrophe = %x27 ; ' apostrophe
+pub(crate) const APOSTROPHE: u8 = b'\'';
+
+// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
+pub(crate) const LITERAL_CHAR: (
+ u8,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
+
+// ;; Multiline Literal String
+
+// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
+// ml-literal-string-delim
+fn ml_literal_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+ delimited(
+ (ML_LITERAL_STRING_DELIM, opt(newline)),
+ cut_err(ml_literal_body.map(|t| {
+ if t.contains("\r\n") {
+ Cow::Owned(t.replace("\r\n", "\n"))
+ } else {
+ Cow::Borrowed(t)
+ }
+ })),
+ cut_err(ML_LITERAL_STRING_DELIM),
+ )
+ .context(Context::Expression("multiline literal string"))
+ .parse_next(input)
+}
+
+// ml-literal-string-delim = 3apostrophe
+pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''";
+
+// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
+fn ml_literal_body(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
+ (
+ repeat(0.., mll_content).map(|()| ()),
+ repeat(
+ 0..,
+ (
+ mll_quotes(none_of(APOSTROPHE).value(())),
+ repeat(1.., mll_content).map(|()| ()),
+ ),
+ )
+ .map(|()| ()),
+ opt(mll_quotes(tag(ML_LITERAL_STRING_DELIM).value(()))),
+ )
+ .recognize()
+ .try_map(std::str::from_utf8)
+ .parse_next(input)
+}
+
+// mll-content = mll-char / newline
+fn mll_content(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> {
+ alt((one_of(MLL_CHAR), newline)).parse_next(input)
+}
+
+// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
+const MLL_CHAR: (
+ u8,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+ RangeInclusive<u8>,
+) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
+
+// mll-quotes = 1*2apostrophe
+fn mll_quotes<'i>(
+ mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>,
+) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> {
+ move |input| {
+ let res = terminated(b"''", peek(term.by_ref()))
+ .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+ .parse_next(input);
+
+ match res {
+ Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"'", peek(term.by_ref()))
+ .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+ .parse_next(input),
+ res => res,
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn basic_string() {
+ let input =
+ r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#;
+ let expected = "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}";
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+
+ #[test]
+ fn ml_basic_string() {
+ let cases = [
+ (
+ r#""""
+Roses are red
+Violets are blue""""#,
+ r#"Roses are red
+Violets are blue"#,
+ ),
+ (r#"""" \""" """"#, " \"\"\" "),
+ (r#"""" \\""""#, " \\"),
+ ];
+
+ for &(input, expected) in &cases {
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+
+ let invalid_cases = [r#"""" """#, r#"""" \""""#];
+
+ for input in &invalid_cases {
+ let parsed = string.parse(new_input(input));
+ assert!(parsed.is_err());
+ }
+ }
+
+ #[test]
+ fn ml_basic_string_escape_ws() {
+ let inputs = [
+ r#""""
+The quick brown \
+
+
+ fox jumps over \
+ the lazy dog.""""#,
+ r#""""\
+ The quick brown \
+ fox jumps over \
+ the lazy dog.\
+ """"#,
+ ];
+ for input in &inputs {
+ let expected = "The quick brown fox jumps over the lazy dog.";
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+ let empties = [
+ r#""""\
+ """"#,
+ r#""""
+\
+ \
+""""#,
+ ];
+ for input in &empties {
+ let expected = "";
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+ }
+
+ #[test]
+ fn literal_string() {
+ let inputs = [
+ r#"'C:\Users\nodejs\templates'"#,
+ r#"'\\ServerX\admin$\system32\'"#,
+ r#"'Tom "Dubs" Preston-Werner'"#,
+ r#"'<\i\c*\s*>'"#,
+ ];
+
+ for input in &inputs {
+ let expected = &input[1..input.len() - 1];
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+ }
+
+ #[test]
+ fn ml_literal_string() {
+ let inputs = [
+ r#"'''I [dw]on't need \d{2} apples'''"#,
+ r#"''''one_quote''''"#,
+ ];
+ for input in &inputs {
+ let expected = &input[3..input.len() - 3];
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+
+ let input = r#"'''
+The first newline is
+trimmed in raw strings.
+ All other whitespace
+ is preserved.
+'''"#;
+ let expected = &input[4..input.len() - 3];
+ let parsed = string.parse(new_input(input));
+ assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+ }
+}