Merging upstream version 1.75.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
commit: ef24de24a82fe681581cc130f342363c47c0969a (patch)
tree: 0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/toml_edit-0.19.11/src/parser/strings.rs
parent: Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
download: rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz
rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip
1 files changed, 464 insertions, 0 deletions
diff --git a/vendor/toml_edit-0.19.11/src/parser/strings.rs b/vendor/toml_edit-0.19.11/src/parser/strings.rs
new file mode 100644
index 000000000..2ce160506
--- /dev/null
+++ b/vendor/toml_edit-0.19.11/src/parser/strings.rs
@@ -0,0 +1,464 @@
+use std::borrow::Cow;
+use std::char;
+use std::ops::RangeInclusive;
+
+use winnow::combinator::alt;
+use winnow::combinator::cut_err;
+use winnow::combinator::delimited;
+use winnow::combinator::fail;
+use winnow::combinator::opt;
+use winnow::combinator::peek;
+use winnow::combinator::preceded;
+use winnow::combinator::repeat;
+use winnow::combinator::success;
+use winnow::combinator::terminated;
+use winnow::prelude::*;
+use winnow::token::any;
+use winnow::token::none_of;
+use winnow::token::one_of;
+use winnow::token::tag;
+use winnow::token::take_while;
+
+use crate::parser::errors::CustomError;
+use crate::parser::numbers::HEXDIG;
+use crate::parser::prelude::*;
+use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_ASCII, WSCHAR};
+
+// ;; String
+
+// string = ml-basic-string / basic-string / ml-literal-string / literal-string
+//
+// Alternatives are tried in this order. The multiline forms go first because their `"""` and
+// `'''` openers start with the very quote character that opens the single-line forms.
+pub(crate) fn string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    // A literal string is a plain `&str` slice of the input, so it is wrapped without copying.
+    let literal = literal_string.map(Cow::Borrowed);
+    let mut any_string = alt((ml_basic_string, basic_string, ml_literal_string, literal));
+    any_string.parse_next(input)
+}
+
+// ;; Basic String
+
+// basic-string = quotation-mark *basic-char quotation-mark
+pub(crate) fn basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    let (mut input, _) = one_of(QUOTATION_MARK).parse_next(input)?; // opening quote
+
+    let mut c = Cow::Borrowed(""); // accumulated string contents
+    if let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? { // NOTE(review): `ok_error` is defined elsewhere; presumably it maps a recoverable error to `None` - confirm
+        input = i;
+        c = ci; // the first chunk is taken as-is, so a lone unescaped run stays borrowed
+    }
+    while let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? {
+        input = i;
+        c.to_mut().push_str(&ci); // every further chunk is appended to an owned buffer
+    }
+
+    let (input, _) = cut_err(one_of(QUOTATION_MARK)) // closing quote; `cut_err` stops backtracking if it is missing
+        .context(Context::Expression("basic string"))
+        .parse_next(input)?;
+
+    Ok((input, c))
+}
+
+// quotation-mark = %x22            ; "
+pub(crate) const QUOTATION_MARK: u8 = b'"'; // opens and closes a basic string
+
+// basic-char = basic-unescaped / escaped
+//
+// The grammar is defined per character; this parser instead yields one chunk per call, either a
+// whole run of unescaped bytes borrowed from the input or a single decoded escape sequence.
+fn basic_chars(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    let unescaped_run = take_while(1.., BASIC_UNESCAPED)
+        .try_map(std::str::from_utf8)
+        .map(Cow::Borrowed);
+    let decoded_escape = escaped.map(|ch| Cow::Owned(String::from(ch)));
+
+    alt((unescaped_run, decoded_escape)).parse_next(input)
+}
+
+// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+pub(crate) const BASIC_UNESCAPED: (
+    (u8, u8), // wschar
+    u8, // %x21
+    RangeInclusive<u8>, // %x23-5B (skips %x22, the quotation mark)
+    RangeInclusive<u8>, // %x5D-7E (skips %x5C, the escape backslash)
+    RangeInclusive<u8>, // non-ascii
+) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
+
+// escaped = escape escape-seq-char
+fn escaped(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> {
+    (ESCAPE, escape_seq_char).map(|(_backslash, decoded)| decoded).parse_next(input)
+}
+
+// escape = %x5C                    ; \
+pub(crate) const ESCAPE: u8 = b'\\'; // the backslash that introduces an escape sequence
+
+// escape-seq-char =  %x22         ; "    quotation mark  U+0022
+// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
+// escape-seq-char =/ %x62         ; b    backspace       U+0008
+// escape-seq-char =/ %x66         ; f    form feed       U+000C
+// escape-seq-char =/ %x6E         ; n    line feed       U+000A
+// escape-seq-char =/ %x72         ; r    carriage return U+000D
+// escape-seq-char =/ %x74         ; t    tab             U+0009
+// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
+// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
+fn escape_seq_char(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> {
+    dispatch! {any; // take the byte after the backslash and branch on it
+        b'b' => success('\u{8}'), // backspace
+        b'f' => success('\u{c}'), // form feed
+        b'n' => success('\n'),
+        b'r' => success('\r'),
+        b't' => success('\t'),
+        b'u' => cut_err(hexescape::<4>).context(Context::Expression("unicode 4-digit hex code")),
+        b'U' => cut_err(hexescape::<8>).context(Context::Expression("unicode 8-digit hex code")),
+        b'\\' => success('\\'),
+        b'"' => success('"'),
+        _ => { // any other byte: fail without backtracking, recording every accepted escape letter
+            cut_err(fail::<_, char, _>)
+            .context(Context::Expression("escape sequence"))
+            .context(Context::Expected(ParserValue::CharLiteral('b')))
+            .context(Context::Expected(ParserValue::CharLiteral('f')))
+            .context(Context::Expected(ParserValue::CharLiteral('n')))
+            .context(Context::Expected(ParserValue::CharLiteral('r')))
+            .context(Context::Expected(ParserValue::CharLiteral('t')))
+            .context(Context::Expected(ParserValue::CharLiteral('u')))
+            .context(Context::Expected(ParserValue::CharLiteral('U')))
+            .context(Context::Expected(ParserValue::CharLiteral('\\')))
+            .context(Context::Expected(ParserValue::CharLiteral('"')))
+        }
+    }
+    .parse_next(input)
+}
+
+pub(crate) fn hexescape<const N: usize>(
+    input: Input<'_>,
+) -> IResult<Input<'_>, char, ParserError<'_>> {
+    take_while(0..=N, HEXDIG) // take at most N hex digits...
+        .verify(|b: &[u8]| b.len() == N) // ...and reject anything shorter than exactly N
+        .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`HEXDIG` filters out non-ASCII") })
+        .verify_map(|s| u32::from_str_radix(s, 16).ok()) // hex text -> numeric code point
+        .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) // not every `u32` is a valid `char`
+        .parse_next(input)
+}
+
+// ;; Multiline Basic String
+
+// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
+//                   ml-basic-string-delim
+fn ml_basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    // A newline directly after the opening delimiter is consumed here, ahead of the body.
+    let body = preceded(opt(newline), cut_err(ml_basic_body));
+    // Once the opening delimiter matched, a bad body or a missing closer no longer backtracks.
+    let closing = cut_err(ML_BASIC_STRING_DELIM);
+    delimited(ML_BASIC_STRING_DELIM, body, closing)
+        .context(Context::Expression("multiline basic string"))
+        .parse_next(input)
+}
+
+// ml-basic-string-delim = 3quotation-mark
+pub(crate) const ML_BASIC_STRING_DELIM: &[u8] = b"\"\"\""; // opens and closes a multiline basic string
+
+// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
+fn ml_basic_body(mut input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    let mut c = Cow::Borrowed(""); // accumulated body text
+    if let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { // NOTE(review): `ok_error` is defined elsewhere; presumably `None` means a recoverable miss - confirm
+        input = i;
+        c = ci; // the first chunk is taken as-is instead of being copied
+    }
+    while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { // remaining *mlb-content
+        input = i;
+        c.to_mut().push_str(&ci);
+    }
+
+    while let Some((i, qi)) = ok_error(mlb_quotes(none_of(b'\"').value(())).parse_next(input))? {
+        if let Some((i, ci)) = ok_error(mlb_content.parse_next(i))? { // the quotes are only kept when content follows them
+            input = i; // commit past both the quotes and that content
+            c.to_mut().push_str(qi);
+            c.to_mut().push_str(&ci);
+            while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { // rest of 1*mlb-content
+                input = i;
+                c.to_mut().push_str(&ci);
+            }
+        } else {
+            break; // `input` was not advanced, so the quotes are left for the step below
+        }
+    }
+
+    if let Some((i, qi)) = // [ mlb-quotes ]: one or two quotes sitting right before the closing delimiter
+        ok_error(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(())).parse_next(input))?
+    {
+        input = i;
+        c.to_mut().push_str(qi);
+    }
+
+    Ok((input, c))
+}
+
+// mlb-content = mlb-char / newline / mlb-escaped-nl
+// mlb-char = mlb-unescaped / escaped
+fn mlb_content(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    // Unlike the grammar, unescaped bytes are taken a whole run at a time, so the string is
+    // assembled from chunks instead of from individual `char`s.
+    let unescaped_run = take_while(1.., MLB_UNESCAPED)
+        .try_map(std::str::from_utf8)
+        .map(Cow::Borrowed);
+    let line_continuation = mlb_escaped_nl.map(|_| Cow::Borrowed(""));
+    let decoded_escape = escaped.map(|ch| Cow::Owned(String::from(ch)));
+    let line_break = newline.map(|_| Cow::Borrowed("\n"));
+
+    // Not the grammar's order: the line continuation goes ahead of `escaped`, which makes it
+    // easier for `escaped` to `cut_err` on bad escape sequences.
+    alt((unescaped_run, line_continuation, decoded_escape, line_break)).parse_next(input)
+}
+
+// mlb-quotes = 1*2quotation-mark
+fn mlb_quotes<'i>(
+    mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>, // what must come right after the quotes
+) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> {
+    move |input| {
+        let res = terminated(b"\"\"", peek(term.by_ref())) // try two quotes first; `term` is only peeked, not consumed
+            .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+            .parse_next(input);
+
+        match res { // only a backtracking failure triggers the single-quote retry
+            Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"\"", peek(term.by_ref()))
+                .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+                .parse_next(input),
+            res => res, // success and every other error kind pass through unchanged
+        }
+    }
+}
+
+// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+pub(crate) const MLB_UNESCAPED: (
+    (u8, u8), // wschar
+    u8, // %x21
+    RangeInclusive<u8>, // %x23-5B (skips %x22, the quotation mark)
+    RangeInclusive<u8>, // %x5D-7E (skips %x5C, the escape backslash)
+    RangeInclusive<u8>, // non-ascii
+) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
+
+// mlb-escaped-nl = escape ws newline *( wschar / newline )
+// When the last non-whitespace character on a line is a \,
+// it will be trimmed along with all whitespace
+// (including newlines) up to the next non-whitespace
+// character or closing delimiter.
+fn mlb_escaped_nl(input: Input<'_>) -> IResult<Input<'_>, (), ParserError<'_>> {
+    // One continuation is the backslash, then `ws`, then `ws_newlines`.
+    let continuation = (ESCAPE, ws, ws_newlines);
+    // Back-to-back continuations are consumed together; nothing of them is kept.
+    repeat(1.., continuation).map(|()| ()).parse_next(input)
+}
+
+// ;; Literal String
+
+// literal-string = apostrophe *literal-char apostrophe
+pub(crate) fn literal_string(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
+    // No escape handling here: the value is exactly the bytes found between the apostrophes.
+    let contents = cut_err(take_while(0.., LITERAL_CHAR));
+    let closing = cut_err(APOSTROPHE);
+
+    delimited(APOSTROPHE, contents, closing)
+        .try_map(std::str::from_utf8)
+        .context(Context::Expression("literal string"))
+        .parse_next(input)
+}
+
+// apostrophe = %x27 ; ' apostrophe
+pub(crate) const APOSTROPHE: u8 = b'\''; // opens and closes a literal string
+
+// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
+pub(crate) const LITERAL_CHAR: (
+    u8, // %x09 (tab)
+    RangeInclusive<u8>, // %x20-26
+    RangeInclusive<u8>, // %x28-7E (skips %x27, the apostrophe)
+    RangeInclusive<u8>, // non-ascii
+) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
+
+// ;; Multiline Literal String
+
+// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
+//                     ml-literal-string-delim
+fn ml_literal_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> {
+    delimited(
+        (ML_LITERAL_STRING_DELIM, opt(newline)), // opening delimiter; a newline right after it is consumed here, not by the body
+        cut_err(ml_literal_body.map(|t| {
+            if t.contains("\r\n") { // CRLF is rewritten to LF, which needs an owned copy
+                Cow::Owned(t.replace("\r\n", "\n"))
+            } else {
+                Cow::Borrowed(t) // otherwise the body is returned as a slice, without copying
+            }
+        })),
+        cut_err(ML_LITERAL_STRING_DELIM), // a missing closing delimiter does not backtrack
+    )
+    .context(Context::Expression("multiline literal string"))
+    .parse_next(input)
+}
+
+// ml-literal-string-delim = 3apostrophe
+pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''"; // opens and closes a multiline literal string
+
+// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
+fn ml_literal_body(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
+    (
+        repeat(0.., mll_content).map(|()| ()), // *mll-content
+        repeat(
+            0..,
+            (
+                mll_quotes(none_of(APOSTROPHE).value(())), // one or two apostrophes followed by a non-apostrophe
+                repeat(1.., mll_content).map(|()| ()), // 1*mll-content
+            ),
+        )
+        .map(|()| ()),
+        opt(mll_quotes(tag(ML_LITERAL_STRING_DELIM).value(()))), // [ mll-quotes ] right before the closing delimiter
+    )
+        .recognize() // the pieces are discarded; the matched input span is the result
+        .try_map(std::str::from_utf8)
+        .parse_next(input)
+}
+
+// mll-content = mll-char / newline
+fn mll_content(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> {
+    alt((one_of(MLL_CHAR), newline)).parse_next(input) // one mll-char byte, otherwise whatever `newline` accepts
+}
+
+// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
+const MLL_CHAR: (
+    u8, // %x09 (tab)
+    RangeInclusive<u8>, // %x20-26
+    RangeInclusive<u8>, // %x28-7E (skips %x27, the apostrophe)
+    RangeInclusive<u8>, // non-ascii
+) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
+
+// mll-quotes = 1*2apostrophe
+fn mll_quotes<'i>(
+    mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>, // what must come right after the apostrophes
+) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> {
+    move |input| {
+        let res = terminated(b"''", peek(term.by_ref())) // try two apostrophes first; `term` is only peeked, not consumed
+            .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+            .parse_next(input);
+
+        match res { // only a backtracking failure triggers the single-apostrophe retry
+            Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"'", peek(term.by_ref()))
+                .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
+                .parse_next(input),
+            res => res, // success and every other error kind pass through unchanged
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn basic_string() {
+        let input =
+            r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#;
+        let expected = "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}";
+        let parsed = string.parse(new_input(input));
+        assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+    }
+
+    #[test]
+    fn ml_basic_string() {
+        let cases = [ // (input, expected value)
+            (
+                r#""""
+Roses are red
+Violets are blue""""#,
+                r#"Roses are red
+Violets are blue"#,
+            ),
+            (r#"""" \""" """"#, " \"\"\" "),
+            (r#"""" \\""""#, " \\"),
+        ];
+
+        for &(input, expected) in &cases {
+            let parsed = string.parse(new_input(input));
+            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+        }
+
+        let invalid_cases = [r#""""  """#, r#""""  \""""#]; // both end with only two unescaped quotes, so neither is terminated
+
+        for input in &invalid_cases {
+            let parsed = string.parse(new_input(input));
+            assert!(parsed.is_err());
+        }
+    }
+
+    #[test]
+    fn ml_basic_string_escape_ws() {
+        let inputs = [ // two spellings that use line-ending backslashes; both must give the same sentence
+            r#""""
+The quick brown \
+
+
+  fox jumps over \
+    the lazy dog.""""#,
+            r#""""\
+       The quick brown \
+       fox jumps over \
+       the lazy dog.\
+       """"#,
+        ];
+        for input in &inputs {
+            let expected = "The quick brown fox jumps over the lazy dog.";
+            let parsed = string.parse(new_input(input));
+            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+        }
+        let empties = [ // bodies made only of line-ending backslashes and whitespace
+            r#""""\
+       """"#,
+            r#""""
+\
+  \
+""""#,
+        ];
+        for input in &empties {
+            let expected = "";
+            let parsed = string.parse(new_input(input));
+            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+        }
+    }
+
+    #[test]
+    fn literal_string() {
+        let inputs = [
+            r#"'C:\Users\nodejs\templates'"#,
+            r#"'\\ServerX\admin$\system32\'"#,
+            r#"'Tom "Dubs" Preston-Werner'"#,
+            r#"'<\i\c*\s*>'"#,
+        ];
+
+        for input in &inputs {
+            let expected = &input[1..input.len() - 1]; // everything between the two apostrophes, verbatim
+            let parsed = string.parse(new_input(input));
+            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+        }
+    }
+
+    #[test]
+    fn ml_literal_string() {
+        let inputs = [
+            r#"'''I [dw]on't need \d{2} apples'''"#,
+            r#"''''one_quote''''"#,
+        ];
+        for input in &inputs {
+            let expected = &input[3..input.len() - 3]; // strip the three-apostrophe delimiters
+            let parsed = string.parse(new_input(input));
+            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+        }
+
+        let input = r#"'''
+The first newline is
+trimmed in raw strings.
+   All other whitespace
+   is preserved.
+'''"#;
+        let expected = &input[4..input.len() - 3]; // additionally skip the newline after the opening delimiter
+        let parsed = string.parse(new_input(input));
+        assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-07 05:48:48 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-07 05:48:48 +0000
commit	ef24de24a82fe681581cc130f342363c47c0969a (patch)
tree	0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/toml_edit-0.19.11/src/parser/strings.rs
parent	Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
download	rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip