2 files changed, 295 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/byte/mod.rs b/third_party/rust/litrs/src/byte/mod.rs
new file mode 100644
index 0000000000..ffdff5d04a
--- /dev/null
+++ b/third_party/rust/litrs/src/byte/mod.rs
@@ -0,0 +1,107 @@
+use core::fmt;
+
+use crate::{
+    Buffer, ParseError,
+    err::{perr, ParseErrorKind::*},
+    escape::unescape,
+    parse::check_suffix,
+};
+
+
+/// A (single) byte literal, e.g. `b'k'` or `b'!'`, with an optional suffix.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ByteLit<B: Buffer> {
+    raw: B, // The complete input as it was passed to `parse`.
+    /// Start index of the suffix or `raw.len()` if there is no suffix.
+    start_suffix: usize,
+    value: u8, // The byte computed by `parse_impl` (escapes already resolved).
+}
+
+impl<B: Buffer> ByteLit<B> {
+    /// Tries to interpret `input` as a byte literal. Fails if `input` is empty,
+    /// does not begin with `b'`, or is rejected by the literal parser.
+    pub fn parse(input: B) -> Result<Self, ParseError> {
+        if input.is_empty() {
+            Err(perr(None, Empty))
+        } else if !input.starts_with("b'") {
+            Err(perr(None, InvalidByteLiteralStart))
+        } else {
+            // The `b'` prefix required by `parse_impl` was verified just above.
+            let (byte, suffix_at) = parse_impl(&input)?;
+            Ok(Self { raw: input, start_suffix: suffix_at, value: byte })
+        }
+    }
+
+    /// The byte that this literal evaluates to.
+    pub fn value(&self) -> u8 {
+        self.value
+    }
+
+    /// The suffix of the literal, or `""` if it has none.
+    pub fn suffix(&self) -> &str {
+        let text: &str = &self.raw;
+        &text[self.start_suffix..]
+    }
+
+    /// The exact text that was handed to `parse`.
+    pub fn raw_input(&self) -> &str {
+        &self.raw
+    }
+
+    /// Gives back the buffer that was handed to `parse`, potentially owned.
+    pub fn into_raw_input(self) -> B {
+        self.raw
+    }
+}
+
+impl ByteLit<&str> {
+    /// Copies the borrowed source text into a `String` and returns a `ByteLit`
+    /// that owns it.
+    ///
+    /// The suffix offset and the parsed value are carried over unchanged.
+    pub fn to_owned(&self) -> ByteLit<String> {
+        let Self { raw, start_suffix, value } = *self;
+        let raw = String::from(raw);
+        ByteLit { raw, start_suffix, value }
+    }
+}
+
+impl<B: Buffer> fmt::Display for ByteLit<B> {
+    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
+        out.pad(&*self.raw) // Writes the raw input text through `Formatter::pad`.
+    }
+}
+
+/// Parses a byte literal and returns its value plus the index where the suffix starts.
+/// Precondition: must start with `b'`.
+#[inline(never)]
+pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
+    let input_bytes = input.as_bytes();
+    // Build the error lazily: it is only needed when nothing follows `b'`.
+    let first = input_bytes.get(2).ok_or_else(|| perr(None, UnterminatedByteLiteral))?;
+    let (c, len) = match first {
+        b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)),
+        b'\'' => return Err(perr(None, EmptyByteLiteral)),
+        b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)),
+        b'\\' => unescape::<u8>(&input[2..], 2)?,
+        other if other.is_ascii() => (*other, 1),
+        _ => return Err(perr(2, NonAsciiInByteLiteral)),
+    };
+    // The closing quote must directly follow the `len` bytes consumed above.
+    match input[2 + len..].find('\'') {
+        Some(0) => {}
+        Some(_) => return Err(perr(None, OverlongByteLiteral)),
+        None => return Err(perr(None, UnterminatedByteLiteral)),
+    }
+    // Everything after the closing quote is the suffix and must pass `check_suffix`.
+    let start_suffix = 2 + len + 1;
+    check_suffix(&input[start_suffix..]).map_err(|kind| perr(start_suffix, kind))?;
+    Ok((c, start_suffix))
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/third_party/rust/litrs/src/byte/tests.rs b/third_party/rust/litrs/src/byte/tests.rs
new file mode 100644
index 0000000000..3cf16b5fc2
--- /dev/null
+++ b/third_party/rust/litrs/src/byte/tests.rs
@@ -0,0 +1,188 @@
+use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}};
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check { // Asserts that an input parses to the expected `ByteLit`.
+    ($lit:literal) => { check!($lit, stringify!($lit), "") }; // Input is the literal's own text, no suffix.
+    ($lit:literal, $input:expr, $suffix:literal) => {
+        let input = $input;
+        let expected = ByteLit {
+            raw: input,
+            start_suffix: input.len() - $suffix.len(), // The suffix is the tail of the input.
+            value: $lit,
+        };
+        // Parse directly and via `Literal`, then check the accessors and call `assert_roundtrip`.
+        assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
+        assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
+        let lit = ByteLit::parse(input).unwrap();
+        assert_eq!(lit.value(), $lit);
+        assert_eq!(lit.suffix(), $suffix);
+        assert_roundtrip(expected.to_owned(), input);
+    };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn alphanumeric() { // Unescaped ASCII letters and digits parse to their own byte value.
+    check!(b'a');
+    check!(b'b');
+    check!(b'y');
+    check!(b'z');
+    check!(b'A');
+    check!(b'B');
+    check!(b'Y');
+    check!(b'Z');
+
+    check!(b'0');
+    check!(b'1');
+    check!(b'8');
+    check!(b'9');
+}
+
+#[test]
+fn special_chars() { // Space and ASCII punctuation that needs no escaping inside `b'…'`.
+    check!(b' ');
+    check!(b'!');
+    check!(b'"');
+    check!(b'#');
+    check!(b'$');
+    check!(b'%');
+    check!(b'&');
+    check!(b'(');
+    check!(b')');
+    check!(b'*');
+    check!(b'+');
+    check!(b',');
+    check!(b'-');
+    check!(b'.');
+    check!(b'/');
+    check!(b':');
+    check!(b';');
+    check!(b'<');
+    check!(b'=');
+    check!(b'>');
+    check!(b'?');
+    check!(b'@');
+    check!(b'[');
+    check!(b']');
+    check!(b'^');
+    check!(b'_');
+    check!(b'`');
+    check!(b'{');
+    check!(b'|');
+    check!(b'}');
+    check!(b'~');
+}
+
+#[test]
+fn quote_escapes() { // Backslash-escaped single and double quotes.
+    check!(b'\'');
+    check!(b'\"');
+}
+
+#[test]
+fn ascii_escapes() { // Simple escapes and `\x` escapes in the range 0x00..=0x7F.
+    check!(b'\n');
+    check!(b'\r');
+    check!(b'\t');
+    check!(b'\\');
+    check!(b'\0');
+
+    check!(b'\x00');
+    check!(b'\x01');
+    check!(b'\x0c');
+    check!(b'\x0D');
+    check!(b'\x13');
+    check!(b'\x30');
+    check!(b'\x30'); // NOTE(review): repeats the previous case.
+    check!(b'\x4B');
+    check!(b'\x6b');
+    check!(b'\x7F');
+    check!(b'\x7f');
+}
+
+#[test]
+fn byte_escapes() { // `\x` escapes in the range 0x80..=0xFF, with mixed-case hex digits.
+    check!(b'\x80');
+    check!(b'\x8a');
+    check!(b'\x8C');
+    check!(b'\x99');
+    check!(b'\xa0');
+    check!(b'\xAd');
+    check!(b'\xfe');
+    check!(b'\xFe');
+    check!(b'\xfF');
+    check!(b'\xFF');
+}
+
+#[test]
+fn suffixes() { // The same kinds of literals as above, each followed by the suffix `peter`.
+    check!(b'a', r##"b'a'peter"##, "peter");
+    check!(b'#', r##"b'#'peter"##, "peter");
+    check!(b'\n', r##"b'\n'peter"##, "peter");
+    check!(b'\'', r##"b'\''peter"##, "peter");
+    check!(b'\"', r##"b'\"'peter"##, "peter");
+    check!(b'\xFF', r##"b'\xFF'peter"##, "peter");
+}
+
+#[test] // NOTE(review): the function name below misspells "invalid".
+fn invald_escapes() { // Unknown, unterminated and malformed `\x` escapes are rejected.
+    assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4); // Last argument is presumably the error span; confirm in `assert_err!`.
+    assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
+    assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3);
+    assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5);
+    assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6);
+    assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
+    assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
+}
+
+#[test]
+fn unicode_escape_not_allowed() { // Every `\u{…}` escape is rejected, whatever the code point.
+    assert_err!(ByteLit, r"b'\u{0}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{00}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{b}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{B}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{7e}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{E4}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{e4}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{fc}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{Fc}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{fC}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{FC}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{b10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{B10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{0b10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{2764}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{1f602}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{1F602}'", UnicodeEscapeInByteLiteral, 2..4);
+}
+
+#[test]
+fn parse_err() { // Structural errors: empty, unterminated, overlong, unescaped or non-ASCII content.
+    assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
+    assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5); // The extra `'` sits where the suffix starts (index 4).
+
+    assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);
+
+    assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None);
+
+    assert_err!(ByteLit, r"", Empty, None);
+
+    assert_err!(ByteLit, r"b'''", UnescapedSingleQuote, 2);
+    assert_err!(ByteLit, r"b''''", UnescapedSingleQuote, 2);
+
+    assert_err!(ByteLit, "b'\n'", UnescapedSpecialWhitespace, 2); // Non-raw strings: these hold real control characters.
+    assert_err!(ByteLit, "b'\t'", UnescapedSpecialWhitespace, 2);
+    assert_err!(ByteLit, "b'\r'", UnescapedSpecialWhitespace, 2);
+
+    assert_err!(ByteLit, "b'న'", NonAsciiInByteLiteral, 2);
+    assert_err!(ByteLit, "b'犬'", NonAsciiInByteLiteral, 2);
+    assert_err!(ByteLit, "b'🦊'", NonAsciiInByteLiteral, 2);
+}