1 files changed, 125 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/parse.rs b/third_party/rust/litrs/src/parse.rs
new file mode 100644
index 0000000000..efc6b870f6
--- /dev/null
+++ b/third_party/rust/litrs/src/parse.rs
@@ -0,0 +1,125 @@
+use crate::{
+    BoolLit,
+    Buffer,
+    ByteLit,
+    ByteStringLit,
+    CharLit,
+    ParseError,
+    FloatLit,
+    IntegerLit,
+    Literal,
+    StringLit,
+    err::{perr, ParseErrorKind::{*, self}},
+};
+
+
+pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
+    let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?;
+    let second = input.as_bytes().get(1).copied();
+
+    match first {
+        b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
+        b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),
+
+        // A number literal (integer or float).
+        b'0'..=b'9' => {
+            // To figure out whether this is a float or integer, we do some
+            // quick inspection here. Yes, this is technically duplicate
+            // work with what is happening in the integer/float parse
+            // methods, but it makes the code way easier for now and won't
+            // be a huge performance loss.
+            //
+            // The first non-decimal char in a float literal must
+            // be '.', 'e' or 'E'.
+            match input.as_bytes().get(1 + end_dec_digits(rest)) {
+                Some(b'.') | Some(b'e') | Some(b'E')
+                    => FloatLit::parse(input).map(Literal::Float),
+
+                _ => IntegerLit::parse(input).map(Literal::Integer),
+            }
+        },
+
+        b'\'' => CharLit::parse(input).map(Literal::Char),
+        b'"' | b'r' => StringLit::parse(input).map(Literal::String),
+
+        b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte),
+        b'b' if second == Some(b'r') || second == Some(b'"')
+            => ByteStringLit::parse(input).map(Literal::ByteString),
+
+        _ => Err(perr(None, InvalidLiteral)),
+    }
+}
+
+
+pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
+    s.as_bytes().get(0).copied().ok_or(perr(None, Empty))
+}
+
+/// Returns the index of the first non-underscore, non-decimal digit in `input`,
+/// or the `input.len()` if all characters are decimal digits.
+pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
+    input.iter()
+        .position(|b| !matches!(b, b'_' | b'0'..=b'9'))
+        .unwrap_or(input.len())
+}
+
+pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
+    match digit {
+        b'0'..=b'9' => Some(digit - b'0'),
+        b'a'..=b'f' => Some(digit - b'a' + 10),
+        b'A'..=b'F' => Some(digit - b'A' + 10),
+        _ => None,
+    }
+}
+
+/// Makes sure that `s` is a valid literal suffix.
+pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
+    if s.is_empty() {
+        return Ok(());
+    }
+
+    let mut chars = s.chars();
+    let first = chars.next().unwrap();
+    let rest = chars.as_str();
+    if first == '_' && rest.is_empty() {
+        return Err(InvalidSuffix);
+    }
+
+    // This is just an extra check to improve the error message. If the first
+    // character of the "suffix" is already some invalid ASCII
+    // char, "unexpected character" seems like the more fitting error.
+    if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
+        return Err(UnexpectedChar);
+    }
+
+    // Proper check is optional as it's not really necessary in proc macro
+    // context.
+    #[cfg(feature = "check_suffix")]
+    fn is_valid_suffix(first: char, rest: &str) -> bool {
+        use unicode_xid::UnicodeXID;
+
+        (first == '_' || first.is_xid_start())
+            && rest.chars().all(|c| c.is_xid_continue())
+    }
+
+    // When avoiding the dependency on `unicode_xid`, we just do a best effort
+    // to catch the most common errors.
+    #[cfg(not(feature = "check_suffix"))]
+    fn is_valid_suffix(first: char, rest: &str) -> bool {
+        if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
+            return false;
+        }
+        for c in rest.chars() {
+            if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') {
+                return false;
+            }
+        }
+        true
+    }
+
+    if is_valid_suffix(first, rest) {
+        Ok(())
+    } else {
+        Err(InvalidSuffix)
+    }
+}