diff options
Diffstat (limited to 'third_party/rust/litrs/src/parse.rs')
-rw-r--r-- | third_party/rust/litrs/src/parse.rs | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/parse.rs b/third_party/rust/litrs/src/parse.rs new file mode 100644 index 0000000000..efc6b870f6 --- /dev/null +++ b/third_party/rust/litrs/src/parse.rs @@ -0,0 +1,125 @@ +use crate::{ + BoolLit, + Buffer, + ByteLit, + ByteStringLit, + CharLit, + ParseError, + FloatLit, + IntegerLit, + Literal, + StringLit, + err::{perr, ParseErrorKind::{*, self}}, +}; + + +pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> { + let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?; + let second = input.as_bytes().get(1).copied(); + + match first { + b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)), + b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)), + + // A number literal (integer or float). + b'0'..=b'9' => { + // To figure out whether this is a float or integer, we do some + // quick inspection here. Yes, this is technically duplicate + // work with what is happening in the integer/float parse + // methods, but it makes the code way easier for now and won't + // be a huge performance loss. + // + // The first non-decimal char in a float literal must + // be '.', 'e' or 'E'. + match input.as_bytes().get(1 + end_dec_digits(rest)) { + Some(b'.') | Some(b'e') | Some(b'E') + => FloatLit::parse(input).map(Literal::Float), + + _ => IntegerLit::parse(input).map(Literal::Integer), + } + }, + + b'\'' => CharLit::parse(input).map(Literal::Char), + b'"' | b'r' => StringLit::parse(input).map(Literal::String), + + b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte), + b'b' if second == Some(b'r') || second == Some(b'"') + => ByteStringLit::parse(input).map(Literal::ByteString), + + _ => Err(perr(None, InvalidLiteral)), + } +} + + +pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> { + s.as_bytes().get(0).copied().ok_or(perr(None, Empty)) +} + +/// Returns the index of the first non-underscore, non-decimal digit in `input`, +/// or the `input.len()` if all characters are decimal digits. +pub(crate) fn end_dec_digits(input: &[u8]) -> usize { + input.iter() + .position(|b| !matches!(b, b'_' | b'0'..=b'9')) + .unwrap_or(input.len()) +} + +pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> { + match digit { + b'0'..=b'9' => Some(digit - b'0'), + b'a'..=b'f' => Some(digit - b'a' + 10), + b'A'..=b'F' => Some(digit - b'A' + 10), + _ => None, + } +} + +/// Makes sure that `s` is a valid literal suffix. +pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> { + if s.is_empty() { + return Ok(()); + } + + let mut chars = s.chars(); + let first = chars.next().unwrap(); + let rest = chars.as_str(); + if first == '_' && rest.is_empty() { + return Err(InvalidSuffix); + } + + // This is just an extra check to improve the error message. If the first + // character of the "suffix" is already some invalid ASCII + // char, "unexpected character" seems like the more fitting error. + if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') { + return Err(UnexpectedChar); + } + + // Proper check is optional as it's not really necessary in proc macro + // context. + #[cfg(feature = "check_suffix")] + fn is_valid_suffix(first: char, rest: &str) -> bool { + use unicode_xid::UnicodeXID; + + (first == '_' || first.is_xid_start()) + && rest.chars().all(|c| c.is_xid_continue()) + } + + // When avoiding the dependency on `unicode_xid`, we just do a best effort + // to catch the most common errors. + #[cfg(not(feature = "check_suffix"))] + fn is_valid_suffix(first: char, rest: &str) -> bool { + if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') { + return false; + } + for c in rest.chars() { + if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') { + return false; + } + } + true + } + + if is_valid_suffix(first, rest) { + Ok(()) + } else { + Err(InvalidSuffix) + } +} |