summaryrefslogtreecommitdiffstats
path: root/third_party/rust/litrs/src/parse.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/litrs/src/parse.rs')
-rw-r--r--third_party/rust/litrs/src/parse.rs125
1 files changed, 125 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/parse.rs b/third_party/rust/litrs/src/parse.rs
new file mode 100644
index 0000000000..efc6b870f6
--- /dev/null
+++ b/third_party/rust/litrs/src/parse.rs
@@ -0,0 +1,125 @@
+use crate::{
+ BoolLit,
+ Buffer,
+ ByteLit,
+ ByteStringLit,
+ CharLit,
+ ParseError,
+ FloatLit,
+ IntegerLit,
+ Literal,
+ StringLit,
+ err::{perr, ParseErrorKind::{*, self}},
+};
+
+
+pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
+ let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?;
+ let second = input.as_bytes().get(1).copied();
+
+ match first {
+ b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
+ b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),
+
+ // A number literal (integer or float).
+ b'0'..=b'9' => {
+ // To figure out whether this is a float or integer, we do some
+ // quick inspection here. Yes, this is technically duplicate
+ // work with what is happening in the integer/float parse
+ // methods, but it makes the code way easier for now and won't
+ // be a huge performance loss.
+ //
+ // The first non-decimal char in a float literal must
+ // be '.', 'e' or 'E'.
+ match input.as_bytes().get(1 + end_dec_digits(rest)) {
+ Some(b'.') | Some(b'e') | Some(b'E')
+ => FloatLit::parse(input).map(Literal::Float),
+
+ _ => IntegerLit::parse(input).map(Literal::Integer),
+ }
+ },
+
+ b'\'' => CharLit::parse(input).map(Literal::Char),
+ b'"' | b'r' => StringLit::parse(input).map(Literal::String),
+
+ b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte),
+ b'b' if second == Some(b'r') || second == Some(b'"')
+ => ByteStringLit::parse(input).map(Literal::ByteString),
+
+ _ => Err(perr(None, InvalidLiteral)),
+ }
+}
+
+
+pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
+ s.as_bytes().get(0).copied().ok_or(perr(None, Empty))
+}
+
+/// Returns the index of the first non-underscore, non-decimal digit in `input`,
+/// or the `input.len()` if all characters are decimal digits.
+pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
+ input.iter()
+ .position(|b| !matches!(b, b'_' | b'0'..=b'9'))
+ .unwrap_or(input.len())
+}
+
+pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
+ match digit {
+ b'0'..=b'9' => Some(digit - b'0'),
+ b'a'..=b'f' => Some(digit - b'a' + 10),
+ b'A'..=b'F' => Some(digit - b'A' + 10),
+ _ => None,
+ }
+}
+
+/// Makes sure that `s` is a valid literal suffix.
+pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
+ if s.is_empty() {
+ return Ok(());
+ }
+
+ let mut chars = s.chars();
+ let first = chars.next().unwrap();
+ let rest = chars.as_str();
+ if first == '_' && rest.is_empty() {
+ return Err(InvalidSuffix);
+ }
+
+ // This is just an extra check to improve the error message. If the first
+ // character of the "suffix" is already some invalid ASCII
+ // char, "unexpected character" seems like the more fitting error.
+ if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
+ return Err(UnexpectedChar);
+ }
+
+ // Proper check is optional as it's not really necessary in proc macro
+ // context.
+ #[cfg(feature = "check_suffix")]
+ fn is_valid_suffix(first: char, rest: &str) -> bool {
+ use unicode_xid::UnicodeXID;
+
+ (first == '_' || first.is_xid_start())
+ && rest.chars().all(|c| c.is_xid_continue())
+ }
+
+ // When avoiding the dependency on `unicode_xid`, we just do a best effort
+ // to catch the most common errors.
+ #[cfg(not(feature = "check_suffix"))]
+ fn is_valid_suffix(first: char, rest: &str) -> bool {
+ if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
+ return false;
+ }
+ for c in rest.chars() {
+ if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') {
+ return false;
+ }
+ }
+ true
+ }
+
+ if is_valid_suffix(first, rest) {
+ Ok(())
+ } else {
+ Err(InvalidSuffix)
+ }
+}