summaryrefslogtreecommitdiffstats
path: root/third_party/rust/litrs/src/parse.rs
blob: efc6b870f6ddf7fbc19aae4f491fcd22be9d0602 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
use crate::{
    BoolLit,
    Buffer,
    ByteLit,
    ByteStringLit,
    CharLit,
    ParseError,
    FloatLit,
    IntegerLit,
    Literal,
    StringLit,
    err::{perr, ParseErrorKind::{*, self}},
};


/// Parses `input` as a single literal of any supported kind.
///
/// The literal kind is picked by inspecting the first byte (and, for input
/// starting with `b`, the second byte as well). The actual parsing is then
/// delegated to the `parse` function of the matching literal type.
///
/// # Errors
///
/// Returns an `Empty` error if `input` is empty and an `InvalidLiteral` error
/// if the leading bytes do not select any literal kind. Errors produced by
/// the delegated parsers are returned as they are.
pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
    // Copy the leading bytes out so that no borrow of `input` is held while
    // it is handed over to one of the specific parsers below.
    let (first, rest) = match input.as_bytes().split_first() {
        Some((&head, tail)) => (head, tail),
        None => return Err(perr(None, Empty)),
    };
    let second = rest.first().copied();

    match (first, second) {
        (b'f', _) if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
        (b't', _) if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),

        // Number literal: either an integer or a float.
        (b'0'..=b'9', _) => {
            // Deciding between float and integer needs a quick look ahead.
            // This repeats a bit of the scanning that the integer/float
            // parsers do anyway, but it keeps the dispatch simple and the
            // cost is small.
            //
            // In a float literal, the first byte after the leading run of
            // decimal digits (and underscores) has to be '.', 'e' or 'E'.
            let after_digits = rest.get(end_dec_digits(rest)).copied();
            let looks_like_float =
                matches!(after_digits, Some(b'.') | Some(b'e') | Some(b'E'));

            if looks_like_float {
                FloatLit::parse(input).map(Literal::Float)
            } else {
                IntegerLit::parse(input).map(Literal::Integer)
            }
        }

        (b'\'', _) => CharLit::parse(input).map(Literal::Char),
        (b'"', _) | (b'r', _) => StringLit::parse(input).map(Literal::String),

        (b'b', Some(b'\'')) => ByteLit::parse(input).map(Literal::Byte),
        (b'b', Some(b'r')) | (b'b', Some(b'"')) => {
            ByteStringLit::parse(input).map(Literal::ByteString)
        }

        _ => Err(perr(None, InvalidLiteral)),
    }
}


/// Returns the first byte of `s`, or an `Empty` parse error if `s` contains
/// no bytes at all.
pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
    match s.as_bytes().first() {
        Some(&byte) => Ok(byte),
        None => Err(perr(None, Empty)),
    }
}

/// Returns the length of the leading run of decimal digits and underscores
/// in `input`.
///
/// Put differently: this is the index of the first byte that is neither `_`
/// nor one of `0`–`9`, or `input.len()` if no such byte exists.
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input
        .iter()
        .take_while(|&&byte| byte == b'_' || byte.is_ascii_digit())
        .count()
}

/// Returns the numeric value (0 to 15) of the ASCII hexadecimal digit
/// `digit`, accepting both lowercase and uppercase letters.
///
/// Returns `None` if `digit` is not a hexadecimal digit.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    // For each accepted range, determine the byte that has to be subtracted
    // from `digit` to obtain its value. Letters start at value 10.
    let offset = match digit {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };

    Some(digit - offset)
}

/// Checks whether `s` is acceptable as a literal suffix.
///
/// An empty `s` (no suffix at all) is accepted. A lone `_` is rejected with
/// `InvalidSuffix`. If the first character is ASCII but neither a letter nor
/// `_`, `UnexpectedChar` is returned. Everything else is decided by
/// `is_valid_suffix`, whose strictness depends on the `check_suffix` feature;
/// a rejection there results in `InvalidSuffix`.
pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
    let mut chars = s.chars();
    let first = match chars.next() {
        Some(c) => c,
        // No suffix present: nothing to check.
        None => return Ok(()),
    };
    let rest = chars.as_str();

    // A single underscore on its own is not a valid suffix.
    if s == "_" {
        return Err(InvalidSuffix);
    }

    // Purely for nicer diagnostics: when the very first character of the
    // "suffix" is an ASCII character that cannot start a suffix, reporting
    // an "unexpected character" describes the problem better.
    let ascii_suffix_start = first.is_ascii_alphabetic() || first == '_';
    if first.is_ascii() && !ascii_suffix_start {
        return Err(UnexpectedChar);
    }

    // The exact check is opt-in, since it is not really required when used
    // in a proc macro context.
    #[cfg(feature = "check_suffix")]
    fn is_valid_suffix(first: char, rest: &str) -> bool {
        use unicode_xid::UnicodeXID;

        let start_ok = first == '_' || first.is_xid_start();
        start_ok && !rest.chars().any(|c| !c.is_xid_continue())
    }

    // Without the `unicode_xid` dependency, this is only a best-effort check
    // that catches the most common mistakes: non-ASCII characters are always
    // let through, ASCII characters have to be letters, digits (not in first
    // position) or underscores.
    #[cfg(not(feature = "check_suffix"))]
    fn is_valid_suffix(first: char, rest: &str) -> bool {
        let start_ok = !first.is_ascii() || first.is_ascii_alphabetic() || first == '_';
        start_ok
            && rest
                .chars()
                .all(|c| !c.is_ascii() || c.is_ascii_alphanumeric() || c == '_')
    }

    match is_valid_suffix(first, rest) {
        true => Ok(()),
        false => Err(InvalidSuffix),
    }
}