diff options
Diffstat (limited to 'third_party/rust/naga/src/front/wgsl/lexer.rs')
-rw-r--r-- | third_party/rust/naga/src/front/wgsl/lexer.rs | 671 |
1 files changed, 671 insertions, 0 deletions
diff --git a/third_party/rust/naga/src/front/wgsl/lexer.rs b/third_party/rust/naga/src/front/wgsl/lexer.rs new file mode 100644 index 0000000000..35fe450892 --- /dev/null +++ b/third_party/rust/naga/src/front/wgsl/lexer.rs @@ -0,0 +1,671 @@ +use super::{conv, number::consume_number, Error, ExpectedToken, Span, Token, TokenSpan}; + +fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) { + let pos = input.find(|c| !what(c)).unwrap_or(input.len()); + input.split_at(pos) +} + +fn consume_token(input: &str, generic: bool) -> (Token<'_>, &str) { + let mut chars = input.chars(); + let cur = match chars.next() { + Some(c) => c, + None => return (Token::End, ""), + }; + match cur { + ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()), + '.' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('0'..='9') => consume_number(input), + _ => (Token::Separator(cur), og_chars), + } + } + '@' => (Token::Attribute, chars.as_str()), + '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()), + '<' | '>' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('=') if !generic => (Token::LogicalOperation(cur), chars.as_str()), + Some(c) if c == cur && !generic => { + let og_chars = chars.as_str(); + match chars.next() { + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::ShiftOperation(cur), og_chars), + } + } + _ => (Token::Paren(cur), og_chars), + } + } + '0'..='9' => consume_number(input), + '/' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('/') => { + let _ = chars.position(is_comment_end); + (Token::Trivia, chars.as_str()) + } + Some('*') => { + let mut depth = 1; + let mut prev = None; + + for c in &mut chars { + match (prev, c) { + (Some('*'), '/') => { + prev = None; + depth -= 1; + if depth == 0 { + return (Token::Trivia, chars.as_str()); + } + } + (Some('/'), '*') => { + prev = None; + depth += 1; + } + _ => { + prev = Some(c); + } + } + } + + (Token::End, "") + } + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + '-' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('>') => (Token::Arrow, chars.as_str()), + Some('0'..='9' | '.') => consume_number(input), + Some('-') => (Token::DecrementOperation, chars.as_str()), + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + '+' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('+') => (Token::IncrementOperation, chars.as_str()), + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + '*' | '%' | '^' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + '~' => (Token::Operation(cur), chars.as_str()), + '=' | '!' => { + let og_chars = chars.as_str(); + match chars.next() { + Some('=') => (Token::LogicalOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + '&' | '|' => { + let og_chars = chars.as_str(); + match chars.next() { + Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()), + Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), + _ => (Token::Operation(cur), og_chars), + } + } + _ if is_blankspace(cur) => { + let (_, rest) = consume_any(input, is_blankspace); + (Token::Trivia, rest) + } + _ if is_word_start(cur) => { + let (word, rest) = consume_any(input, is_word_part); + (Token::Word(word), rest) + } + _ => (Token::Unknown(cur), chars.as_str()), + } +} + +/// Returns whether or not a char is a comment end +/// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F) +const fn is_comment_end(c: char) -> bool { + match c { + '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true, + _ => false, + } +} + +/// Returns whether or not a char is a blankspace (Unicode Pattern_White_Space) +const fn is_blankspace(c: char) -> bool { + match c { + '\u{0020}' + | '\u{0009}'..='\u{000d}' + | '\u{0085}' + | '\u{200e}' + | '\u{200f}' + | '\u{2028}' + | '\u{2029}' => true, + _ => false, + } +} + +/// Returns whether or not a char is a word start (Unicode XID_Start + '_') +fn is_word_start(c: char) -> bool { + c == '_' || unicode_xid::UnicodeXID::is_xid_start(c) +} + +/// Returns whether or not a char is a word part (Unicode XID_Continue) +fn is_word_part(c: char) -> bool { + unicode_xid::UnicodeXID::is_xid_continue(c) +} + +#[derive(Clone)] +pub(super) struct Lexer<'a> { + input: &'a str, + pub(super) source: &'a str, + // The byte offset of the end of the last non-trivia token. + last_end_offset: usize, +} + +impl<'a> Lexer<'a> { + pub(super) const fn new(input: &'a str) -> Self { + Lexer { + input, + source: input, + last_end_offset: 0, + } + } + + pub(super) const fn _leftover_span(&self) -> Span { + self.source.len() - self.input.len()..self.source.len() + } + + /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed + /// + /// # Examples + /// ```ignore + /// let lexer = Lexer::new("5"); + /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal); + /// assert_eq!(value, 5); + /// ``` + #[inline] + pub fn capture_span<T, E>( + &mut self, + inner: impl FnOnce(&mut Self) -> Result<T, E>, + ) -> Result<(T, Span), E> { + let start = self.current_byte_offset(); + let res = inner(self)?; + let end = self.current_byte_offset(); + Ok((res, start..end)) + } + + pub(super) fn start_byte_offset(&mut self) -> usize { + loop { + // Eat all trivia because `next` doesn't eat trailing trivia. + let (token, rest) = consume_token(self.input, false); + if let Token::Trivia = token { + self.input = rest; + } else { + return self.current_byte_offset(); + } + } + } + + pub(super) const fn end_byte_offset(&self) -> usize { + self.last_end_offset + } + + fn peek_token_and_rest(&mut self) -> (TokenSpan<'a>, &'a str) { + let mut cloned = self.clone(); + let token = cloned.next(); + let rest = cloned.input; + (token, rest) + } + + const fn current_byte_offset(&self) -> usize { + self.source.len() - self.input.len() + } + + pub(super) const fn span_from(&self, offset: usize) -> Span { + offset..self.end_byte_offset() + } + + #[must_use] + pub(super) fn next(&mut self) -> TokenSpan<'a> { + let mut start_byte_offset = self.current_byte_offset(); + loop { + let (token, rest) = consume_token(self.input, false); + self.input = rest; + match token { + Token::Trivia => start_byte_offset = self.current_byte_offset(), + _ => { + self.last_end_offset = self.current_byte_offset(); + return (token, start_byte_offset..self.last_end_offset); + } + } + } + } + + #[must_use] + pub(super) fn next_generic(&mut self) -> TokenSpan<'a> { + let mut start_byte_offset = self.current_byte_offset(); + loop { + let (token, rest) = consume_token(self.input, true); + self.input = rest; + match token { + Token::Trivia => start_byte_offset = self.current_byte_offset(), + _ => return (token, start_byte_offset..self.current_byte_offset()), + } + } + } + + #[must_use] + pub(super) fn peek(&mut self) -> TokenSpan<'a> { + let (token, _) = self.peek_token_and_rest(); + token + } + + pub(super) fn expect_span( + &mut self, + expected: Token<'a>, + ) -> Result<std::ops::Range<usize>, Error<'a>> { + let next = self.next(); + if next.0 == expected { + Ok(next.1) + } else { + Err(Error::Unexpected(next.1, ExpectedToken::Token(expected))) + } + } + + pub(super) fn expect(&mut self, expected: Token<'a>) -> Result<(), Error<'a>> { + self.expect_span(expected)?; + Ok(()) + } + + pub(super) fn expect_generic_paren(&mut self, expected: char) -> Result<(), Error<'a>> { + let next = self.next_generic(); + if next.0 == Token::Paren(expected) { + Ok(()) + } else { + Err(Error::Unexpected( + next.1, + ExpectedToken::Token(Token::Paren(expected)), + )) + } + } + + /// If the next token matches it is skipped and true is returned + pub(super) fn skip(&mut self, what: Token<'_>) -> bool { + let (peeked_token, rest) = self.peek_token_and_rest(); + if peeked_token.0 == what { + self.input = rest; + true + } else { + false + } + } + + pub(super) fn next_ident_with_span(&mut self) -> Result<(&'a str, Span), Error<'a>> { + match self.next() { + (Token::Word(word), span) if word == "_" => { + Err(Error::InvalidIdentifierUnderscore(span)) + } + (Token::Word(word), span) if word.starts_with("__") => { + Err(Error::ReservedIdentifierPrefix(span)) + } + (Token::Word(word), span) => Ok((word, span)), + other => Err(Error::Unexpected(other.1, ExpectedToken::Identifier)), + } + } + + pub(super) fn next_ident(&mut self) -> Result<&'a str, Error<'a>> { + self.next_ident_with_span().map(|(word, _)| word) + } + + /// Parses a generic scalar type, for example `<f32>`. + pub(super) fn next_scalar_generic( + &mut self, + ) -> Result<(crate::ScalarKind, crate::Bytes), Error<'a>> { + self.expect_generic_paren('<')?; + let pair = match self.next() { + (Token::Word(word), span) => { + conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span)) + } + (_, span) => Err(Error::UnknownScalarType(span)), + }?; + self.expect_generic_paren('>')?; + Ok(pair) + } + + /// Parses a generic scalar type, for example `<f32>`. + /// + /// Returns the span covering the inner type, excluding the brackets. + pub(super) fn next_scalar_generic_with_span( + &mut self, + ) -> Result<(crate::ScalarKind, crate::Bytes, Span), Error<'a>> { + self.expect_generic_paren('<')?; + let pair = match self.next() { + (Token::Word(word), span) => conv::get_scalar_type(word) + .map(|(a, b)| (a, b, span.clone())) + .ok_or(Error::UnknownScalarType(span)), + (_, span) => Err(Error::UnknownScalarType(span)), + }?; + self.expect_generic_paren('>')?; + Ok(pair) + } + + pub(super) fn next_storage_access(&mut self) -> Result<crate::StorageAccess, Error<'a>> { + let (ident, span) = self.next_ident_with_span()?; + match ident { + "read" => Ok(crate::StorageAccess::LOAD), + "write" => Ok(crate::StorageAccess::STORE), + "read_write" => Ok(crate::StorageAccess::LOAD | crate::StorageAccess::STORE), + _ => Err(Error::UnknownAccess(span)), + } + } + + pub(super) fn next_format_generic( + &mut self, + ) -> Result<(crate::StorageFormat, crate::StorageAccess), Error<'a>> { + self.expect(Token::Paren('<'))?; + let (ident, ident_span) = self.next_ident_with_span()?; + let format = conv::map_storage_format(ident, ident_span)?; + self.expect(Token::Separator(','))?; + let access = self.next_storage_access()?; + self.expect(Token::Paren('>'))?; + Ok((format, access)) + } + + pub(super) fn open_arguments(&mut self) -> Result<(), Error<'a>> { + self.expect(Token::Paren('(')) + } + + pub(super) fn close_arguments(&mut self) -> Result<(), Error<'a>> { + let _ = self.skip(Token::Separator(',')); + self.expect(Token::Paren(')')) + } + + pub(super) fn next_argument(&mut self) -> Result<bool, Error<'a>> { + let paren = Token::Paren(')'); + if self.skip(Token::Separator(',')) { + Ok(!self.skip(paren)) + } else { + self.expect(paren).map(|()| false) + } + } +} + +#[cfg(test)] +use super::{number::Number, NumberError}; + +#[cfg(test)] +fn sub_test(source: &str, expected_tokens: &[Token]) { + let mut lex = Lexer::new(source); + for &token in expected_tokens { + assert_eq!(lex.next().0, token); + } + assert_eq!(lex.next().0, Token::End); +} + +#[test] +fn test_numbers() { + // WGSL spec examples // + + // decimal integer + sub_test( + "0x123 0X123u 1u 123 0 0i 0x3f", + &[ + Token::Number(Ok(Number::I32(291))), + Token::Number(Ok(Number::U32(291))), + Token::Number(Ok(Number::U32(1))), + Token::Number(Ok(Number::I32(123))), + Token::Number(Ok(Number::I32(0))), + Token::Number(Ok(Number::I32(0))), + Token::Number(Ok(Number::I32(63))), + ], + ); + // decimal floating point + sub_test( + "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h", + &[ + Token::Number(Ok(Number::F32(0.))), + Token::Number(Ok(Number::F32(1.))), + Token::Number(Ok(Number::F32(0.01))), + Token::Number(Ok(Number::F32(12.34))), + Token::Number(Ok(Number::F32(0.))), + Token::Number(Err(NumberError::UnimplementedF16)), + Token::Number(Ok(Number::F32(0.001))), + Token::Number(Ok(Number::F32(43.75))), + Token::Number(Ok(Number::F32(16.))), + Token::Number(Ok(Number::F32(0.1875))), + Token::Number(Err(NumberError::UnimplementedF16)), + Token::Number(Ok(Number::F32(0.12109375))), + Token::Number(Err(NumberError::UnimplementedF16)), + ], + ); + + // MIN / MAX // + + // min / max decimal signed integer + sub_test( + "-2147483648i 2147483647i -2147483649i 2147483648i", + &[ + Token::Number(Ok(Number::I32(i32::MIN))), + Token::Number(Ok(Number::I32(i32::MAX))), + Token::Number(Err(NumberError::NotRepresentable)), + Token::Number(Err(NumberError::NotRepresentable)), + ], + ); + // min / max decimal unsigned integer + sub_test( + "0u 4294967295u -1u 4294967296u", + &[ + Token::Number(Ok(Number::U32(u32::MIN))), + Token::Number(Ok(Number::U32(u32::MAX))), + Token::Number(Err(NumberError::NotRepresentable)), + Token::Number(Err(NumberError::NotRepresentable)), + ], + ); + + // min / max hexadecimal signed integer + sub_test( + "-0x80000000i 0x7FFFFFFFi -0x80000001i 0x80000000i", + &[ + Token::Number(Ok(Number::I32(i32::MIN))), + Token::Number(Ok(Number::I32(i32::MAX))), + Token::Number(Err(NumberError::NotRepresentable)), + Token::Number(Err(NumberError::NotRepresentable)), + ], + ); + // min / max hexadecimal unsigned integer + sub_test( + "0x0u 0xFFFFFFFFu -0x1u 0x100000000u", + &[ + Token::Number(Ok(Number::U32(u32::MIN))), + Token::Number(Ok(Number::U32(u32::MAX))), + Token::Number(Err(NumberError::NotRepresentable)), + Token::Number(Err(NumberError::NotRepresentable)), + ], + ); + + /// ≈ 2^-126 * 2^−23 (= 2^−149) + const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45; + /// ≈ 2^-126 * (1 − 2^−23) + const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38; + /// ≈ 2^-126 + const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE; + /// ≈ 1 − 2^−24 + const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994; + /// ≈ 1 + 2^−23 + const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001; + /// ≈ -(2^127 * (2 − 2^−23)) + const SMALLEST_NORMAL_F32: f32 = f32::MIN; + /// ≈ 2^127 * (2 − 2^−23) + const LARGEST_NORMAL_F32: f32 = f32::MAX; + + // decimal floating point + sub_test( + "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f -3.40282347e+38f 3.40282347e+38f", + &[ + Token::Number(Ok(Number::F32( + SMALLEST_POSITIVE_SUBNORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_SUBNORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_POSITIVE_NORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_F32_LESS_THAN_ONE, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_F32_LARGER_THAN_ONE, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_NORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_NORMAL_F32, + ))), + ], + ); + sub_test( + "-3.40282367e+38f 3.40282367e+38f", + &[ + Token::Number(Err(NumberError::NotRepresentable)), // ≈ -2^128 + Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128 + ], + ); + + // hexadecimal floating point + sub_test( + "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f -0xFFFFFFp+104f 0xFFFFFFp+104f", + &[ + Token::Number(Ok(Number::F32( + SMALLEST_POSITIVE_SUBNORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_SUBNORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_POSITIVE_NORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_F32_LESS_THAN_ONE, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_F32_LARGER_THAN_ONE, + ))), + Token::Number(Ok(Number::F32( + SMALLEST_NORMAL_F32, + ))), + Token::Number(Ok(Number::F32( + LARGEST_NORMAL_F32, + ))), + ], + ); + sub_test( + "-0x1p128f 0x1p128f 0x1.000001p0f", + &[ + Token::Number(Err(NumberError::NotRepresentable)), // = -2^128 + Token::Number(Err(NumberError::NotRepresentable)), // = 2^128 + Token::Number(Err(NumberError::NotRepresentable)), + ], + ); +} + +#[test] +fn test_tokens() { + sub_test("id123_OK", &[Token::Word("id123_OK")]); + sub_test( + "92No", + &[Token::Number(Ok(Number::I32(92))), Token::Word("No")], + ); + sub_test( + "2u3o", + &[ + Token::Number(Ok(Number::U32(2))), + Token::Number(Ok(Number::I32(3))), + Token::Word("o"), + ], + ); + sub_test( + "2.4f44po", + &[ + Token::Number(Ok(Number::F32(2.4))), + Token::Number(Ok(Number::I32(44))), + Token::Word("po"), + ], + ); + sub_test( + "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ", + &[ + Token::Word("Δέλτα"), + Token::Word("réflexion"), + Token::Word("Кызыл"), + Token::Word("𐰓𐰏𐰇"), + Token::Word("朝焼け"), + Token::Word("سلام"), + Token::Word("검정"), + Token::Word("שָׁלוֹם"), + Token::Word("गुलाबी"), + Token::Word("փիրուզ"), + ], + ); + sub_test("æNoø", &[Token::Word("æNoø")]); + sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]); + sub_test("No好", &[Token::Word("No好")]); + sub_test("_No", &[Token::Word("_No")]); + sub_test( + "*/*/***/*//=/*****//", + &[ + Token::Operation('*'), + Token::AssignmentOperation('/'), + Token::Operation('/'), + ], + ); +} + +#[test] +fn test_variable_decl() { + sub_test( + "@group(0 ) var< uniform> texture: texture_multisampled_2d <f32 >;", + &[ + Token::Attribute, + Token::Word("group"), + Token::Paren('('), + Token::Number(Ok(Number::I32(0))), + Token::Paren(')'), + Token::Word("var"), + Token::Paren('<'), + Token::Word("uniform"), + Token::Paren('>'), + Token::Word("texture"), + Token::Separator(':'), + Token::Word("texture_multisampled_2d"), + Token::Paren('<'), + Token::Word("f32"), + Token::Paren('>'), + Token::Separator(';'), + ], + ); + sub_test( + "var<storage,read_write> buffer: array<u32>;", + &[ + Token::Word("var"), + Token::Paren('<'), + Token::Word("storage"), + Token::Separator(','), + Token::Word("read_write"), + Token::Paren('>'), + Token::Word("buffer"), + Token::Separator(':'), + Token::Word("array"), + Token::Paren('<'), + Token::Word("u32"), + Token::Paren('>'), + Token::Separator(';'), + ], + ); +} |