use super::{conv, number::consume_number, Error, ExpectedToken, Span, Token, TokenSpan}; fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) { let pos = input.find(|c| !what(c)).unwrap_or(input.len()); input.split_at(pos) } fn consume_token(input: &str, generic: bool) -> (Token<'_>, &str) { let mut chars = input.chars(); let cur = match chars.next() { Some(c) => c, None => return (Token::End, ""), }; match cur { ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()), '.' => { let og_chars = chars.as_str(); match chars.next() { Some('0'..='9') => consume_number(input), _ => (Token::Separator(cur), og_chars), } } '@' => (Token::Attribute, chars.as_str()), '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()), '<' | '>' => { let og_chars = chars.as_str(); match chars.next() { Some('=') if !generic => (Token::LogicalOperation(cur), chars.as_str()), Some(c) if c == cur && !generic => { let og_chars = chars.as_str(); match chars.next() { Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::ShiftOperation(cur), og_chars), } } _ => (Token::Paren(cur), og_chars), } } '0'..='9' => consume_number(input), '/' => { let og_chars = chars.as_str(); match chars.next() { Some('/') => { let _ = chars.position(is_comment_end); (Token::Trivia, chars.as_str()) } Some('*') => { let mut depth = 1; let mut prev = None; for c in &mut chars { match (prev, c) { (Some('*'), '/') => { prev = None; depth -= 1; if depth == 0 { return (Token::Trivia, chars.as_str()); } } (Some('/'), '*') => { prev = None; depth += 1; } _ => { prev = Some(c); } } } (Token::End, "") } Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } '-' => { let og_chars = chars.as_str(); match chars.next() { Some('>') => (Token::Arrow, chars.as_str()), Some('0'..='9' | '.') => consume_number(input), Some('-') => (Token::DecrementOperation, chars.as_str()), Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } '+' => { let og_chars = chars.as_str(); match chars.next() { Some('+') => (Token::IncrementOperation, chars.as_str()), Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } '*' | '%' | '^' => { let og_chars = chars.as_str(); match chars.next() { Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } '~' => (Token::Operation(cur), chars.as_str()), '=' | '!' => { let og_chars = chars.as_str(); match chars.next() { Some('=') => (Token::LogicalOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } '&' | '|' => { let og_chars = chars.as_str(); match chars.next() { Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()), Some('=') => (Token::AssignmentOperation(cur), chars.as_str()), _ => (Token::Operation(cur), og_chars), } } _ if is_blankspace(cur) => { let (_, rest) = consume_any(input, is_blankspace); (Token::Trivia, rest) } _ if is_word_start(cur) => { let (word, rest) = consume_any(input, is_word_part); (Token::Word(word), rest) } _ => (Token::Unknown(cur), chars.as_str()), } } /// Returns whether or not a char is a comment end /// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F) const fn is_comment_end(c: char) -> bool { match c { '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true, _ => false, } } /// Returns whether or not a char is a blankspace (Unicode Pattern_White_Space) const fn is_blankspace(c: char) -> bool { match c { '\u{0020}' | '\u{0009}'..='\u{000d}' | '\u{0085}' | '\u{200e}' | '\u{200f}' | '\u{2028}' | '\u{2029}' => true, _ => false, } } /// Returns whether or not a char is a word start (Unicode XID_Start + '_') fn is_word_start(c: char) -> bool { c == '_' || unicode_xid::UnicodeXID::is_xid_start(c) } /// Returns whether or not a char is a word part (Unicode XID_Continue) fn is_word_part(c: char) -> bool { unicode_xid::UnicodeXID::is_xid_continue(c) } #[derive(Clone)] pub(super) struct Lexer<'a> { input: &'a str, pub(super) source: &'a str, // The byte offset of the end of the last non-trivia token. last_end_offset: usize, } impl<'a> Lexer<'a> { pub(super) const fn new(input: &'a str) -> Self { Lexer { input, source: input, last_end_offset: 0, } } pub(super) const fn _leftover_span(&self) -> Span { self.source.len() - self.input.len()..self.source.len() } /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed /// /// # Examples /// ```ignore /// let lexer = Lexer::new("5"); /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal); /// assert_eq!(value, 5); /// ``` #[inline] pub fn capture_span( &mut self, inner: impl FnOnce(&mut Self) -> Result, ) -> Result<(T, Span), E> { let start = self.current_byte_offset(); let res = inner(self)?; let end = self.current_byte_offset(); Ok((res, start..end)) } pub(super) fn start_byte_offset(&mut self) -> usize { loop { // Eat all trivia because `next` doesn't eat trailing trivia. let (token, rest) = consume_token(self.input, false); if let Token::Trivia = token { self.input = rest; } else { return self.current_byte_offset(); } } } pub(super) const fn end_byte_offset(&self) -> usize { self.last_end_offset } fn peek_token_and_rest(&mut self) -> (TokenSpan<'a>, &'a str) { let mut cloned = self.clone(); let token = cloned.next(); let rest = cloned.input; (token, rest) } const fn current_byte_offset(&self) -> usize { self.source.len() - self.input.len() } pub(super) const fn span_from(&self, offset: usize) -> Span { offset..self.end_byte_offset() } #[must_use] pub(super) fn next(&mut self) -> TokenSpan<'a> { let mut start_byte_offset = self.current_byte_offset(); loop { let (token, rest) = consume_token(self.input, false); self.input = rest; match token { Token::Trivia => start_byte_offset = self.current_byte_offset(), _ => { self.last_end_offset = self.current_byte_offset(); return (token, start_byte_offset..self.last_end_offset); } } } } #[must_use] pub(super) fn next_generic(&mut self) -> TokenSpan<'a> { let mut start_byte_offset = self.current_byte_offset(); loop { let (token, rest) = consume_token(self.input, true); self.input = rest; match token { Token::Trivia => start_byte_offset = self.current_byte_offset(), _ => return (token, start_byte_offset..self.current_byte_offset()), } } } #[must_use] pub(super) fn peek(&mut self) -> TokenSpan<'a> { let (token, _) = self.peek_token_and_rest(); token } pub(super) fn expect_span( &mut self, expected: Token<'a>, ) -> Result, Error<'a>> { let next = self.next(); if next.0 == expected { Ok(next.1) } else { Err(Error::Unexpected(next.1, ExpectedToken::Token(expected))) } } pub(super) fn expect(&mut self, expected: Token<'a>) -> Result<(), Error<'a>> { self.expect_span(expected)?; Ok(()) } pub(super) fn expect_generic_paren(&mut self, expected: char) -> Result<(), Error<'a>> { let next = self.next_generic(); if next.0 == Token::Paren(expected) { Ok(()) } else { Err(Error::Unexpected( next.1, ExpectedToken::Token(Token::Paren(expected)), )) } } /// If the next token matches it is skipped and true is returned pub(super) fn skip(&mut self, what: Token<'_>) -> bool { let (peeked_token, rest) = self.peek_token_and_rest(); if peeked_token.0 == what { self.input = rest; true } else { false } } pub(super) fn next_ident_with_span(&mut self) -> Result<(&'a str, Span), Error<'a>> { match self.next() { (Token::Word(word), span) if word == "_" => { Err(Error::InvalidIdentifierUnderscore(span)) } (Token::Word(word), span) if word.starts_with("__") => { Err(Error::ReservedIdentifierPrefix(span)) } (Token::Word(word), span) => Ok((word, span)), other => Err(Error::Unexpected(other.1, ExpectedToken::Identifier)), } } pub(super) fn next_ident(&mut self) -> Result<&'a str, Error<'a>> { self.next_ident_with_span().map(|(word, _)| word) } /// Parses a generic scalar type, for example ``. pub(super) fn next_scalar_generic( &mut self, ) -> Result<(crate::ScalarKind, crate::Bytes), Error<'a>> { self.expect_generic_paren('<')?; let pair = match self.next() { (Token::Word(word), span) => { conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span)) } (_, span) => Err(Error::UnknownScalarType(span)), }?; self.expect_generic_paren('>')?; Ok(pair) } /// Parses a generic scalar type, for example ``. /// /// Returns the span covering the inner type, excluding the brackets. pub(super) fn next_scalar_generic_with_span( &mut self, ) -> Result<(crate::ScalarKind, crate::Bytes, Span), Error<'a>> { self.expect_generic_paren('<')?; let pair = match self.next() { (Token::Word(word), span) => conv::get_scalar_type(word) .map(|(a, b)| (a, b, span.clone())) .ok_or(Error::UnknownScalarType(span)), (_, span) => Err(Error::UnknownScalarType(span)), }?; self.expect_generic_paren('>')?; Ok(pair) } pub(super) fn next_storage_access(&mut self) -> Result> { let (ident, span) = self.next_ident_with_span()?; match ident { "read" => Ok(crate::StorageAccess::LOAD), "write" => Ok(crate::StorageAccess::STORE), "read_write" => Ok(crate::StorageAccess::LOAD | crate::StorageAccess::STORE), _ => Err(Error::UnknownAccess(span)), } } pub(super) fn next_format_generic( &mut self, ) -> Result<(crate::StorageFormat, crate::StorageAccess), Error<'a>> { self.expect(Token::Paren('<'))?; let (ident, ident_span) = self.next_ident_with_span()?; let format = conv::map_storage_format(ident, ident_span)?; self.expect(Token::Separator(','))?; let access = self.next_storage_access()?; self.expect(Token::Paren('>'))?; Ok((format, access)) } pub(super) fn open_arguments(&mut self) -> Result<(), Error<'a>> { self.expect(Token::Paren('(')) } pub(super) fn close_arguments(&mut self) -> Result<(), Error<'a>> { let _ = self.skip(Token::Separator(',')); self.expect(Token::Paren(')')) } pub(super) fn next_argument(&mut self) -> Result> { let paren = Token::Paren(')'); if self.skip(Token::Separator(',')) { Ok(!self.skip(paren)) } else { self.expect(paren).map(|()| false) } } } #[cfg(test)] use super::{number::Number, NumberError}; #[cfg(test)] fn sub_test(source: &str, expected_tokens: &[Token]) { let mut lex = Lexer::new(source); for &token in expected_tokens { assert_eq!(lex.next().0, token); } assert_eq!(lex.next().0, Token::End); } #[test] fn test_numbers() { // WGSL spec examples // // decimal integer sub_test( "0x123 0X123u 1u 123 0 0i 0x3f", &[ Token::Number(Ok(Number::I32(291))), Token::Number(Ok(Number::U32(291))), Token::Number(Ok(Number::U32(1))), Token::Number(Ok(Number::I32(123))), Token::Number(Ok(Number::I32(0))), Token::Number(Ok(Number::I32(0))), Token::Number(Ok(Number::I32(63))), ], ); // decimal floating point sub_test( "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h", &[ Token::Number(Ok(Number::F32(0.))), Token::Number(Ok(Number::F32(1.))), Token::Number(Ok(Number::F32(0.01))), Token::Number(Ok(Number::F32(12.34))), Token::Number(Ok(Number::F32(0.))), Token::Number(Err(NumberError::UnimplementedF16)), Token::Number(Ok(Number::F32(0.001))), Token::Number(Ok(Number::F32(43.75))), Token::Number(Ok(Number::F32(16.))), Token::Number(Ok(Number::F32(0.1875))), Token::Number(Err(NumberError::UnimplementedF16)), Token::Number(Ok(Number::F32(0.12109375))), Token::Number(Err(NumberError::UnimplementedF16)), ], ); // MIN / MAX // // min / max decimal signed integer sub_test( "-2147483648i 2147483647i -2147483649i 2147483648i", &[ Token::Number(Ok(Number::I32(i32::MIN))), Token::Number(Ok(Number::I32(i32::MAX))), Token::Number(Err(NumberError::NotRepresentable)), Token::Number(Err(NumberError::NotRepresentable)), ], ); // min / max decimal unsigned integer sub_test( "0u 4294967295u -1u 4294967296u", &[ Token::Number(Ok(Number::U32(u32::MIN))), Token::Number(Ok(Number::U32(u32::MAX))), Token::Number(Err(NumberError::NotRepresentable)), Token::Number(Err(NumberError::NotRepresentable)), ], ); // min / max hexadecimal signed integer sub_test( "-0x80000000i 0x7FFFFFFFi -0x80000001i 0x80000000i", &[ Token::Number(Ok(Number::I32(i32::MIN))), Token::Number(Ok(Number::I32(i32::MAX))), Token::Number(Err(NumberError::NotRepresentable)), Token::Number(Err(NumberError::NotRepresentable)), ], ); // min / max hexadecimal unsigned integer sub_test( "0x0u 0xFFFFFFFFu -0x1u 0x100000000u", &[ Token::Number(Ok(Number::U32(u32::MIN))), Token::Number(Ok(Number::U32(u32::MAX))), Token::Number(Err(NumberError::NotRepresentable)), Token::Number(Err(NumberError::NotRepresentable)), ], ); /// ≈ 2^-126 * 2^−23 (= 2^−149) const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45; /// ≈ 2^-126 * (1 − 2^−23) const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38; /// ≈ 2^-126 const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE; /// ≈ 1 − 2^−24 const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994; /// ≈ 1 + 2^−23 const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001; /// ≈ -(2^127 * (2 − 2^−23)) const SMALLEST_NORMAL_F32: f32 = f32::MIN; /// ≈ 2^127 * (2 − 2^−23) const LARGEST_NORMAL_F32: f32 = f32::MAX; // decimal floating point sub_test( "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f -3.40282347e+38f 3.40282347e+38f", &[ Token::Number(Ok(Number::F32( SMALLEST_POSITIVE_SUBNORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_SUBNORMAL_F32, ))), Token::Number(Ok(Number::F32( SMALLEST_POSITIVE_NORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_F32_LESS_THAN_ONE, ))), Token::Number(Ok(Number::F32( SMALLEST_F32_LARGER_THAN_ONE, ))), Token::Number(Ok(Number::F32( SMALLEST_NORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_NORMAL_F32, ))), ], ); sub_test( "-3.40282367e+38f 3.40282367e+38f", &[ Token::Number(Err(NumberError::NotRepresentable)), // ≈ -2^128 Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128 ], ); // hexadecimal floating point sub_test( "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f -0xFFFFFFp+104f 0xFFFFFFp+104f", &[ Token::Number(Ok(Number::F32( SMALLEST_POSITIVE_SUBNORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_SUBNORMAL_F32, ))), Token::Number(Ok(Number::F32( SMALLEST_POSITIVE_NORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_F32_LESS_THAN_ONE, ))), Token::Number(Ok(Number::F32( SMALLEST_F32_LARGER_THAN_ONE, ))), Token::Number(Ok(Number::F32( SMALLEST_NORMAL_F32, ))), Token::Number(Ok(Number::F32( LARGEST_NORMAL_F32, ))), ], ); sub_test( "-0x1p128f 0x1p128f 0x1.000001p0f", &[ Token::Number(Err(NumberError::NotRepresentable)), // = -2^128 Token::Number(Err(NumberError::NotRepresentable)), // = 2^128 Token::Number(Err(NumberError::NotRepresentable)), ], ); } #[test] fn test_tokens() { sub_test("id123_OK", &[Token::Word("id123_OK")]); sub_test( "92No", &[Token::Number(Ok(Number::I32(92))), Token::Word("No")], ); sub_test( "2u3o", &[ Token::Number(Ok(Number::U32(2))), Token::Number(Ok(Number::I32(3))), Token::Word("o"), ], ); sub_test( "2.4f44po", &[ Token::Number(Ok(Number::F32(2.4))), Token::Number(Ok(Number::I32(44))), Token::Word("po"), ], ); sub_test( "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ", &[ Token::Word("Δέλτα"), Token::Word("réflexion"), Token::Word("Кызыл"), Token::Word("𐰓𐰏𐰇"), Token::Word("朝焼け"), Token::Word("سلام"), Token::Word("검정"), Token::Word("שָׁלוֹם"), Token::Word("गुलाबी"), Token::Word("փիրուզ"), ], ); sub_test("æNoø", &[Token::Word("æNoø")]); sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]); sub_test("No好", &[Token::Word("No好")]); sub_test("_No", &[Token::Word("_No")]); sub_test( "*/*/***/*//=/*****//", &[ Token::Operation('*'), Token::AssignmentOperation('/'), Token::Operation('/'), ], ); } #[test] fn test_variable_decl() { sub_test( "@group(0 ) var< uniform> texture: texture_multisampled_2d ;", &[ Token::Attribute, Token::Word("group"), Token::Paren('('), Token::Number(Ok(Number::I32(0))), Token::Paren(')'), Token::Word("var"), Token::Paren('<'), Token::Word("uniform"), Token::Paren('>'), Token::Word("texture"), Token::Separator(':'), Token::Word("texture_multisampled_2d"), Token::Paren('<'), Token::Word("f32"), Token::Paren('>'), Token::Separator(';'), ], ); sub_test( "var buffer: array;", &[ Token::Word("var"), Token::Paren('<'), Token::Word("storage"), Token::Separator(','), Token::Word("read_write"), Token::Paren('>'), Token::Word("buffer"), Token::Separator(':'), Token::Word("array"), Token::Paren('<'), Token::Word("u32"), Token::Paren('>'), Token::Separator(';'), ], ); }