From d1b2d29528b7794b41e66fc2136e395a02f8529b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:59:35 +0200 Subject: Merging upstream version 1.73.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_lexer/src/cursor.rs | 4 ++++ compiler/rustc_lexer/src/lib.rs | 18 +++++++++++------- compiler/rustc_lexer/src/unescape.rs | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'compiler/rustc_lexer/src') diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index eceef5980..aba7f9548 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -24,6 +24,10 @@ impl<'a> Cursor<'a> { } } + pub fn as_str(&self) -> &'a str { + self.chars.as_str() + } + /// Returns the last eaten symbol (or `'\0'` in release builds). /// (For debug assertions only.) pub(crate) fn prev(&self) -> char { diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 29335a8c0..43dfd34a6 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -34,6 +34,7 @@ pub use crate::cursor::Cursor; use self::LiteralKind::*; use self::TokenKind::*; use crate::cursor::EOF_CHAR; +use unicode_properties::UnicodeEmoji; /// Parsed token. /// It doesn't contain information about data that has been parsed, @@ -367,6 +368,13 @@ impl Cursor<'_> { Some(|terminated| Byte { terminated }), ), + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), + // Identifier (this should be checked after other variant that can // start as identifier). c if is_id_start(c) => self.ident_or_unknown_prefix(), @@ -421,9 +429,7 @@ impl Cursor<'_> { Literal { kind, suffix_start } } // Identifier starting with an emoji. Only lexed for graceful error recovery. - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Unknown, }; let res = Token::new(token_kind, self.pos_within_token()); @@ -507,9 +513,7 @@ impl Cursor<'_> { // we see a prefix here, it is definitely an unknown prefix. match self.first() { '#' | '"' | '\'' => UnknownPrefix, - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Ident, } } @@ -518,7 +522,7 @@ impl Cursor<'_> { // Start is already eaten, eat the rest of identifier. self.eat_while(|c| { unicode_xid::UnicodeXID::is_xid_continue(c) - || (!c.is_ascii() && unic_emoji_char::is_emoji(c)) + || (!c.is_ascii() && c.is_emoji_char()) || c == '\u{200d}' }); // Known prefixes must have been handled earlier. So if diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index c9ad54d8d..717b042fb 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -372,7 +372,7 @@ where callback(start..end, EscapeError::MultipleSkippedLinesWarning); } let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().nth(0) { + if let Some(c) = tail.chars().next() { if c.is_whitespace() { // For error reporting, we would like the span to contain the character that was not // skipped. The +1 is necessary to account for the leading \ that started the escape. -- cgit v1.2.3