diff options
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r-- | compiler/rustc_parse/src/lexer/diagnostics.rs | 2 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 42 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 2 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 39 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 2 |
5 files changed, 51 insertions, 36 deletions
diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs index 9e6d27bf0..b50bb47f2 100644 --- a/compiler/rustc_parse/src/lexer/diagnostics.rs +++ b/compiler/rustc_parse/src/lexer/diagnostics.rs @@ -46,7 +46,7 @@ pub fn report_missing_open_delim( }; err.span_label( unmatch_brace.found_span.shrink_to_lo(), - format!("missing open `{}` for this delimiter", missed_open), + format!("missing open `{missed_open}` for this delimiter"), ); reported_missing_open = true; } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c6e6b46e4..a375a1d69 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; use rustc_lexer::unescape::{self, EscapeError, Mode}; -use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; +use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; @@ -74,7 +74,6 @@ pub(crate) fn parse_token_trees<'a>( // because the delimiter mismatch is more likely to be the root cause of error let mut buffer = Vec::with_capacity(1); - // Not using `emit_unclosed_delims` to use `db.buffer` for unmatched in unmatched_delims { if let Some(err) = make_unclosed_delims_error(unmatched, &sess) { err.buffer(&mut buffer); @@ -118,6 +117,7 @@ impl<'a> StringReader<'a> { let mut swallow_next_invalid = 0; // Skip trivial (whitespace & comments) tokens loop { + let str_before = self.cursor.as_str(); let token = self.cursor.advance_token(); let start = self.pos; self.pos = self.pos + BytePos(token.len); @@ -165,10 +165,7 @@ impl<'a> StringReader<'a> { continue; } rustc_lexer::TokenKind::Ident => { - let sym = 
nfc_normalize(self.str_from(start)); - let span = self.mk_sp(start, self.pos); - self.sess.symbol_gallery.insert(sym, span); - token::Ident(sym, false) + self.ident(start) } rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); @@ -182,10 +179,7 @@ impl<'a> StringReader<'a> { } rustc_lexer::TokenKind::UnknownPrefix => { self.report_unknown_prefix(start); - let sym = nfc_normalize(self.str_from(start)); - let span = self.mk_sp(start, self.pos); - self.sess.symbol_gallery.insert(sym, span); - token::Ident(sym, false) + self.ident(start) } rustc_lexer::TokenKind::InvalidIdent // Do not recover an identifier with emoji if the codepoint is a confusable @@ -203,6 +197,27 @@ impl<'a> StringReader<'a> { .push(span); token::Ident(sym, false) } + // split up (raw) c string literals to an ident and a string literal when edition < 2021. + rustc_lexer::TokenKind::Literal { + kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }), + suffix_start: _, + } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => { + let prefix_len = match kind { + LiteralKind::CStr { .. } => 1, + LiteralKind::RawCStr { .. } => 2, + _ => unreachable!(), + }; + + // reset the state so that only the prefix ("c" or "cr") + // was consumed. 
+ let lit_start = start + BytePos(prefix_len); + self.pos = lit_start; + self.cursor = Cursor::new(&str_before[prefix_len as usize..]); + + self.report_unknown_prefix(start); + let prefix_span = self.mk_sp(start, lit_start); + return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace); + } rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); @@ -317,6 +332,13 @@ impl<'a> StringReader<'a> { } } + fn ident(&self, start: BytePos) -> TokenKind { + let sym = nfc_normalize(self.str_from(start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + token::Ident(sym, false) + } + fn struct_fatal_span_char( &self, from_pos: BytePos, diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index 318a29985..07910113d 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -198,7 +198,7 @@ impl<'a> TokenTreesReader<'a> { // An unexpected closing delimiter (i.e., there is no // matching opening delimiter). 
let token_str = token_to_string(&self.token); - let msg = format!("unexpected closing delimiter: `{}`", token_str); + let msg = format!("unexpected closing delimiter: `{token_str}`"); let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); report_suspicious_mismatch_block( diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 461a34b67..b659c40b2 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -27,7 +27,7 @@ pub(crate) fn emit_unescape_error( lit, span_with_quotes, mode, range, error ); let last_char = || { - let c = lit[range.clone()].chars().rev().next().unwrap(); + let c = lit[range.clone()].chars().next_back().unwrap(); let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; @@ -80,20 +80,14 @@ pub(crate) fn emit_unescape_error( let sugg = sugg.unwrap_or_else(|| { let prefix = mode.prefix_noraw(); let mut escaped = String::with_capacity(lit.len()); - let mut chrs = lit.chars().peekable(); - while let Some(first) = chrs.next() { - match (first, chrs.peek()) { - ('\\', Some('"')) => { - escaped.push('\\'); - escaped.push('"'); - chrs.next(); - } - ('"', _) => { - escaped.push('\\'); - escaped.push('"') - } - (c, _) => escaped.push(c), - }; + let mut in_escape = false; + for c in lit.chars() { + match c { + '\\' => in_escape = !in_escape, + '"' if !in_escape => escaped.push('\\'), + _ => in_escape = false, + } + escaped.push(c); } let sugg = format!("{prefix}\"{escaped}\""); MoreThanOneCharSugg::Quotes { @@ -135,7 +129,7 @@ pub(crate) fn emit_unescape_error( "unknown character escape" }; let ec = escaped_char(c); - let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec)); + let mut diag = handler.struct_span_err(span, format!("{label}: `{ec}`")); diag.span_label(span, label); if c == '{' || c == '}' && 
matches!(mode, Mode::Str | Mode::RawStr) { diag.help( @@ -151,7 +145,7 @@ pub(crate) fn emit_unescape_error( diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", - format!("r\"{}\"", lit), + format!("r\"{lit}\""), Applicability::MaybeIncorrect, ); } @@ -180,21 +174,20 @@ pub(crate) fn emit_unescape_error( Mode::RawByteStr => "raw byte string literal", _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), }; - let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); + let mut err = handler.struct_span_err(span, format!("non-ASCII character in {desc}")); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { - format!(" but is {:?}", c) + format!(" but is {c:?}") } else { String::new() }; - err.span_label(span, format!("must be ASCII{}", postfix)); + err.span_label(span, format!("must be ASCII{postfix}")); // Note: the \\xHH suggestions are not given for raw byte string // literals, because they are raw and so cannot use any escapes. 
if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, format!( - "if you meant to use the unicode code point for {:?}, use a \\xHH escape", - c + "if you meant to use the unicode code point for {c:?}, use a \\xHH escape" ), format!("\\x{:X}", c as u32), Applicability::MaybeIncorrect, @@ -206,7 +199,7 @@ pub(crate) fn emit_unescape_error( utf8.push(c); err.span_suggestion( span, - format!("if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c), + format!("if you meant to use the UTF-8 encoding of {c:?}, use \\xHH escapes"), utf8.as_bytes() .iter() .map(|b: &u8| format!("\\x{:X}", *b)) diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs index 829d9693e..bbfb160eb 100644 --- a/compiler/rustc_parse/src/lexer/unicode_chars.rs +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -349,7 +349,7 @@ pub(super) fn check_for_substitution( let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count)); let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else { - let msg = format!("substitution character not found for '{}'", ch); + let msg = format!("substitution character not found for '{ch}'"); reader.sess.span_diagnostic.span_bug_no_panic(span, msg); return (None, None); }; |