From 3e3e70d529d8c7d7c4d7bc4fefc9f109393b9245 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 14:19:43 +0200
Subject: Merging upstream version 1.69.0+dfsg1.

Signed-off-by: Daniel Baumann
---
 compiler/rustc_parse/src/lexer/diagnostics.rs   | 119 ++++++++++
 compiler/rustc_parse/src/lexer/mod.rs           | 189 ++++++----------
 compiler/rustc_parse/src/lexer/tokentrees.rs    | 150 +++++--------
 .../src/lexer/unescape_error_reporting.rs       | 248 ++++++---------------
 compiler/rustc_parse/src/lexer/unicode_chars.rs |  60 +++--
 5 files changed, 349 insertions(+), 417 deletions(-)
 create mode 100644 compiler/rustc_parse/src/lexer/diagnostics.rs

(limited to 'compiler/rustc_parse/src/lexer')

diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs
new file mode 100644
index 000000000..27f4428d3
--- /dev/null
+++ b/compiler/rustc_parse/src/lexer/diagnostics.rs
@@ -0,0 +1,119 @@
+use super::UnmatchedDelim;
+use rustc_ast::token::Delimiter;
+use rustc_errors::Diagnostic;
+use rustc_span::source_map::SourceMap;
+use rustc_span::Span;
+
+#[derive(Default)]
+pub struct TokenTreeDiagInfo {
+    /// Stack of open delimiters and their spans. Used for error message.
+    pub open_braces: Vec<(Delimiter, Span)>,
+    pub unmatched_delims: Vec<UnmatchedDelim>,
+
+    /// Used only for error recovery when arriving to EOF with mismatched braces.
+    pub last_unclosed_found_span: Option<Span>,
+
+    /// Collect empty block spans that might have been auto-inserted by editors.
+    pub empty_block_spans: Vec<Span>,
+
+    /// Collect the spans of braces (Open, Close). Used only
+    /// for detecting if blocks are empty and only braces.
+    pub matching_block_spans: Vec<(Span, Span)>,
+}
+
+pub fn same_identation_level(sm: &SourceMap, open_sp: Span, close_sp: Span) -> bool {
+    match (sm.span_to_margin(open_sp), sm.span_to_margin(close_sp)) {
+        (Some(open_padding), Some(close_padding)) => open_padding == close_padding,
+        _ => false,
+    }
+}
+
+// When we get a `)` or `]` for `{`, we should emit help message here
+// it's more friendly compared to report `unmatched error` in later phase
+pub fn report_missing_open_delim(
+    err: &mut Diagnostic,
+    unmatched_delims: &[UnmatchedDelim],
+) -> bool {
+    let mut reported_missing_open = false;
+    for unmatch_brace in unmatched_delims.iter() {
+        if let Some(delim) = unmatch_brace.found_delim
+            && matches!(delim, Delimiter::Parenthesis | Delimiter::Bracket)
+        {
+            let missed_open = match delim {
+                Delimiter::Parenthesis => "(",
+                Delimiter::Bracket => "[",
+                _ => unreachable!(),
+            };
+            err.span_label(
+                unmatch_brace.found_span.shrink_to_lo(),
+                format!("missing open `{}` for this delimiter", missed_open),
+            );
+            reported_missing_open = true;
+        }
+    }
+    reported_missing_open
+}
+
+pub fn report_suspicious_mismatch_block(
+    err: &mut Diagnostic,
+    diag_info: &TokenTreeDiagInfo,
+    sm: &SourceMap,
+    delim: Delimiter,
+) {
+    if report_missing_open_delim(err, &diag_info.unmatched_delims) {
+        return;
+    }
+
+    let mut matched_spans: Vec<(Span, bool)> = diag_info
+        .matching_block_spans
+        .iter()
+        .map(|&(open, close)| (open.with_hi(close.lo()), same_identation_level(sm, open, close)))
+        .collect();
+
+    // sort by `lo`, so the large block spans in the front
+    matched_spans.sort_by(|a, b| a.0.lo().cmp(&b.0.lo()));
+
+    // We use larger block whose identation is well to cover those inner mismatched blocks
+    // O(N^2) here, but we are on error reporting path, so it is fine
+    for i in 0..matched_spans.len() {
+        let (block_span, same_ident) = matched_spans[i];
+        if same_ident {
+            for j in i + 1..matched_spans.len() {
+                let (inner_block, inner_same_ident) = matched_spans[j];
+                if block_span.contains(inner_block) && !inner_same_ident {
+                    matched_spans[j] = (inner_block, true);
+                }
+            }
+        }
+    }
+
+    // Find the inner-most span candidate for final report
+    let candidate_span =
+        matched_spans.into_iter().rev().find(|&(_, same_ident)| !same_ident).map(|(span, _)| span);
+
+    if let Some(block_span) = candidate_span {
+        err.span_label(block_span.shrink_to_lo(), "this delimiter might not be properly closed...");
+        err.span_label(
+            block_span.shrink_to_hi(),
+            "...as it matches this but it has different indentation",
+        );
+
+        // If there is a empty block in the mismatched span, note it
+        if delim == Delimiter::Brace {
+            for span in diag_info.empty_block_spans.iter() {
+                if block_span.contains(*span) {
+                    err.span_label(*span, "block is empty, you might have not meant to close it");
+                    break;
+                }
+            }
+        }
+    } else {
+        // If there is no suspicious span, give the last properly closed block may help
+        if let Some(parent) = diag_info.matching_block_spans.last()
+            && diag_info.open_braces.last().is_none()
+            && diag_info.empty_block_spans.iter().all(|&sp| sp != parent.0.to(parent.1)) {
+                err.span_label(parent.0, "this opening brace...");
+                err.span_label(parent.1, "...matches this closing brace");
+            }
+    }
+}
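
Note on the heuristic introduced above: `same_identation_level` drives the "different
indentation" hint off `SourceMap::span_to_margin`, i.e. the column at which the line
holding each delimiter starts; an open/close pair whose lines begin at different columns
is flagged as a suspicious match. The following is a minimal standalone sketch of that
idea using plain byte offsets into a string rather than rustc's `SourceMap`/`Span` types;
the helper names are illustrative, not compiler APIs.

    // Sketch: two delimiter positions "match" when the lines containing them
    // start at the same column. Offsets are byte indices into `src`.
    fn margin(src: &str, offset: usize) -> Option<usize> {
        let line_start = src[..offset].rfind('\n').map_or(0, |i| i + 1);
        let line_end = src[line_start..].find('\n').map_or(src.len(), |i| line_start + i);
        let line = &src[line_start..line_end];
        let trimmed = line.trim_start();
        if trimmed.is_empty() { None } else { Some(line.len() - trimmed.len()) }
    }

    fn same_indentation_level(src: &str, open: usize, close: usize) -> bool {
        match (margin(src, open), margin(src, close)) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        }
    }

    fn main() {
        let src = "fn main() {\n    if true {\n    }\n}\n";
        let open = src.find('{').unwrap();   // `{` of `fn main`
        let close = src.rfind('}').unwrap(); // last `}` in the file
        assert!(same_indentation_level(src, open, close));
    }
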
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9fe8d9836..59958a309 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,11 +1,11 @@
+use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
+use crate::make_unclosed_delims_error;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
-    error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
@@ -17,6 +17,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{sym, Symbol};
 use rustc_span::{edition::Edition, BytePos, Pos, Span};
 
+mod diagnostics;
 mod tokentrees;
 mod unescape_error_reporting;
 mod unicode_chars;
@@ -31,7 +32,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
 rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
 
 #[derive(Clone, Debug)]
-pub struct UnmatchedBrace {
+pub struct UnmatchedDelim {
     pub expected_delim: Delimiter,
     pub found_delim: Option<Delimiter>,
     pub found_span: Span,
@@ -44,7 +45,7 @@ pub(crate) fn parse_token_trees<'a>(
     mut src: &'a str,
     mut start_pos: BytePos,
     override_span: Option<Span>,
-) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+) -> Result<TokenStream, Vec<Diagnostic>> {
     // Skip `#!`, if present.
     if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
         src = &src[shebang_len..];
@@ -61,7 +62,29 @@ pub(crate) fn parse_token_trees<'a>(
         override_span,
         nbsp_is_whitespace: false,
     };
-    tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
+    let (token_trees, unmatched_delims) =
+        tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
+    match token_trees {
+        Ok(stream) if unmatched_delims.is_empty() => Ok(stream),
+        _ => {
+            // Return error if there are unmatched delimiters or unclosng delimiters.
+            // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
+            // because the delimiter mismatch is more likely to be the root cause of error
+
+            let mut buffer = Vec::with_capacity(1);
+            // Not using `emit_unclosed_delims` to use `db.buffer`
+            for unmatched in unmatched_delims {
+                if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
+                    err.buffer(&mut buffer);
+                }
+            }
+            if let Err(err) = token_trees {
+                // Add unclosing delimiter error
+                err.buffer(&mut buffer);
+            }
+            Err(buffer)
+        }
+    }
 }
 
 struct StringReader<'a> {
@@ -150,7 +173,7 @@ impl<'a> StringReader<'a> {
                 let span = self.mk_sp(start, self.pos);
                 self.sess.symbol_gallery.insert(sym, span);
                 if !sym.can_be_raw() {
-                    self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+                    self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
                 }
                 self.sess.raw_identifier_spans.borrow_mut().push(span);
                 token::Ident(sym, true)
@@ -261,27 +284,24 @@ impl<'a> StringReader<'a> {
                        self.nbsp_is_whitespace = true;
                    }
                    let repeats = it.take_while(|c1| *c1 == c).count();
-                   let mut err =
-                       self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
                    // FIXME: the lexer could be used to turn the ASCII version of unicode
                    // homoglyphs, instead of keeping a table in `check_for_substitution`into the
                    // token. Ideally, this should be inside `rustc_lexer`. However, we should
                    // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                    // fancier error recovery to it, as there will be less overall work to do this
                    // way.
-                   let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
-                   if c == '\x00' {
-                       err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
-                   }
-                   if repeats > 0 {
-                       if repeats == 1 {
-                           err.note(format!("character appears once more"));
-                       } else {
-                           err.note(format!("character appears {repeats} more times"));
-                       }
-                       swallow_next_invalid = repeats;
-                   }
-                   err.emit();
+                   let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+                   self.sess.emit_err(errors::UnknownTokenStart {
+                       span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+                       escaped: escaped_char(c),
+                       sugg,
+                       null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+                       repeat: if repeats > 0 {
+                           swallow_next_invalid = repeats;
+                           Some(errors::UnknownTokenRepeat { repeats })
+                       } else {None}
+                   });
+
                    if let Some(token) = token {
                        token
                    } else {
@@ -296,26 +316,6 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> ! {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
-        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        self.err_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
     fn struct_fatal_span_char(
         &self,
         from_pos: BytePos,
@@ -328,18 +328,6 @@ impl<'a> StringReader<'a> {
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
-    fn struct_err_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
-        self.sess
-            .span_diagnostic
-            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
     /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
     /// complain about it.
     fn lint_unicode_text_flow(&self, start: BytePos) {
@@ -367,14 +355,12 @@ impl<'a> StringReader<'a> {
     ) -> TokenKind {
         if content.contains('\r') {
             for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
-                self.err_span_(
+                let span = self.mk_sp(
                     content_start + BytePos(idx as u32),
                     content_start + BytePos(idx as u32 + 1),
-                    match comment_kind {
-                        CommentKind::Line => "bare CR not allowed in doc-comment",
-                        CommentKind::Block => "bare CR not allowed in block doc-comment",
-                    },
                 );
+                let block = matches!(comment_kind, CommentKind::Block);
+                self.sess.emit_err(errors::CrDocComment { span, block });
             }
         }
@@ -453,26 +439,20 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
-                    self.sess
-                        .span_diagnostic
-                        .struct_span_err_with_code(
-                            self.mk_sp(start, end),
-                            "no valid digits found for number",
-                            error_code!(E0768),
-                        )
-                        .emit();
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::NoDigitsLiteral { span });
                     (token::Integer, sym::integer(0))
                 } else {
                     if matches!(base, Base::Binary | Base::Octal) {
                         let base = base as u32;
                         let s = self.str_from_to(start + BytePos(2), end);
                         for (idx, c) in s.char_indices() {
+                            let span = self.mk_sp(
+                                start + BytePos::from_usize(2 + idx),
+                                start + BytePos::from_usize(2 + idx + c.len_utf8()),
+                            );
                             if c != '_' && c.to_digit(base).is_none() {
-                                self.err_span_(
-                                    start + BytePos::from_usize(2 + idx),
-                                    start + BytePos::from_usize(2 + idx + c.len_utf8()),
-                                    &format!("invalid digit for a base {} literal", base),
-                                );
+                                self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
                             }
                         }
                     }
@@ -481,19 +461,18 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                 if empty_exponent {
-                    self.err_span_(start, self.pos, "expected at least one digit in exponent");
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.emit_err(errors::EmptyExponentFloat { span });
                 }
-                match base {
-                    Base::Hexadecimal => {
-                        self.err_span_(start, end, "hexadecimal float literal is not supported")
-                    }
-                    Base::Octal => {
-                        self.err_span_(start, end, "octal float literal is not supported")
-                    }
-                    Base::Binary => {
-                        self.err_span_(start, end, "binary float literal is not supported")
-                    }
-                    _ => {}
+                let base = match base {
+                    Base::Hexadecimal => Some("hexadecimal"),
+                    Base::Octal => Some("octal"),
+                    Base::Binary => Some("binary"),
+                    _ => None,
+                };
+                if let Some(base) = base {
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
                 }
                 (token::Float, self.symbol_from_to(start, end))
             }
@@ -643,54 +622,34 @@ impl<'a> StringReader<'a> {
     // identifier tokens.
     fn report_unknown_prefix(&self, start: BytePos) {
         let prefix_span = self.mk_sp(start, self.pos);
-        let prefix_str = self.str_from_to(start, self.pos);
-        let msg = format!("prefix `{}` is unknown", prefix_str);
+        let prefix = self.str_from_to(start, self.pos);
 
         let expn_data = prefix_span.ctxt().outer_expn_data();
 
         if expn_data.edition >= Edition::Edition2021 {
             // In Rust 2021, this is a hard error.
-            let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
-            err.span_label(prefix_span, "unknown prefix");
-            if prefix_str == "rb" {
-                err.span_suggestion_verbose(
-                    prefix_span,
-                    "use `br` for a raw byte string",
-                    "br",
-                    Applicability::MaybeIncorrect,
-                );
+            let sugg = if prefix == "rb" {
+                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
             } else if expn_data.is_root() {
-                err.span_suggestion_verbose(
-                    prefix_span.shrink_to_hi(),
-                    "consider inserting whitespace here",
-                    " ",
-                    Applicability::MaybeIncorrect,
-                );
-            }
-            err.note("prefixed identifiers and literals are reserved since Rust 2021");
-            err.emit();
+                Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+            } else {
+                None
+            };
+            self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
         } else {
             // Before Rust 2021, only emit a lint for migration.
            self.sess.buffer_lint_with_diagnostic(
                &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                prefix_span,
                ast::CRATE_NODE_ID,
-               &msg,
+               &format!("prefix `{prefix}` is unknown"),
                BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
            );
         }
     }
 
-    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
-        self.fatal_span_(
-            start,
-            self.pos,
-            &format!(
-                "too many `#` symbols: raw strings may be delimited \
-                 by up to 255 `#` symbols, but found {}",
-                found
-            ),
-        )
+    fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+        self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
     fn cook_quoted(
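
The mod.rs changes above replace hand-built `struct_span_err`/`err_span_` calls with typed
structs from `crate::errors` passed to `sess.emit_err(...)`. As a rough standalone model of
that shape (not the rustc_errors API; the enum, fields, and emitter below are made up for
illustration), a call site constructs a value and a single emitter decides how it renders:

    // Sketch of the "typed diagnostic" pattern: call sites build data, one
    // emitter turns it into a message. Spans are plain byte ranges here.
    #[derive(Debug)]
    enum LexError {
        CannotBeRawIdent { span: (usize, usize), ident: String },
        NoDigitsLiteral { span: (usize, usize) },
        InvalidDigitLiteral { span: (usize, usize), base: u32 },
    }

    fn emit_err(err: LexError) {
        match err {
            LexError::CannotBeRawIdent { span, ident } => {
                eprintln!("{span:?}: `{ident}` cannot be a raw identifier")
            }
            LexError::NoDigitsLiteral { span } => {
                eprintln!("{span:?}: no valid digits found for number")
            }
            LexError::InvalidDigitLiteral { span, base } => {
                eprintln!("{span:?}: invalid digit for a base {base} literal")
            }
        }
    }

    fn main() {
        emit_err(LexError::InvalidDigitLiteral { span: (10, 11), base: 8 });
    }
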
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index b2701817d..36fd1e37d 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -1,47 +1,31 @@
-use super::{StringReader, UnmatchedBrace};
+use super::diagnostics::report_suspicious_mismatch_block;
+use super::diagnostics::same_identation_level;
+use super::diagnostics::TokenTreeDiagInfo;
+use super::{StringReader, UnmatchedDelim};
 use rustc_ast::token::{self, Delimiter, Token};
 use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust::token_to_string;
-use rustc_data_structures::fx::FxHashMap;
 use rustc_errors::{PErr, PResult};
-use rustc_span::Span;
 
 pub(super) struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
     /// The "next" token, which has been obtained from the `StringReader` but
     /// not yet handled by the `TokenTreesReader`.
     token: Token,
-    /// Stack of open delimiters and their spans. Used for error message.
-    open_braces: Vec<(Delimiter, Span)>,
-    unmatched_braces: Vec<UnmatchedBrace>,
-    /// The type and spans for all braces
-    ///
-    /// Used only for error recovery when arriving to EOF with mismatched braces.
-    matching_delim_spans: Vec<(Delimiter, Span, Span)>,
-    last_unclosed_found_span: Option<Span>,
-    /// Collect empty block spans that might have been auto-inserted by editors.
-    last_delim_empty_block_spans: FxHashMap<Delimiter, Span>,
-    /// Collect the spans of braces (Open, Close). Used only
-    /// for detecting if blocks are empty and only braces.
-    matching_block_spans: Vec<(Span, Span)>,
+    diag_info: TokenTreeDiagInfo,
 }
 
 impl<'a> TokenTreesReader<'a> {
     pub(super) fn parse_all_token_trees(
         string_reader: StringReader<'a>,
-    ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+    ) -> (PResult<'a, TokenStream>, Vec<UnmatchedDelim>) {
         let mut tt_reader = TokenTreesReader {
             string_reader,
             token: Token::dummy(),
-            open_braces: Vec::new(),
-            unmatched_braces: Vec::new(),
-            matching_delim_spans: Vec::new(),
-            last_unclosed_found_span: None,
-            last_delim_empty_block_spans: FxHashMap::default(),
-            matching_block_spans: Vec::new(),
+            diag_info: TokenTreeDiagInfo::default(),
         };
         let res = tt_reader.parse_token_trees(/* is_delimited */ false);
-        (res, tt_reader.unmatched_braces)
+        (res, tt_reader.diag_info.unmatched_delims)
     }
 
     // Parse a stream of tokens into a list of `TokenTree`s.
@@ -50,7 +34,7 @@ impl<'a> TokenTreesReader<'a> {
         let mut buf = Vec::new();
         loop {
             match self.token.kind {
-                token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
+                token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)?),
                 token::CloseDelim(delim) => {
                     return if is_delimited {
                         Ok(TokenStream::new(buf))
@@ -59,10 +43,11 @@ impl<'a> TokenTreesReader<'a> {
                     };
                 }
                 token::Eof => {
-                    if is_delimited {
-                        self.eof_err().emit();
-                    }
-                    return Ok(TokenStream::new(buf));
+                    return if is_delimited {
+                        Err(self.eof_err())
+                    } else {
+                        Ok(TokenStream::new(buf))
+                    };
                 }
                 _ => {
                     // Get the next normal token. This might require getting multiple adjacent
@@ -92,9 +77,9 @@ impl<'a> TokenTreesReader<'a> {
     fn eof_err(&mut self) -> PErr<'a> {
         let msg = "this file contains an unclosed delimiter";
         let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
-        for &(_, sp) in &self.open_braces {
+        for &(_, sp) in &self.diag_info.open_braces {
             err.span_label(sp, "unclosed delimiter");
-            self.unmatched_braces.push(UnmatchedBrace {
+            self.diag_info.unmatched_delims.push(UnmatchedDelim {
                 expected_delim: Delimiter::Brace,
                 found_delim: None,
                 found_span: self.token.span,
@@ -103,69 +88,53 @@ impl<'a> TokenTreesReader<'a> {
            });
        }
 
-        if let Some((delim, _)) = self.open_braces.last() {
-            if let Some((_, open_sp, close_sp)) =
-                self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
-                    let sm = self.string_reader.sess.source_map();
-                    if let Some(close_padding) = sm.span_to_margin(*close_sp) {
-                        if let Some(open_padding) = sm.span_to_margin(*open_sp) {
-                            return delim == d && close_padding != open_padding;
-                        }
-                    }
-                    false
-                })
-            // these are in reverse order as they get inserted on close, but
-            {
-                // we want the last open/first close
-                err.span_label(*open_sp, "this delimiter might not be properly closed...");
-                err.span_label(*close_sp, "...as it matches this but it has different indentation");
-            }
+        if let Some((delim, _)) = self.diag_info.open_braces.last() {
+            report_suspicious_mismatch_block(
+                &mut err,
+                &self.diag_info,
+                &self.string_reader.sess.source_map(),
+                *delim,
+            )
         }
         err
     }
 
-    fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> TokenTree {
+    fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> PResult<'a, TokenTree> {
         // The span for beginning of the delimited section
         let pre_span = self.token.span;
 
-        self.open_braces.push((open_delim, self.token.span));
+        self.diag_info.open_braces.push((open_delim, self.token.span));
 
         // Parse the token trees within the delimiters.
         // We stop at any delimiter so we can try to recover if the user
        // uses an incorrect delimiter.
-        let tts = self.parse_token_trees(/* is_delimited */ true).unwrap();
+        let tts = self.parse_token_trees(/* is_delimited */ true)?;
 
         // Expand to cover the entire delimited token tree
         let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
+        let sm = self.string_reader.sess.source_map();
 
         match self.token.kind {
             // Correct delimiter.
             token::CloseDelim(close_delim) if close_delim == open_delim => {
-                let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
+                let (open_brace, open_brace_span) = self.diag_info.open_braces.pop().unwrap();
                 let close_brace_span = self.token.span;
 
-                if tts.is_empty() {
+                if tts.is_empty() && close_delim == Delimiter::Brace {
                     let empty_block_span = open_brace_span.to(close_brace_span);
-                    let sm = self.string_reader.sess.source_map();
                     if !sm.is_multiline(empty_block_span) {
                         // Only track if the block is in the form of `{}`, otherwise it is
                        // likely that it was written on purpose.
-                        self.last_delim_empty_block_spans.insert(open_delim, empty_block_span);
+                        self.diag_info.empty_block_spans.push(empty_block_span);
                     }
                 }
 
-                //only add braces
+                // only add braces
                 if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) {
-                    self.matching_block_spans.push((open_brace_span, close_brace_span));
+                    // Add all the matching spans, we will sort by span later
+                    self.diag_info.matching_block_spans.push((open_brace_span, close_brace_span));
                 }
 
-                if self.open_braces.is_empty() {
-                    // Clear up these spans to avoid suggesting them as we've found
-                    // properly matched delimiters so far for an entire block.
-                    self.matching_delim_spans.clear();
-                } else {
-                    self.matching_delim_spans.push((open_brace, open_brace_span, close_brace_span));
-                }
                 // Move past the closing delimiter.
                 self.token = self.string_reader.next_token().0;
             }
@@ -174,28 +143,25 @@ impl<'a> TokenTreesReader<'a> {
                 let mut unclosed_delimiter = None;
                 let mut candidate = None;
 
-                if self.last_unclosed_found_span != Some(self.token.span) {
+                if self.diag_info.last_unclosed_found_span != Some(self.token.span) {
                     // do not complain about the same unclosed delimiter multiple times
-                    self.last_unclosed_found_span = Some(self.token.span);
+                    self.diag_info.last_unclosed_found_span = Some(self.token.span);
                     // This is a conservative error: only report the last unclosed
                     // delimiter. The previous unclosed delimiters could actually be
                    // closed! The parser just hasn't gotten to them yet.
-                    if let Some(&(_, sp)) = self.open_braces.last() {
+                    if let Some(&(_, sp)) = self.diag_info.open_braces.last() {
                         unclosed_delimiter = Some(sp);
                     };
-                    let sm = self.string_reader.sess.source_map();
-                    if let Some(current_padding) = sm.span_to_margin(self.token.span) {
-                        for (brace, brace_span) in &self.open_braces {
-                            if let Some(padding) = sm.span_to_margin(*brace_span) {
-                                // high likelihood of these two corresponding
-                                if current_padding == padding && brace == &close_delim {
-                                    candidate = Some(*brace_span);
-                                }
-                            }
+                    for (brace, brace_span) in &self.diag_info.open_braces {
+                        if same_identation_level(&sm, self.token.span, *brace_span)
+                            && brace == &close_delim
+                        {
+                            // high likelihood of these two corresponding
+                            candidate = Some(*brace_span);
                         }
                     }
-                    let (tok, _) = self.open_braces.pop().unwrap();
-                    self.unmatched_braces.push(UnmatchedBrace {
+                    let (tok, _) = self.diag_info.open_braces.pop().unwrap();
+                    self.diag_info.unmatched_delims.push(UnmatchedDelim {
                         expected_delim: tok,
                         found_delim: Some(close_delim),
                         found_span: self.token.span,
@@ -203,7 +169,7 @@ impl<'a> TokenTreesReader<'a> {
                         candidate_span: candidate,
                     });
                 } else {
-                    self.open_braces.pop();
+                    self.diag_info.open_braces.pop();
                 }
 
                 // If the incorrect delimiter matches an earlier opening
@@ -213,7 +179,7 @@ impl<'a> TokenTreesReader<'a> {
                 // fn foo() {
                 //     bar(baz(
                 // }  // Incorrect delimiter but matches the earlier `{`
-                if !self.open_braces.iter().any(|&(b, _)| b == close_delim) {
+                if !self.diag_info.open_braces.iter().any(|&(b, _)| b == close_delim) {
                     self.token = self.string_reader.next_token().0;
                 }
             }
@@ -225,7 +191,7 @@ impl<'a> TokenTreesReader<'a> {
             _ => unreachable!(),
         }
 
-        TokenTree::Delimited(delim_span, open_delim, tts)
+        Ok(TokenTree::Delimited(delim_span, open_delim, tts))
     }
 
     fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'a> {
@@ -236,22 +202,12 @@ impl<'a> TokenTreesReader<'a> {
         let mut err =
             self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
 
-        // Braces are added at the end, so the last element is the biggest block
-        if let Some(parent) = self.matching_block_spans.last() {
-            if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
-                // Check if the (empty block) is in the last properly closed block
-                if (parent.0.to(parent.1)).contains(span) {
-                    err.span_label(span, "block is empty, you might have not meant to close it");
-                } else {
-                    err.span_label(parent.0, "this opening brace...");
-                    err.span_label(parent.1, "...matches this closing brace");
-                }
-            } else {
-                err.span_label(parent.0, "this opening brace...");
-                err.span_label(parent.1, "...matches this closing brace");
-            }
-        }
-
+        report_suspicious_mismatch_block(
+            &mut err,
+            &self.diag_info,
+            &self.string_reader.sess.source_map(),
+            delim,
+        );
         err.span_label(self.token.span, "unexpected closing delimiter");
         err
     }
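
The recovery logic in `parse_token_tree_open_delim` above keeps a stack of open delimiters
and, when a closer does not match the top of the stack, records an `UnmatchedDelim` instead
of aborting. A minimal standalone sketch of that bookkeeping over a plain string (byte
offsets stand in for `Span`s; the struct and helper below are illustrative only):

    // Sketch: record mismatched closers while scanning; unclosed openers stay
    // on the stack, mirroring the "conservative" reporting in the real code.
    #[derive(Debug)]
    struct Unmatched {
        expected: char,
        found: char,
        pos: usize,
    }

    fn check_delims(src: &str) -> Vec<Unmatched> {
        let close_of = |open: char| match open {
            '(' => ')',
            '[' => ']',
            _ => '}',
        };
        let mut open_stack: Vec<(char, usize)> = Vec::new();
        let mut unmatched = Vec::new();
        for (pos, ch) in src.char_indices() {
            match ch {
                '(' | '[' | '{' => open_stack.push((ch, pos)),
                ')' | ']' | '}' => match open_stack.pop() {
                    Some((open, _)) if close_of(open) == ch => {}
                    Some((open, _)) => {
                        unmatched.push(Unmatched { expected: close_of(open), found: ch, pos })
                    }
                    None => {} // stray closer: nothing was open
                },
                _ => {}
            }
        }
        unmatched
    }

    fn main() {
        // `bar(baz(` closed by `}`: the report points at `}` and expects `)`.
        for u in check_delims("fn foo() { bar(baz( }") {
            println!("expected `{}`, found `{}` at byte {}", u.expected, u.found, u.pos);
        }
    }
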
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 6373f5b4f..0d12ec608 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -3,10 +3,12 @@
 use std::iter::once;
 use std::ops::Range;
 
-use rustc_errors::{pluralize, Applicability, Handler};
+use rustc_errors::{Applicability, Handler};
 use rustc_lexer::unescape::{EscapeError, Mode};
 use rustc_span::{BytePos, Span};
 
+use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
+
 pub(crate) fn emit_unescape_error(
     handler: &Handler,
     // interior part of the literal, without quotes
@@ -31,53 +33,32 @@ pub(crate) fn emit_unescape_error(
     };
     match error {
         EscapeError::LoneSurrogateUnicodeEscape => {
-            handler
-                .struct_span_err(span, "invalid unicode character escape")
-                .span_label(span, "invalid escape")
-                .help("unicode escape must not be a surrogate")
-                .emit();
+            handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
         }
         EscapeError::OutOfRangeUnicodeEscape => {
-            handler
-                .struct_span_err(span, "invalid unicode character escape")
-                .span_label(span, "invalid escape")
-                .help("unicode escape must be at most 10FFFF")
-                .emit();
+            handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
         }
         EscapeError::MoreThanOneChar => {
             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
+            let mut sugg = None;
+            let mut note = None;
 
-            let mut has_help = false;
-            let mut handler = handler.struct_span_err(
-                span_with_quotes,
-                "character literal may only contain one codepoint",
-            );
-
-            if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
-                let escaped_marks =
-                    lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
-                handler.span_note(
-                    span,
-                    &format!(
-                        "this `{}` is followed by the combining mark{} `{}`",
-                        lit.chars().next().unwrap(),
-                        pluralize!(escaped_marks.len()),
-                        escaped_marks.join(""),
-                    ),
-                );
+            let lit_chars = lit.chars().collect::<Vec<_>>();
+            let (first, rest) = lit_chars.split_first().unwrap();
+            if rest.iter().copied().all(is_combining_mark) {
                 let normalized = lit.nfc().to_string();
                 if normalized.chars().count() == 1 {
-                    has_help = true;
-                    handler.span_suggestion(
-                        span,
-                        &format!(
-                            "consider using the normalized form `{}` of this character",
-                            normalized.chars().next().unwrap().escape_default()
-                        ),
-                        normalized,
-                        Applicability::MachineApplicable,
-                    );
+                    let ch = normalized.chars().next().unwrap().escape_default().to_string();
+                    sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
                 }
+                let escaped_marks =
+                    rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
+                note = Some(MoreThanOneCharNote::AllCombining {
+                    span,
+                    chr: format!("{first}"),
+                    len: escaped_marks.len(),
+                    escaped_marks: escaped_marks.join(""),
+                });
             } else {
                 let printable: Vec<char> = lit
                     .chars()
@@ -87,32 +68,18 @@ pub(crate) fn emit_unescape_error(
                    })
                    .collect();
 
-                if let [ch] = printable.as_slice() {
-                    has_help = true;
-
-                    handler.span_note(
+                if let &[ch] = printable.as_slice() {
+                    sugg =
+                        Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
+                    note = Some(MoreThanOneCharNote::NonPrinting {
                         span,
-                        &format!(
-                            "there are non-printing characters, the full sequence is `{}`",
-                            lit.escape_default(),
-                        ),
-                    );
-
-                    handler.span_suggestion(
-                        span,
-                        "consider removing the non-printing characters",
-                        ch,
-                        Applicability::MaybeIncorrect,
-                    );
+                        escaped: lit.escape_default().to_string(),
+                    });
                 }
-            }
-
-            if !has_help {
-                let (prefix, msg) = if mode.is_byte() {
-                    ("b", "if you meant to write a byte string literal, use double quotes")
-                } else {
-                    ("", "if you meant to write a `str` literal, use double quotes")
-                };
+            };
+            let sugg = sugg.unwrap_or_else(|| {
+                let is_byte = mode.is_byte();
+                let prefix = if is_byte { "b" } else { "" };
                 let mut escaped = String::with_capacity(lit.len());
                 let mut chrs = lit.chars().peekable();
                 while let Some(first) = chrs.next() {
@@ -129,54 +96,32 @@ pub(crate) fn emit_unescape_error(
                        (c, _) => escaped.push(c),
                    };
                }
-                handler.span_suggestion(
-                    span_with_quotes,
-                    msg,
-                    format!("{prefix}\"{escaped}\""),
-                    Applicability::MachineApplicable,
-                );
-            }
-
-            handler.emit();
+                let sugg = format!("{prefix}\"{escaped}\"");
+                MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+            });
+            handler.emit_err(UnescapeError::MoreThanOneChar {
+                span: span_with_quotes,
+                note,
+                suggestion: sugg,
+            });
         }
         EscapeError::EscapeOnlyChar => {
             let (c, char_span) = last_char();
-
-            let msg = if mode.is_byte() {
-                "byte constant must be escaped"
-            } else {
-                "character constant must be escaped"
-            };
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
-                .span_suggestion(
-                    char_span,
-                    "escape the character",
-                    c.escape_default(),
-                    Applicability::MachineApplicable,
-                )
-                .emit();
+            handler.emit_err(UnescapeError::EscapeOnlyChar {
+                span,
+                char_span,
+                escaped_sugg: c.escape_default().to_string(),
+                escaped_msg: escaped_char(c),
+                byte: mode.is_byte(),
+            });
         }
         EscapeError::BareCarriageReturn => {
-            let msg = if mode.in_double_quotes() {
-                "bare CR not allowed in string, use `\\r` instead"
-            } else {
-                "character constant must be escaped: `\\r`"
-            };
-            handler
-                .struct_span_err(span, msg)
-                .span_suggestion(
-                    span,
-                    "escape the character",
-                    "\\r",
-                    Applicability::MachineApplicable,
-                )
-                .emit();
+            let double_quotes = mode.in_double_quotes();
+            handler.emit_err(UnescapeError::BareCr { span, double_quotes });
         }
         EscapeError::BareCarriageReturnInRawString => {
             assert!(mode.in_double_quotes());
-            let msg = "bare CR not allowed in raw string";
-            handler.span_err(span, msg);
+            handler.emit_err(UnescapeError::BareCrRawString(span));
         }
         EscapeError::InvalidEscape => {
             let (c, span) = last_char();
@@ -213,22 +158,13 @@ pub(crate) fn emit_unescape_error(
             diag.emit();
         }
         EscapeError::TooShortHexEscape => {
-            handler.span_err(span, "numeric character escape is too short");
+            handler.emit_err(UnescapeError::TooShortHexEscape(span));
         }
         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
             let (c, span) = last_char();
-
-            let msg = if error == EscapeError::InvalidCharInHexEscape {
-                "invalid character in numeric character escape"
-            } else {
-                "invalid character in unicode escape"
-            };
-            let c = escaped_char(c);
-
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, c))
-                .span_label(span, msg)
-                .emit();
+            let is_hex = error == EscapeError::InvalidCharInHexEscape;
+            let ch = escaped_char(c);
+            handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
         }
         EscapeError::NonAsciiCharInByte => {
             let (c, span) = last_char();
@@ -278,41 +214,22 @@ pub(crate) fn emit_unescape_error(
             err.emit();
         }
         EscapeError::OutOfRangeHexEscape => {
-            handler
-                .struct_span_err(span, "out of range hex escape")
-                .span_label(span, "must be a character in the range [\\x00-\\x7f]")
-                .emit();
+            handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
         }
         EscapeError::LeadingUnderscoreUnicodeEscape => {
             let (c, span) = last_char();
-            let msg = "invalid start of unicode escape";
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, c))
-                .span_label(span, msg)
-                .emit();
+            handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
+                span,
+                ch: escaped_char(c),
+            });
         }
         EscapeError::OverlongUnicodeEscape => {
-            handler
-                .struct_span_err(span, "overlong unicode escape")
-                .span_label(span, "must have at most 6 hex digits")
-                .emit();
+            handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
         }
         EscapeError::UnclosedUnicodeEscape => {
-            handler
-                .struct_span_err(span, "unterminated unicode escape")
-                .span_label(span, "missing a closing `}`")
-                .span_suggestion_verbose(
-                    span.shrink_to_hi(),
-                    "terminate the unicode escape",
-                    "}",
-                    Applicability::MaybeIncorrect,
-                )
-                .emit();
+            handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
         }
         EscapeError::NoBraceInUnicodeEscape => {
-            let msg = "incorrect unicode escape sequence";
-            let mut diag = handler.struct_span_err(span, msg);
-
             let mut suggestion = "\\u{".to_owned();
             let mut suggestion_len = 0;
             let (c, char_span) = last_char();
@@ -322,54 +239,37 @@ pub(crate) fn emit_unescape_error(
                suggestion_len += c.len_utf8();
            }
 
-            if suggestion_len > 0 {
+            let (label, sub) = if suggestion_len > 0 {
                 suggestion.push('}');
                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
-                diag.span_suggestion(
-                    span.with_hi(hi),
-                    "format of unicode escape sequences uses braces",
-                    suggestion,
-                    Applicability::MaybeIncorrect,
-                );
+                (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
             } else {
-                diag.span_label(span, msg);
-                diag.help("format of unicode escape sequences is `\\u{...}`");
-            }
-
-            diag.emit();
+                (Some(span), NoBraceUnicodeSub::Help)
+            };
+            handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
         }
         EscapeError::UnicodeEscapeInByte => {
-            let msg = "unicode escape in byte string";
-            handler
-                .struct_span_err(span, msg)
-                .span_label(span, msg)
-                .help("unicode escape sequences cannot be used as a byte or in a byte string")
-                .emit();
+            handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
         }
         EscapeError::EmptyUnicodeEscape => {
-            handler
-                .struct_span_err(span, "empty unicode escape")
-                .span_label(span, "this escape must have at least 1 hex digit")
-                .emit();
+            handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
        }
         EscapeError::ZeroChars => {
-            let msg = "empty character literal";
-            handler.struct_span_err(span, msg).span_label(span, msg).emit();
+            handler.emit_err(UnescapeError::ZeroChars(span));
        }
         EscapeError::LoneSlash => {
-            let msg = "invalid trailing slash in literal";
-            handler.struct_span_err(span, msg).span_label(span, msg).emit();
+            handler.emit_err(UnescapeError::LoneSlash(span));
         }
         EscapeError::UnskippedWhitespaceWarning => {
             let (c, char_span) = last_char();
-            let msg =
-                format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
-            handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
+            handler.emit_warning(UnescapeError::UnskippedWhitespace {
+                span,
+                ch: escaped_char(c),
+                char_span,
+            });
         }
         EscapeError::MultipleSkippedLinesWarning => {
-            let msg = "multiple lines skipped by escaped newline";
-            let bottom_msg = "skipping everything up to and including this point";
-            handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
+            handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
         }
     }
 }
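
The reworked `MoreThanOneChar` branch above decides between two suggestions: if every
character after the first is a combining mark and NFC-normalization collapses the literal
to a single character, it offers the normalized form. A minimal standalone sketch of that
check, using the same `unicode-normalization` crate the real code imports (the function
name is illustrative, and the printable/quote fallbacks are omitted):

    // Sketch: return the single-char NFC form of a too-long char literal, if any.
    use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};

    fn normalized_form_suggestion(lit: &str) -> Option<String> {
        let mut chars = lit.chars();
        let _first = chars.next()?;
        if !chars.clone().all(is_combining_mark) {
            return None;
        }
        let normalized: String = lit.nfc().collect();
        (normalized.chars().count() == 1).then_some(normalized)
    }

    fn main() {
        // 'e' followed by U+0301 COMBINING ACUTE ACCENT normalizes to a single 'é'.
        assert_eq!(normalized_form_suggestion("e\u{0301}"), Some("é".to_string()));
        // Two full characters: no single-char suggestion.
        assert_eq!(normalized_form_suggestion("ab"), None);
    }
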
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 34d003ccf..d4f971d5b 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -2,8 +2,10 @@
 //!
 
 use super::StringReader;
-use crate::token::{self, Delimiter};
-use rustc_errors::{Applicability, Diagnostic};
+use crate::{
+    errors::TokenSubstitution,
+    token::{self, Delimiter},
+};
 use rustc_span::{symbol::kw, BytePos, Pos, Span};
 
 #[rustfmt::skip] // for line breaks
@@ -338,48 +340,44 @@ pub(super) fn check_for_substitution<'a>(
     reader: &StringReader<'a>,
     pos: BytePos,
     ch: char,
-    err: &mut Diagnostic,
     count: usize,
-) -> Option<token::TokenKind> {
-    let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
+) -> (Option<token::TokenKind>, Option<TokenSubstitution>) {
+    let Some(&(_, u_name, ascii_str)) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) else {
+        return (None, None);
+    };
 
     let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
 
     let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
         let msg = format!("substitution character not found for '{}'", ch);
         reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-        return None;
+        return (None, None);
     };
 
     // special help suggestion for "directed" double quotes
-    if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
-        let msg = format!(
-            "Unicode characters '“' (Left Double Quotation Mark) and \
-             '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
-            ascii_str, ascii_name
-        );
-        err.span_suggestion(
-            Span::with_root_ctxt(
-                pos,
-                pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
-            ),
-            &msg,
-            format!("\"{}\"", s),
-            Applicability::MaybeIncorrect,
+    let sugg = if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
+        let span = Span::with_root_ctxt(
+            pos,
+            pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
         );
+        Some(TokenSubstitution::DirectedQuotes {
+            span,
+            suggestion: format!("\"{s}\""),
+            ascii_str,
+            ascii_name,
+        })
     } else {
-        let msg = format!(
-            "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
-            ch, u_name, ascii_str, ascii_name
-        );
-        err.span_suggestion(
+        let suggestion = ascii_str.to_string().repeat(count);
+        Some(TokenSubstitution::Other {
             span,
-            &msg,
-            ascii_str.to_string().repeat(count),
-            Applicability::MaybeIncorrect,
-        );
-    }
-    token.clone()
+            suggestion,
+            ch: ch.to_string(),
+            u_name,
+            ascii_str,
+            ascii_name,
+        })
+    };
+    (token.clone(), sugg)
 }
 
 /// Extract string if found at current position with given delimiters
--
cgit v1.2.3
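
The `check_for_substitution` change above stops mutating a `Diagnostic` in place and
instead returns the recovered token together with an optional structured suggestion,
built from two lookup tables (a confusable table and an ASCII table). A minimal
standalone sketch of that two-table lookup; the tables, types, and suggestion text
below are simplified stand-ins, not the compiler's `UNICODE_ARRAY`/`ASCII_ARRAY`:

    // Sketch: map a confusable char to its ASCII look-alike and a suggestion.
    const CONFUSABLES: &[(char, &str, &str)] = &[
        ('；', "Fullwidth Semicolon", ";"),
        ('“', "Left Double Quotation Mark", "\""),
    ];

    const ASCII_TOKENS: &[(&str, &str)] = &[(";", "Semi"), ("\"", "Quote")];

    fn check_for_substitution(ch: char) -> (Option<&'static str>, Option<String>) {
        let Some(&(_, u_name, ascii_str)) = CONFUSABLES.iter().find(|&&(c, _, _)| c == ch) else {
            return (None, None);
        };
        let token = ASCII_TOKENS.iter().find(|&&(s, _)| s == ascii_str).map(|&(_, t)| t);
        let sugg = format!("'{ch}' ({u_name}) looks like '{ascii_str}', but it is not");
        (token, Some(sugg))
    }

    fn main() {
        let (token, sugg) = check_for_substitution('；');
        println!("recover as {token:?}: {}", sugg.unwrap());
    }
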