Diffstat (limited to 'compiler/rustc_parse/src/lexer')
 compiler/rustc_parse/src/lexer/diagnostics.rs              | 119
 compiler/rustc_parse/src/lexer/mod.rs                      | 189
 compiler/rustc_parse/src/lexer/tokentrees.rs               | 150
 compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 248
 compiler/rustc_parse/src/lexer/unicode_chars.rs            |  60
 5 files changed, 349 insertions(+), 417 deletions(-)
diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs
new file mode 100644
index 000000000..27f4428d3
--- /dev/null
+++ b/compiler/rustc_parse/src/lexer/diagnostics.rs
@@ -0,0 +1,119 @@
+use super::UnmatchedDelim;
+use rustc_ast::token::Delimiter;
+use rustc_errors::Diagnostic;
+use rustc_span::source_map::SourceMap;
+use rustc_span::Span;
+
+#[derive(Default)]
+pub struct TokenTreeDiagInfo {
+ /// Stack of open delimiters and their spans. Used for error message.
+ pub open_braces: Vec<(Delimiter, Span)>,
+ pub unmatched_delims: Vec<UnmatchedDelim>,
+
+ /// Used only for error recovery when arriving at EOF with mismatched braces.
+ pub last_unclosed_found_span: Option<Span>,
+
+ /// Collect empty block spans that might have been auto-inserted by editors.
+ pub empty_block_spans: Vec<Span>,
+
+ /// Collect the spans of braces (Open, Close). Used only
+ /// for detecting if blocks are empty and contain only braces.
+ pub matching_block_spans: Vec<(Span, Span)>,
+}
+
+pub fn same_identation_level(sm: &SourceMap, open_sp: Span, close_sp: Span) -> bool {
+ match (sm.span_to_margin(open_sp), sm.span_to_margin(close_sp)) {
+ (Some(open_padding), Some(close_padding)) => open_padding == close_padding,
+ _ => false,
+ }
+}
+
+// When we get a `)` or `]` for a `{`, we should emit a help message here;
+// it's friendlier than reporting an `unmatched delimiter` error in a later phase
+pub fn report_missing_open_delim(
+ err: &mut Diagnostic,
+ unmatched_delims: &[UnmatchedDelim],
+) -> bool {
+ let mut reported_missing_open = false;
+ for unmatch_brace in unmatched_delims.iter() {
+ if let Some(delim) = unmatch_brace.found_delim
+ && matches!(delim, Delimiter::Parenthesis | Delimiter::Bracket)
+ {
+ let missed_open = match delim {
+ Delimiter::Parenthesis => "(",
+ Delimiter::Bracket => "[",
+ _ => unreachable!(),
+ };
+ err.span_label(
+ unmatch_brace.found_span.shrink_to_lo(),
+ format!("missing open `{}` for this delimiter", missed_open),
+ );
+ reported_missing_open = true;
+ }
+ }
+ reported_missing_open
+}
+
+pub fn report_suspicious_mismatch_block(
+ err: &mut Diagnostic,
+ diag_info: &TokenTreeDiagInfo,
+ sm: &SourceMap,
+ delim: Delimiter,
+) {
+ if report_missing_open_delim(err, &diag_info.unmatched_delims) {
+ return;
+ }
+
+ let mut matched_spans: Vec<(Span, bool)> = diag_info
+ .matching_block_spans
+ .iter()
+ .map(|&(open, close)| (open.with_hi(close.lo()), same_identation_level(sm, open, close)))
+ .collect();
+
+ // sort by `lo`, so the larger block spans come first
+ matched_spans.sort_by(|a, b| a.0.lo().cmp(&b.0.lo()));
+
+ // We use larger, well-indented blocks to cover the inner mismatched blocks
+ // This is O(N^2), but we are on the error-reporting path, so it is fine
+ for i in 0..matched_spans.len() {
+ let (block_span, same_ident) = matched_spans[i];
+ if same_ident {
+ for j in i + 1..matched_spans.len() {
+ let (inner_block, inner_same_ident) = matched_spans[j];
+ if block_span.contains(inner_block) && !inner_same_ident {
+ matched_spans[j] = (inner_block, true);
+ }
+ }
+ }
+ }
+
+ // Find the innermost span candidate for the final report
+ let candidate_span =
+ matched_spans.into_iter().rev().find(|&(_, same_ident)| !same_ident).map(|(span, _)| span);
+
+ if let Some(block_span) = candidate_span {
+ err.span_label(block_span.shrink_to_lo(), "this delimiter might not be properly closed...");
+ err.span_label(
+ block_span.shrink_to_hi(),
+ "...as it matches this but it has different indentation",
+ );
+
+ // If there is an empty block in the mismatched span, note it
+ if delim == Delimiter::Brace {
+ for span in diag_info.empty_block_spans.iter() {
+ if block_span.contains(*span) {
+ err.span_label(*span, "block is empty, you might have not meant to close it");
+ break;
+ }
+ }
+ }
+ } else {
+ // If there is no suspicious span, pointing at the last properly closed block may help
+ if let Some(parent) = diag_info.matching_block_spans.last()
+ && diag_info.open_braces.last().is_none()
+ && diag_info.empty_block_spans.iter().all(|&sp| sp != parent.0.to(parent.1)) {
+ err.span_label(parent.0, "this opening brace...");
+ err.span_label(parent.1, "...matches this closing brace");
+ }
+ }
+}
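
The recovery in report_suspicious_mismatch_block keys everything off the left margin of the lines holding the delimiters: an open/close pair whose margins differ is treated as suspicious, and larger well-indented blocks are used to cover inner mismatches. A minimal standalone sketch of that margin comparison, using plain source lines rather than rustc's SourceMap and Span (the function names below are illustrative, not the compiler's):

// Standalone sketch only: mirrors the idea behind `same_identation_level`
// using plain strings instead of rustc's `SourceMap`/`Span`.
fn indentation_of(line: &str) -> usize {
    line.chars().take_while(|c| *c == ' ' || *c == '\t').count()
}

// An open/close pair is treated as "probably intended to match" when both
// delimiter lines start at the same column.
fn same_indentation_level(open_line: &str, close_line: &str) -> bool {
    indentation_of(open_line) == indentation_of(close_line)
}

fn main() {
    // Well-indented block: open and close share a margin, so it is not suspicious.
    assert!(same_indentation_level("    if cond {", "    }"));
    // Mismatched margins: the kind of block the diagnostic labels
    // "this delimiter might not be properly closed...".
    assert!(!same_indentation_level("    if cond {", "}"));
    println!("ok");
}
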
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9fe8d9836..59958a309 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,11 +1,11 @@
+use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
+use crate::make_unclosed_delims_error;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
- error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
@@ -17,6 +17,7 @@ use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{edition::Edition, BytePos, Pos, Span};
+mod diagnostics;
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;
@@ -31,7 +32,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
#[derive(Clone, Debug)]
-pub struct UnmatchedBrace {
+pub struct UnmatchedDelim {
pub expected_delim: Delimiter,
pub found_delim: Option<Delimiter>,
pub found_span: Span,
@@ -44,7 +45,7 @@ pub(crate) fn parse_token_trees<'a>(
mut src: &'a str,
mut start_pos: BytePos,
override_span: Option<Span>,
-) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+) -> Result<TokenStream, Vec<Diagnostic>> {
// Skip `#!`, if present.
if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
src = &src[shebang_len..];
@@ -61,7 +62,29 @@ pub(crate) fn parse_token_trees<'a>(
override_span,
nbsp_is_whitespace: false,
};
- tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
+ let (token_trees, unmatched_delims) =
+ tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
+ match token_trees {
+ Ok(stream) if unmatched_delims.is_empty() => Ok(stream),
+ _ => {
+ // Return an error if there are unmatched delimiters or unclosed delimiters.
+ // We emit the delimiter mismatch errors first, then the unclosed delimiter error,
+ // because a delimiter mismatch is more likely to be the root cause of the error
+
+ let mut buffer = Vec::with_capacity(1);
+ // We don't use `emit_unclosed_delims` here so that we can use `db.buffer`
+ for unmatched in unmatched_delims {
+ if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
+ err.buffer(&mut buffer);
+ }
+ }
+ if let Err(err) = token_trees {
+ // Add the unclosed delimiter error
+ err.buffer(&mut buffer);
+ }
+ Err(buffer)
+ }
+ }
}
struct StringReader<'a> {
@@ -150,7 +173,7 @@ impl<'a> StringReader<'a> {
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
if !sym.can_be_raw() {
- self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+ self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
token::Ident(sym, true)
@@ -261,27 +284,24 @@ impl<'a> StringReader<'a> {
self.nbsp_is_whitespace = true;
}
let repeats = it.take_while(|c1| *c1 == c).count();
- let mut err =
- self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
// FIXME: the lexer could be used to turn the ASCII version of unicode
// homoglyphs, instead of keeping a table in `check_for_substitution` into the
// token. Ideally, this should be inside `rustc_lexer`. However, we should
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
// fancier error recovery to it, as there will be less overall work to do this
// way.
- let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
- if c == '\x00' {
- err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
- }
- if repeats > 0 {
- if repeats == 1 {
- err.note(format!("character appears once more"));
- } else {
- err.note(format!("character appears {repeats} more times"));
- }
- swallow_next_invalid = repeats;
- }
- err.emit();
+ let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+ self.sess.emit_err(errors::UnknownTokenStart {
+ span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+ escaped: escaped_char(c),
+ sugg,
+ null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+ repeat: if repeats > 0 {
+ swallow_next_invalid = repeats;
+ Some(errors::UnknownTokenRepeat { repeats })
+ } else {None}
+ });
+
if let Some(token) = token {
token
} else {
@@ -296,26 +316,6 @@ impl<'a> StringReader<'a> {
}
}
- /// Report a fatal lexical error with a given span.
- fn fatal_span(&self, sp: Span, m: &str) -> ! {
- self.sess.span_diagnostic.span_fatal(sp, m)
- }
-
- /// Report a lexical error with a given span.
- fn err_span(&self, sp: Span, m: &str) {
- self.sess.span_diagnostic.struct_span_err(sp, m).emit();
- }
-
- /// Report a fatal error spanning [`from_pos`, `to_pos`).
- fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
- self.fatal_span(self.mk_sp(from_pos, to_pos), m)
- }
-
- /// Report a lexical error spanning [`from_pos`, `to_pos`).
- fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
- self.err_span(self.mk_sp(from_pos, to_pos), m)
- }
-
fn struct_fatal_span_char(
&self,
from_pos: BytePos,
@@ -328,18 +328,6 @@ impl<'a> StringReader<'a> {
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
- fn struct_err_span_char(
- &self,
- from_pos: BytePos,
- to_pos: BytePos,
- m: &str,
- c: char,
- ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
- self.sess
- .span_diagnostic
- .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
- }
-
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
/// complain about it.
fn lint_unicode_text_flow(&self, start: BytePos) {
@@ -367,14 +355,12 @@ impl<'a> StringReader<'a> {
) -> TokenKind {
if content.contains('\r') {
for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
- self.err_span_(
+ let span = self.mk_sp(
content_start + BytePos(idx as u32),
content_start + BytePos(idx as u32 + 1),
- match comment_kind {
- CommentKind::Line => "bare CR not allowed in doc-comment",
- CommentKind::Block => "bare CR not allowed in block doc-comment",
- },
);
+ let block = matches!(comment_kind, CommentKind::Block);
+ self.sess.emit_err(errors::CrDocComment { span, block });
}
}
@@ -453,26 +439,20 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Int { base, empty_int } => {
if empty_int {
- self.sess
- .span_diagnostic
- .struct_span_err_with_code(
- self.mk_sp(start, end),
- "no valid digits found for number",
- error_code!(E0768),
- )
- .emit();
+ let span = self.mk_sp(start, end);
+ self.sess.emit_err(errors::NoDigitsLiteral { span });
(token::Integer, sym::integer(0))
} else {
if matches!(base, Base::Binary | Base::Octal) {
let base = base as u32;
let s = self.str_from_to(start + BytePos(2), end);
for (idx, c) in s.char_indices() {
+ let span = self.mk_sp(
+ start + BytePos::from_usize(2 + idx),
+ start + BytePos::from_usize(2 + idx + c.len_utf8()),
+ );
if c != '_' && c.to_digit(base).is_none() {
- self.err_span_(
- start + BytePos::from_usize(2 + idx),
- start + BytePos::from_usize(2 + idx + c.len_utf8()),
- &format!("invalid digit for a base {} literal", base),
- );
+ self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
}
}
}
@@ -481,19 +461,18 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
if empty_exponent {
- self.err_span_(start, self.pos, "expected at least one digit in exponent");
+ let span = self.mk_sp(start, self.pos);
+ self.sess.emit_err(errors::EmptyExponentFloat { span });
}
- match base {
- Base::Hexadecimal => {
- self.err_span_(start, end, "hexadecimal float literal is not supported")
- }
- Base::Octal => {
- self.err_span_(start, end, "octal float literal is not supported")
- }
- Base::Binary => {
- self.err_span_(start, end, "binary float literal is not supported")
- }
- _ => {}
+ let base = match base {
+ Base::Hexadecimal => Some("hexadecimal"),
+ Base::Octal => Some("octal"),
+ Base::Binary => Some("binary"),
+ _ => None,
+ };
+ if let Some(base) = base {
+ let span = self.mk_sp(start, end);
+ self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
}
(token::Float, self.symbol_from_to(start, end))
}
@@ -643,54 +622,34 @@ impl<'a> StringReader<'a> {
// identifier tokens.
fn report_unknown_prefix(&self, start: BytePos) {
let prefix_span = self.mk_sp(start, self.pos);
- let prefix_str = self.str_from_to(start, self.pos);
- let msg = format!("prefix `{}` is unknown", prefix_str);
+ let prefix = self.str_from_to(start, self.pos);
let expn_data = prefix_span.ctxt().outer_expn_data();
if expn_data.edition >= Edition::Edition2021 {
// In Rust 2021, this is a hard error.
- let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
- err.span_label(prefix_span, "unknown prefix");
- if prefix_str == "rb" {
- err.span_suggestion_verbose(
- prefix_span,
- "use `br` for a raw byte string",
- "br",
- Applicability::MaybeIncorrect,
- );
+ let sugg = if prefix == "rb" {
+ Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
} else if expn_data.is_root() {
- err.span_suggestion_verbose(
- prefix_span.shrink_to_hi(),
- "consider inserting whitespace here",
- " ",
- Applicability::MaybeIncorrect,
- );
- }
- err.note("prefixed identifiers and literals are reserved since Rust 2021");
- err.emit();
+ Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+ } else {
+ None
+ };
+ self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
} else {
// Before Rust 2021, only emit a lint for migration.
self.sess.buffer_lint_with_diagnostic(
&RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
prefix_span,
ast::CRATE_NODE_ID,
- &msg,
+ &format!("prefix `{prefix}` is unknown"),
BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
);
}
}
- fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
- self.fatal_span_(
- start,
- self.pos,
- &format!(
- "too many `#` symbols: raw strings may be delimited \
- by up to 255 `#` symbols, but found {}",
- found
- ),
- )
+ fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+ self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
fn cook_quoted(
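
The most visible change in mod.rs is the entry point: parse_token_trees now returns Result<TokenStream, Vec<Diagnostic>>, buffering the unmatched-delimiter errors plus the unclosed-delimiter error instead of emitting them eagerly (the inline error calls elsewhere in the file move to typed structs via self.sess.emit_err). A rough standalone sketch of that buffering shape, with toy types standing in for TokenStream and Diagnostic (everything below is illustrative, not rustc API):

// Sketch: collect every problem into a buffer and hand it back to the caller;
// `Ok` is returned only when nothing went wrong.
fn parse(src: &str) -> Result<Vec<char>, Vec<String>> {
    let mut buffer = Vec::new();
    let mut depth = 0i32;
    for c in src.chars() {
        match c {
            '{' => depth += 1,
            '}' => depth -= 1,
            _ => {}
        }
        if depth < 0 {
            // Mismatch errors are pushed first, mirroring the ordering comment
            // in `parse_token_trees`.
            buffer.push("unexpected closing delimiter".to_string());
            depth = 0;
        }
    }
    if depth > 0 {
        buffer.push("this file contains an unclosed delimiter".to_string());
    }
    if buffer.is_empty() { Ok(src.chars().collect()) } else { Err(buffer) }
}

fn main() {
    println!("{:?}", parse("fn f() { }")); // Ok([...])
    println!("{:?}", parse("fn f() {"));   // Err(["this file contains an unclosed delimiter"])
}
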
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index b2701817d..36fd1e37d 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -1,47 +1,31 @@
-use super::{StringReader, UnmatchedBrace};
+use super::diagnostics::report_suspicious_mismatch_block;
+use super::diagnostics::same_identation_level;
+use super::diagnostics::TokenTreeDiagInfo;
+use super::{StringReader, UnmatchedDelim};
use rustc_ast::token::{self, Delimiter, Token};
use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast_pretty::pprust::token_to_string;
-use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{PErr, PResult};
-use rustc_span::Span;
pub(super) struct TokenTreesReader<'a> {
string_reader: StringReader<'a>,
/// The "next" token, which has been obtained from the `StringReader` but
/// not yet handled by the `TokenTreesReader`.
token: Token,
- /// Stack of open delimiters and their spans. Used for error message.
- open_braces: Vec<(Delimiter, Span)>,
- unmatched_braces: Vec<UnmatchedBrace>,
- /// The type and spans for all braces
- ///
- /// Used only for error recovery when arriving to EOF with mismatched braces.
- matching_delim_spans: Vec<(Delimiter, Span, Span)>,
- last_unclosed_found_span: Option<Span>,
- /// Collect empty block spans that might have been auto-inserted by editors.
- last_delim_empty_block_spans: FxHashMap<Delimiter, Span>,
- /// Collect the spans of braces (Open, Close). Used only
- /// for detecting if blocks are empty and only braces.
- matching_block_spans: Vec<(Span, Span)>,
+ diag_info: TokenTreeDiagInfo,
}
impl<'a> TokenTreesReader<'a> {
pub(super) fn parse_all_token_trees(
string_reader: StringReader<'a>,
- ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+ ) -> (PResult<'a, TokenStream>, Vec<UnmatchedDelim>) {
let mut tt_reader = TokenTreesReader {
string_reader,
token: Token::dummy(),
- open_braces: Vec::new(),
- unmatched_braces: Vec::new(),
- matching_delim_spans: Vec::new(),
- last_unclosed_found_span: None,
- last_delim_empty_block_spans: FxHashMap::default(),
- matching_block_spans: Vec::new(),
+ diag_info: TokenTreeDiagInfo::default(),
};
let res = tt_reader.parse_token_trees(/* is_delimited */ false);
- (res, tt_reader.unmatched_braces)
+ (res, tt_reader.diag_info.unmatched_delims)
}
// Parse a stream of tokens into a list of `TokenTree`s.
@@ -50,7 +34,7 @@ impl<'a> TokenTreesReader<'a> {
let mut buf = Vec::new();
loop {
match self.token.kind {
- token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
+ token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)?),
token::CloseDelim(delim) => {
return if is_delimited {
Ok(TokenStream::new(buf))
@@ -59,10 +43,11 @@ impl<'a> TokenTreesReader<'a> {
};
}
token::Eof => {
- if is_delimited {
- self.eof_err().emit();
- }
- return Ok(TokenStream::new(buf));
+ return if is_delimited {
+ Err(self.eof_err())
+ } else {
+ Ok(TokenStream::new(buf))
+ };
}
_ => {
// Get the next normal token. This might require getting multiple adjacent
@@ -92,9 +77,9 @@ impl<'a> TokenTreesReader<'a> {
fn eof_err(&mut self) -> PErr<'a> {
let msg = "this file contains an unclosed delimiter";
let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
- for &(_, sp) in &self.open_braces {
+ for &(_, sp) in &self.diag_info.open_braces {
err.span_label(sp, "unclosed delimiter");
- self.unmatched_braces.push(UnmatchedBrace {
+ self.diag_info.unmatched_delims.push(UnmatchedDelim {
expected_delim: Delimiter::Brace,
found_delim: None,
found_span: self.token.span,
@@ -103,69 +88,53 @@ impl<'a> TokenTreesReader<'a> {
});
}
- if let Some((delim, _)) = self.open_braces.last() {
- if let Some((_, open_sp, close_sp)) =
- self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
- let sm = self.string_reader.sess.source_map();
- if let Some(close_padding) = sm.span_to_margin(*close_sp) {
- if let Some(open_padding) = sm.span_to_margin(*open_sp) {
- return delim == d && close_padding != open_padding;
- }
- }
- false
- })
- // these are in reverse order as they get inserted on close, but
- {
- // we want the last open/first close
- err.span_label(*open_sp, "this delimiter might not be properly closed...");
- err.span_label(*close_sp, "...as it matches this but it has different indentation");
- }
+ if let Some((delim, _)) = self.diag_info.open_braces.last() {
+ report_suspicious_mismatch_block(
+ &mut err,
+ &self.diag_info,
+ &self.string_reader.sess.source_map(),
+ *delim,
+ )
}
err
}
- fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> TokenTree {
+ fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> PResult<'a, TokenTree> {
// The span for beginning of the delimited section
let pre_span = self.token.span;
- self.open_braces.push((open_delim, self.token.span));
+ self.diag_info.open_braces.push((open_delim, self.token.span));
// Parse the token trees within the delimiters.
// We stop at any delimiter so we can try to recover if the user
// uses an incorrect delimiter.
- let tts = self.parse_token_trees(/* is_delimited */ true).unwrap();
+ let tts = self.parse_token_trees(/* is_delimited */ true)?;
// Expand to cover the entire delimited token tree
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
+ let sm = self.string_reader.sess.source_map();
match self.token.kind {
// Correct delimiter.
token::CloseDelim(close_delim) if close_delim == open_delim => {
- let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
+ let (open_brace, open_brace_span) = self.diag_info.open_braces.pop().unwrap();
let close_brace_span = self.token.span;
- if tts.is_empty() {
+ if tts.is_empty() && close_delim == Delimiter::Brace {
let empty_block_span = open_brace_span.to(close_brace_span);
- let sm = self.string_reader.sess.source_map();
if !sm.is_multiline(empty_block_span) {
// Only track if the block is in the form of `{}`, otherwise it is
// likely that it was written on purpose.
- self.last_delim_empty_block_spans.insert(open_delim, empty_block_span);
+ self.diag_info.empty_block_spans.push(empty_block_span);
}
}
- //only add braces
+ // only add braces
if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) {
- self.matching_block_spans.push((open_brace_span, close_brace_span));
+ // Add all the matching spans; we will sort by span later
+ self.diag_info.matching_block_spans.push((open_brace_span, close_brace_span));
}
- if self.open_braces.is_empty() {
- // Clear up these spans to avoid suggesting them as we've found
- // properly matched delimiters so far for an entire block.
- self.matching_delim_spans.clear();
- } else {
- self.matching_delim_spans.push((open_brace, open_brace_span, close_brace_span));
- }
// Move past the closing delimiter.
self.token = self.string_reader.next_token().0;
}
@@ -174,28 +143,25 @@ impl<'a> TokenTreesReader<'a> {
let mut unclosed_delimiter = None;
let mut candidate = None;
- if self.last_unclosed_found_span != Some(self.token.span) {
+ if self.diag_info.last_unclosed_found_span != Some(self.token.span) {
// do not complain about the same unclosed delimiter multiple times
- self.last_unclosed_found_span = Some(self.token.span);
+ self.diag_info.last_unclosed_found_span = Some(self.token.span);
// This is a conservative error: only report the last unclosed
// delimiter. The previous unclosed delimiters could actually be
// closed! The parser just hasn't gotten to them yet.
- if let Some(&(_, sp)) = self.open_braces.last() {
+ if let Some(&(_, sp)) = self.diag_info.open_braces.last() {
unclosed_delimiter = Some(sp);
};
- let sm = self.string_reader.sess.source_map();
- if let Some(current_padding) = sm.span_to_margin(self.token.span) {
- for (brace, brace_span) in &self.open_braces {
- if let Some(padding) = sm.span_to_margin(*brace_span) {
- // high likelihood of these two corresponding
- if current_padding == padding && brace == &close_delim {
- candidate = Some(*brace_span);
- }
- }
+ for (brace, brace_span) in &self.diag_info.open_braces {
+ if same_identation_level(&sm, self.token.span, *brace_span)
+ && brace == &close_delim
+ {
+ // high likelihood of these two corresponding
+ candidate = Some(*brace_span);
}
}
- let (tok, _) = self.open_braces.pop().unwrap();
- self.unmatched_braces.push(UnmatchedBrace {
+ let (tok, _) = self.diag_info.open_braces.pop().unwrap();
+ self.diag_info.unmatched_delims.push(UnmatchedDelim {
expected_delim: tok,
found_delim: Some(close_delim),
found_span: self.token.span,
@@ -203,7 +169,7 @@ impl<'a> TokenTreesReader<'a> {
candidate_span: candidate,
});
} else {
- self.open_braces.pop();
+ self.diag_info.open_braces.pop();
}
// If the incorrect delimiter matches an earlier opening
@@ -213,7 +179,7 @@ impl<'a> TokenTreesReader<'a> {
// fn foo() {
// bar(baz(
// } // Incorrect delimiter but matches the earlier `{`
- if !self.open_braces.iter().any(|&(b, _)| b == close_delim) {
+ if !self.diag_info.open_braces.iter().any(|&(b, _)| b == close_delim) {
self.token = self.string_reader.next_token().0;
}
}
@@ -225,7 +191,7 @@ impl<'a> TokenTreesReader<'a> {
_ => unreachable!(),
}
- TokenTree::Delimited(delim_span, open_delim, tts)
+ Ok(TokenTree::Delimited(delim_span, open_delim, tts))
}
fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'a> {
@@ -236,22 +202,12 @@ impl<'a> TokenTreesReader<'a> {
let mut err =
self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
- // Braces are added at the end, so the last element is the biggest block
- if let Some(parent) = self.matching_block_spans.last() {
- if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
- // Check if the (empty block) is in the last properly closed block
- if (parent.0.to(parent.1)).contains(span) {
- err.span_label(span, "block is empty, you might have not meant to close it");
- } else {
- err.span_label(parent.0, "this opening brace...");
- err.span_label(parent.1, "...matches this closing brace");
- }
- } else {
- err.span_label(parent.0, "this opening brace...");
- err.span_label(parent.1, "...matches this closing brace");
- }
- }
-
+ report_suspicious_mismatch_block(
+ &mut err,
+ &self.diag_info,
+ &self.string_reader.sess.source_map(),
+ delim,
+ );
err.span_label(self.token.span, "unexpected closing delimiter");
err
}
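
In tokentrees.rs the key control-flow change is that parse_token_tree_open_delim now returns PResult and the EOF-inside-a-group case is propagated with `?` rather than emitted and swallowed by an `.unwrap()`. A toy bracket parser showing the same shape (illustrative only; not the compiler's types):

use std::str::Chars;

#[derive(Debug)]
enum Tree {
    Leaf(char),
    Group(Vec<Tree>),
}

// `is_delimited` plays the same role as in `parse_token_trees`: inside a group,
// EOF is an error that the caller propagates with `?`.
fn parse_trees(it: &mut Chars<'_>, is_delimited: bool) -> Result<Vec<Tree>, String> {
    let mut buf = Vec::new();
    loop {
        match it.next() {
            Some('{') => buf.push(Tree::Group(parse_trees(it, true)?)),
            Some('}') if is_delimited => return Ok(buf),
            Some('}') => return Err("unexpected closing delimiter".to_string()),
            Some(c) => buf.push(Tree::Leaf(c)),
            None if is_delimited => return Err("this file contains an unclosed delimiter".to_string()),
            None => return Ok(buf),
        }
    }
}

fn main() {
    println!("{:?}", parse_trees(&mut "a{b{c}}d".chars(), false)); // Ok(...)
    println!("{:?}", parse_trees(&mut "a{b".chars(), false));      // Err(...)
}
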
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 6373f5b4f..0d12ec608 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -3,10 +3,12 @@
use std::iter::once;
use std::ops::Range;
-use rustc_errors::{pluralize, Applicability, Handler};
+use rustc_errors::{Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};
+use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
+
pub(crate) fn emit_unescape_error(
handler: &Handler,
// interior part of the literal, without quotes
@@ -31,53 +33,32 @@ pub(crate) fn emit_unescape_error(
};
match error {
EscapeError::LoneSurrogateUnicodeEscape => {
- handler
- .struct_span_err(span, "invalid unicode character escape")
- .span_label(span, "invalid escape")
- .help("unicode escape must not be a surrogate")
- .emit();
+ handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
}
EscapeError::OutOfRangeUnicodeEscape => {
- handler
- .struct_span_err(span, "invalid unicode character escape")
- .span_label(span, "invalid escape")
- .help("unicode escape must be at most 10FFFF")
- .emit();
+ handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
}
EscapeError::MoreThanOneChar => {
use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
+ let mut sugg = None;
+ let mut note = None;
- let mut has_help = false;
- let mut handler = handler.struct_span_err(
- span_with_quotes,
- "character literal may only contain one codepoint",
- );
-
- if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
- let escaped_marks =
- lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
- handler.span_note(
- span,
- &format!(
- "this `{}` is followed by the combining mark{} `{}`",
- lit.chars().next().unwrap(),
- pluralize!(escaped_marks.len()),
- escaped_marks.join(""),
- ),
- );
+ let lit_chars = lit.chars().collect::<Vec<_>>();
+ let (first, rest) = lit_chars.split_first().unwrap();
+ if rest.iter().copied().all(is_combining_mark) {
let normalized = lit.nfc().to_string();
if normalized.chars().count() == 1 {
- has_help = true;
- handler.span_suggestion(
- span,
- &format!(
- "consider using the normalized form `{}` of this character",
- normalized.chars().next().unwrap().escape_default()
- ),
- normalized,
- Applicability::MachineApplicable,
- );
+ let ch = normalized.chars().next().unwrap().escape_default().to_string();
+ sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
}
+ let escaped_marks =
+ rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
+ note = Some(MoreThanOneCharNote::AllCombining {
+ span,
+ chr: format!("{first}"),
+ len: escaped_marks.len(),
+ escaped_marks: escaped_marks.join(""),
+ });
} else {
let printable: Vec<char> = lit
.chars()
@@ -87,32 +68,18 @@ pub(crate) fn emit_unescape_error(
})
.collect();
- if let [ch] = printable.as_slice() {
- has_help = true;
-
- handler.span_note(
+ if let &[ch] = printable.as_slice() {
+ sugg =
+ Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
+ note = Some(MoreThanOneCharNote::NonPrinting {
span,
- &format!(
- "there are non-printing characters, the full sequence is `{}`",
- lit.escape_default(),
- ),
- );
-
- handler.span_suggestion(
- span,
- "consider removing the non-printing characters",
- ch,
- Applicability::MaybeIncorrect,
- );
+ escaped: lit.escape_default().to_string(),
+ });
}
- }
-
- if !has_help {
- let (prefix, msg) = if mode.is_byte() {
- ("b", "if you meant to write a byte string literal, use double quotes")
- } else {
- ("", "if you meant to write a `str` literal, use double quotes")
- };
+ };
+ let sugg = sugg.unwrap_or_else(|| {
+ let is_byte = mode.is_byte();
+ let prefix = if is_byte { "b" } else { "" };
let mut escaped = String::with_capacity(lit.len());
let mut chrs = lit.chars().peekable();
while let Some(first) = chrs.next() {
@@ -129,54 +96,32 @@ pub(crate) fn emit_unescape_error(
(c, _) => escaped.push(c),
};
}
- handler.span_suggestion(
- span_with_quotes,
- msg,
- format!("{prefix}\"{escaped}\""),
- Applicability::MachineApplicable,
- );
- }
-
- handler.emit();
+ let sugg = format!("{prefix}\"{escaped}\"");
+ MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+ });
+ handler.emit_err(UnescapeError::MoreThanOneChar {
+ span: span_with_quotes,
+ note,
+ suggestion: sugg,
+ });
}
EscapeError::EscapeOnlyChar => {
let (c, char_span) = last_char();
-
- let msg = if mode.is_byte() {
- "byte constant must be escaped"
- } else {
- "character constant must be escaped"
- };
- handler
- .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
- .span_suggestion(
- char_span,
- "escape the character",
- c.escape_default(),
- Applicability::MachineApplicable,
- )
- .emit();
+ handler.emit_err(UnescapeError::EscapeOnlyChar {
+ span,
+ char_span,
+ escaped_sugg: c.escape_default().to_string(),
+ escaped_msg: escaped_char(c),
+ byte: mode.is_byte(),
+ });
}
EscapeError::BareCarriageReturn => {
- let msg = if mode.in_double_quotes() {
- "bare CR not allowed in string, use `\\r` instead"
- } else {
- "character constant must be escaped: `\\r`"
- };
- handler
- .struct_span_err(span, msg)
- .span_suggestion(
- span,
- "escape the character",
- "\\r",
- Applicability::MachineApplicable,
- )
- .emit();
+ let double_quotes = mode.in_double_quotes();
+ handler.emit_err(UnescapeError::BareCr { span, double_quotes });
}
EscapeError::BareCarriageReturnInRawString => {
assert!(mode.in_double_quotes());
- let msg = "bare CR not allowed in raw string";
- handler.span_err(span, msg);
+ handler.emit_err(UnescapeError::BareCrRawString(span));
}
EscapeError::InvalidEscape => {
let (c, span) = last_char();
@@ -213,22 +158,13 @@ pub(crate) fn emit_unescape_error(
diag.emit();
}
EscapeError::TooShortHexEscape => {
- handler.span_err(span, "numeric character escape is too short");
+ handler.emit_err(UnescapeError::TooShortHexEscape(span));
}
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
let (c, span) = last_char();
-
- let msg = if error == EscapeError::InvalidCharInHexEscape {
- "invalid character in numeric character escape"
- } else {
- "invalid character in unicode escape"
- };
- let c = escaped_char(c);
-
- handler
- .struct_span_err(span, &format!("{}: `{}`", msg, c))
- .span_label(span, msg)
- .emit();
+ let is_hex = error == EscapeError::InvalidCharInHexEscape;
+ let ch = escaped_char(c);
+ handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
}
EscapeError::NonAsciiCharInByte => {
let (c, span) = last_char();
@@ -278,41 +214,22 @@ pub(crate) fn emit_unescape_error(
err.emit();
}
EscapeError::OutOfRangeHexEscape => {
- handler
- .struct_span_err(span, "out of range hex escape")
- .span_label(span, "must be a character in the range [\\x00-\\x7f]")
- .emit();
+ handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
}
EscapeError::LeadingUnderscoreUnicodeEscape => {
let (c, span) = last_char();
- let msg = "invalid start of unicode escape";
- handler
- .struct_span_err(span, &format!("{}: `{}`", msg, c))
- .span_label(span, msg)
- .emit();
+ handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
+ span,
+ ch: escaped_char(c),
+ });
}
EscapeError::OverlongUnicodeEscape => {
- handler
- .struct_span_err(span, "overlong unicode escape")
- .span_label(span, "must have at most 6 hex digits")
- .emit();
+ handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
}
EscapeError::UnclosedUnicodeEscape => {
- handler
- .struct_span_err(span, "unterminated unicode escape")
- .span_label(span, "missing a closing `}`")
- .span_suggestion_verbose(
- span.shrink_to_hi(),
- "terminate the unicode escape",
- "}",
- Applicability::MaybeIncorrect,
- )
- .emit();
+ handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
}
EscapeError::NoBraceInUnicodeEscape => {
- let msg = "incorrect unicode escape sequence";
- let mut diag = handler.struct_span_err(span, msg);
-
let mut suggestion = "\\u{".to_owned();
let mut suggestion_len = 0;
let (c, char_span) = last_char();
@@ -322,54 +239,37 @@ pub(crate) fn emit_unescape_error(
suggestion_len += c.len_utf8();
}
- if suggestion_len > 0 {
+ let (label, sub) = if suggestion_len > 0 {
suggestion.push('}');
let hi = char_span.lo() + BytePos(suggestion_len as u32);
- diag.span_suggestion(
- span.with_hi(hi),
- "format of unicode escape sequences uses braces",
- suggestion,
- Applicability::MaybeIncorrect,
- );
+ (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
} else {
- diag.span_label(span, msg);
- diag.help("format of unicode escape sequences is `\\u{...}`");
- }
-
- diag.emit();
+ (Some(span), NoBraceUnicodeSub::Help)
+ };
+ handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
}
EscapeError::UnicodeEscapeInByte => {
- let msg = "unicode escape in byte string";
- handler
- .struct_span_err(span, msg)
- .span_label(span, msg)
- .help("unicode escape sequences cannot be used as a byte or in a byte string")
- .emit();
+ handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
}
EscapeError::EmptyUnicodeEscape => {
- handler
- .struct_span_err(span, "empty unicode escape")
- .span_label(span, "this escape must have at least 1 hex digit")
- .emit();
+ handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
}
EscapeError::ZeroChars => {
- let msg = "empty character literal";
- handler.struct_span_err(span, msg).span_label(span, msg).emit();
+ handler.emit_err(UnescapeError::ZeroChars(span));
}
EscapeError::LoneSlash => {
- let msg = "invalid trailing slash in literal";
- handler.struct_span_err(span, msg).span_label(span, msg).emit();
+ handler.emit_err(UnescapeError::LoneSlash(span));
}
EscapeError::UnskippedWhitespaceWarning => {
let (c, char_span) = last_char();
- let msg =
- format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
- handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
+ handler.emit_warning(UnescapeError::UnskippedWhitespace {
+ span,
+ ch: escaped_char(c),
+ char_span,
+ });
}
EscapeError::MultipleSkippedLinesWarning => {
- let msg = "multiple lines skipped by escaped newline";
- let bottom_msg = "skipping everything up to and including this point";
- handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
+ handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
}
}
}
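
The pattern throughout unescape_error_reporting.rs is replacing struct_span_err(...).span_label(...).emit() chains with handler.emit_err(UnescapeError::...), so the reporting site only picks a variant and supplies the data. A loose sketch of that idea with hand-rolled types (rustc actually uses the Diagnostic derive and Fluent messages; all names below are made up):

// Sketch: the wording lives in one place, the call site only chooses a variant.
type Span = std::ops::Range<usize>;

enum UnescapeIssue {
    TooShortHexEscape(Span),
    OverlongUnicodeEscape(Span),
    InvalidCharInEscape { span: Span, is_hex: bool, ch: String },
}

fn emit(issue: UnescapeIssue) {
    let (span, msg) = match issue {
        UnescapeIssue::TooShortHexEscape(span) => (span, "numeric character escape is too short".to_string()),
        UnescapeIssue::OverlongUnicodeEscape(span) => (span, "overlong unicode escape".to_string()),
        UnescapeIssue::InvalidCharInEscape { span, is_hex, ch } => {
            let kind = if is_hex { "numeric character" } else { "unicode" };
            (span, format!("invalid character in {kind} escape: `{ch}`"))
        }
    };
    eprintln!("error at {span:?}: {msg}");
}

fn main() {
    emit(UnescapeIssue::InvalidCharInEscape { span: 3..4, is_hex: true, ch: "z".into() });
}
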
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 34d003ccf..d4f971d5b 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -2,8 +2,10 @@
//! <https://www.unicode.org/Public/security/10.0.0/confusables.txt>
use super::StringReader;
-use crate::token::{self, Delimiter};
-use rustc_errors::{Applicability, Diagnostic};
+use crate::{
+ errors::TokenSubstitution,
+ token::{self, Delimiter},
+};
use rustc_span::{symbol::kw, BytePos, Pos, Span};
#[rustfmt::skip] // for line breaks
@@ -338,48 +340,44 @@ pub(super) fn check_for_substitution<'a>(
reader: &StringReader<'a>,
pos: BytePos,
ch: char,
- err: &mut Diagnostic,
count: usize,
-) -> Option<token::TokenKind> {
- let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
+) -> (Option<token::TokenKind>, Option<TokenSubstitution>) {
+ let Some(&(_, u_name, ascii_str)) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) else {
+ return (None, None);
+ };
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
let msg = format!("substitution character not found for '{}'", ch);
reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
- return None;
+ return (None, None);
};
// special help suggestion for "directed" double quotes
- if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
- let msg = format!(
- "Unicode characters '“' (Left Double Quotation Mark) and \
- '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
- ascii_str, ascii_name
- );
- err.span_suggestion(
- Span::with_root_ctxt(
- pos,
- pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
- ),
- &msg,
- format!("\"{}\"", s),
- Applicability::MaybeIncorrect,
+ let sugg = if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
+ let span = Span::with_root_ctxt(
+ pos,
+ pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
);
+ Some(TokenSubstitution::DirectedQuotes {
+ span,
+ suggestion: format!("\"{s}\""),
+ ascii_str,
+ ascii_name,
+ })
} else {
- let msg = format!(
- "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
- ch, u_name, ascii_str, ascii_name
- );
- err.span_suggestion(
+ let suggestion = ascii_str.to_string().repeat(count);
+ Some(TokenSubstitution::Other {
span,
- &msg,
- ascii_str.to_string().repeat(count),
- Applicability::MaybeIncorrect,
- );
- }
- token.clone()
+ suggestion,
+ ch: ch.to_string(),
+ u_name,
+ ascii_str,
+ ascii_name,
+ })
+ };
+ (token.clone(), sugg)
}
/// Extract string if found at current position with given delimiters
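
After this change, check_for_substitution no longer mutates a Diagnostic passed in by the caller; it returns the recovered token and the suggestion as plain data, and mod.rs attaches the suggestion to UnknownTokenStart. A small standalone sketch of that lookup-and-return style (the table contents and names here are illustrative, not the full UNICODE_ARRAY):

// Sketch: look up a confusable character and return (recovered char, suggestion)
// instead of writing into a mutable diagnostic.
const CONFUSABLES: &[(char, &str, char)] = &[
    ('；', "Fullwidth Semicolon", ';'),
    ('‘', "Left Single Quotation Mark", '\''),
];

fn check_for_substitution(ch: char) -> (Option<char>, Option<String>) {
    match CONFUSABLES.iter().find(|&&(c, _, _)| c == ch) {
        Some(&(_, name, ascii)) => (
            Some(ascii),
            Some(format!("Unicode character '{ch}' ({name}) looks like '{ascii}', but it is not")),
        ),
        None => (None, None),
    }
}

fn main() {
    let (token, sugg) = check_for_substitution('；');
    println!("{token:?} {sugg:?}");
}
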