summary | refs | log | tree | commit | diff | stats
path: root/compiler/rustc_parse/src/lexer/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
-rw-r--r-- compiler/rustc_parse/src/lexer/mod.rs 189
1 files changed, 74 insertions, 115 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9fe8d9836..59958a309 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,11 +1,11 @@
+use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
+use crate::make_unclosed_delims_error;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
- error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
@@ -17,6 +17,7 @@ use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{edition::Edition, BytePos, Pos, Span};
+mod diagnostics;
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;
@@ -31,7 +32,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
#[derive(Clone, Debug)]
-pub struct UnmatchedBrace {
+pub struct UnmatchedDelim {
pub expected_delim: Delimiter,
pub found_delim: Option<Delimiter>,
pub found_span: Span,
@@ -44,7 +45,7 @@ pub(crate) fn parse_token_trees<'a>(
mut src: &'a str,
mut start_pos: BytePos,
override_span: Option<Span>,
-) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+) -> Result<TokenStream, Vec<Diagnostic>> {
// Skip `#!`, if present.
if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
src = &src[shebang_len..];
@@ -61,7 +62,29 @@ pub(crate) fn parse_token_trees<'a>(
override_span,
nbsp_is_whitespace: false,
};
- tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
+ let (token_trees, unmatched_delims) =
+ tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
+ match token_trees {
+ Ok(stream) if unmatched_delims.is_empty() => Ok(stream),
+ _ => {
+ // Return an error if there are mismatched delimiters or unclosed delimiters.
+ // We emit the delimiter mismatch errors first, then the unclosed delimiter error,
+ // because a delimiter mismatch is more likely to be the root cause of the error.
+
+ let mut buffer = Vec::with_capacity(1);
+ // Not using `emit_unclosed_delims` here because we need to buffer the errors (`err.buffer`).
+ for unmatched in unmatched_delims {
+ if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
+ err.buffer(&mut buffer);
+ }
+ }
+ if let Err(err) = token_trees {
+ // Add the unclosed delimiter error
+ err.buffer(&mut buffer);
+ }
+ Err(buffer)
+ }
+ }
}
struct StringReader<'a> {
@@ -150,7 +173,7 @@ impl<'a> StringReader<'a> {
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
if !sym.can_be_raw() {
- self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+ self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
token::Ident(sym, true)
@@ -261,27 +284,24 @@ impl<'a> StringReader<'a> {
self.nbsp_is_whitespace = true;
}
let repeats = it.take_while(|c1| *c1 == c).count();
- let mut err =
- self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
// FIXME: the lexer could be used to turn the ASCII version of unicode
// homoglyphs, instead of keeping a table in `check_for_substitution` into the
// token. Ideally, this should be inside `rustc_lexer`. However, we should
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
// fancier error recovery to it, as there will be less overall work to do this
// way.
- let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
- if c == '\x00' {
- err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
- }
- if repeats > 0 {
- if repeats == 1 {
- err.note(format!("character appears once more"));
- } else {
- err.note(format!("character appears {repeats} more times"));
- }
- swallow_next_invalid = repeats;
- }
- err.emit();
+ let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+ self.sess.emit_err(errors::UnknownTokenStart {
+ span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+ escaped: escaped_char(c),
+ sugg,
+ null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+ repeat: if repeats > 0 {
+ swallow_next_invalid = repeats;
+ Some(errors::UnknownTokenRepeat { repeats })
+ } else {None}
+ });
+
if let Some(token) = token {
token
} else {
@@ -296,26 +316,6 @@ impl<'a> StringReader<'a> {
}
}
- /// Report a fatal lexical error with a given span.
- fn fatal_span(&self, sp: Span, m: &str) -> ! {
- self.sess.span_diagnostic.span_fatal(sp, m)
- }
-
- /// Report a lexical error with a given span.
- fn err_span(&self, sp: Span, m: &str) {
- self.sess.span_diagnostic.struct_span_err(sp, m).emit();
- }
-
- /// Report a fatal error spanning [`from_pos`, `to_pos`).
- fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
- self.fatal_span(self.mk_sp(from_pos, to_pos), m)
- }
-
- /// Report a lexical error spanning [`from_pos`, `to_pos`).
- fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
- self.err_span(self.mk_sp(from_pos, to_pos), m)
- }
-
fn struct_fatal_span_char(
&self,
from_pos: BytePos,
@@ -328,18 +328,6 @@ impl<'a> StringReader<'a> {
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
- fn struct_err_span_char(
- &self,
- from_pos: BytePos,
- to_pos: BytePos,
- m: &str,
- c: char,
- ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
- self.sess
- .span_diagnostic
- .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
- }
-
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
/// complain about it.
fn lint_unicode_text_flow(&self, start: BytePos) {
@@ -367,14 +355,12 @@ impl<'a> StringReader<'a> {
) -> TokenKind {
if content.contains('\r') {
for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
- self.err_span_(
+ let span = self.mk_sp(
content_start + BytePos(idx as u32),
content_start + BytePos(idx as u32 + 1),
- match comment_kind {
- CommentKind::Line => "bare CR not allowed in doc-comment",
- CommentKind::Block => "bare CR not allowed in block doc-comment",
- },
);
+ let block = matches!(comment_kind, CommentKind::Block);
+ self.sess.emit_err(errors::CrDocComment { span, block });
}
}
@@ -453,26 +439,20 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Int { base, empty_int } => {
if empty_int {
- self.sess
- .span_diagnostic
- .struct_span_err_with_code(
- self.mk_sp(start, end),
- "no valid digits found for number",
- error_code!(E0768),
- )
- .emit();
+ let span = self.mk_sp(start, end);
+ self.sess.emit_err(errors::NoDigitsLiteral { span });
(token::Integer, sym::integer(0))
} else {
if matches!(base, Base::Binary | Base::Octal) {
let base = base as u32;
let s = self.str_from_to(start + BytePos(2), end);
for (idx, c) in s.char_indices() {
+ let span = self.mk_sp(
+ start + BytePos::from_usize(2 + idx),
+ start + BytePos::from_usize(2 + idx + c.len_utf8()),
+ );
if c != '_' && c.to_digit(base).is_none() {
- self.err_span_(
- start + BytePos::from_usize(2 + idx),
- start + BytePos::from_usize(2 + idx + c.len_utf8()),
- &format!("invalid digit for a base {} literal", base),
- );
+ self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
}
}
}
@@ -481,19 +461,18 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
if empty_exponent {
- self.err_span_(start, self.pos, "expected at least one digit in exponent");
+ let span = self.mk_sp(start, self.pos);
+ self.sess.emit_err(errors::EmptyExponentFloat { span });
}
- match base {
- Base::Hexadecimal => {
- self.err_span_(start, end, "hexadecimal float literal is not supported")
- }
- Base::Octal => {
- self.err_span_(start, end, "octal float literal is not supported")
- }
- Base::Binary => {
- self.err_span_(start, end, "binary float literal is not supported")
- }
- _ => {}
+ let base = match base {
+ Base::Hexadecimal => Some("hexadecimal"),
+ Base::Octal => Some("octal"),
+ Base::Binary => Some("binary"),
+ _ => None,
+ };
+ if let Some(base) = base {
+ let span = self.mk_sp(start, end);
+ self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
}
(token::Float, self.symbol_from_to(start, end))
}
@@ -643,54 +622,34 @@ impl<'a> StringReader<'a> {
// identifier tokens.
fn report_unknown_prefix(&self, start: BytePos) {
let prefix_span = self.mk_sp(start, self.pos);
- let prefix_str = self.str_from_to(start, self.pos);
- let msg = format!("prefix `{}` is unknown", prefix_str);
+ let prefix = self.str_from_to(start, self.pos);
let expn_data = prefix_span.ctxt().outer_expn_data();
if expn_data.edition >= Edition::Edition2021 {
// In Rust 2021, this is a hard error.
- let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
- err.span_label(prefix_span, "unknown prefix");
- if prefix_str == "rb" {
- err.span_suggestion_verbose(
- prefix_span,
- "use `br` for a raw byte string",
- "br",
- Applicability::MaybeIncorrect,
- );
+ let sugg = if prefix == "rb" {
+ Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
} else if expn_data.is_root() {
- err.span_suggestion_verbose(
- prefix_span.shrink_to_hi(),
- "consider inserting whitespace here",
- " ",
- Applicability::MaybeIncorrect,
- );
- }
- err.note("prefixed identifiers and literals are reserved since Rust 2021");
- err.emit();
+ Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+ } else {
+ None
+ };
+ self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
} else {
// Before Rust 2021, only emit a lint for migration.
self.sess.buffer_lint_with_diagnostic(
&RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
prefix_span,
ast::CRATE_NODE_ID,
- &msg,
+ &format!("prefix `{prefix}` is unknown"),
BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
);
}
}
- fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
- self.fatal_span_(
- start,
- self.pos,
- &format!(
- "too many `#` symbols: raw strings may be delimited \
- by up to 255 `#` symbols, but found {}",
- found
- ),
- )
+ fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+ self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
fn cook_quoted(