summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs399
-rw-r--r--compiler/rustc_parse/src/lexer/tokentrees.rs428
-rw-r--r--compiler/rustc_parse/src/lexer/unescape_error_reporting.rs27
3 files changed, 415 insertions, 439 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 848e142e5..462bce16a 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,10 +1,13 @@
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
-use rustc_ast::tokenstream::{Spacing, TokenStream};
+use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult};
+use rustc_errors::{
+ error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
+};
use rustc_lexer::unescape::{self, Mode};
+use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::{
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
@@ -14,8 +17,6 @@ use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{edition::Edition, BytePos, Pos, Span};
-use tracing::debug;
-
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;
@@ -40,11 +41,20 @@ pub struct UnmatchedBrace {
pub(crate) fn parse_token_trees<'a>(
sess: &'a ParseSess,
- src: &'a str,
- start_pos: BytePos,
+ mut src: &'a str,
+ mut start_pos: BytePos,
override_span: Option<Span>,
) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
- StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees()
+ // Skip `#!`, if present.
+ if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
+ src = &src[shebang_len..];
+ start_pos = start_pos + BytePos::from_usize(shebang_len);
+ }
+
+ let cursor = Cursor::new(src);
+ let string_reader =
+ StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
+ tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
}
struct StringReader<'a> {
@@ -55,6 +65,8 @@ struct StringReader<'a> {
pos: BytePos,
/// Source text to tokenize.
src: &'a str,
+ /// Cursor for getting lexer tokens.
+ cursor: Cursor<'a>,
override_span: Option<Span>,
}
@@ -63,42 +75,198 @@ impl<'a> StringReader<'a> {
self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
}
- /// Returns the next token, and info about preceding whitespace, if any.
- fn next_token(&mut self) -> (Spacing, Token) {
- let mut spacing = Spacing::Joint;
-
- // Skip `#!` at the start of the file
- if self.pos == self.start_pos
- && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src)
- {
- self.pos = self.pos + BytePos::from_usize(shebang_len);
- spacing = Spacing::Alone;
- }
+ /// Returns the next token, paired with a bool indicating if the token was
+ /// preceded by whitespace.
+ fn next_token(&mut self) -> (Token, bool) {
+ let mut preceded_by_whitespace = false;
// Skip trivial (whitespace & comments) tokens
loop {
- let start_src_index = self.src_index(self.pos);
- let text: &str = &self.src[start_src_index..];
-
- if text.is_empty() {
- let span = self.mk_sp(self.pos, self.pos);
- return (spacing, Token::new(token::Eof, span));
- }
-
- let token = rustc_lexer::first_token(text);
-
+ let token = self.cursor.advance_token();
let start = self.pos;
self.pos = self.pos + BytePos(token.len);
debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
- match self.cook_lexer_token(token.kind, start) {
- Some(kind) => {
+ // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
+ // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
+ // additional validation.
+ let kind = match token.kind {
+ rustc_lexer::TokenKind::LineComment { doc_style } => {
+ // Skip non-doc comments
+ let Some(doc_style) = doc_style else {
+ self.lint_unicode_text_flow(start);
+ preceded_by_whitespace = true;
+ continue;
+ };
+
+ // Opening delimiter of the length 3 is not included into the symbol.
+ let content_start = start + BytePos(3);
+ let content = self.str_from(content_start);
+ self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
+ }
+ rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
+ if !terminated {
+ self.report_unterminated_block_comment(start, doc_style);
+ }
+
+ // Skip non-doc comments
+ let Some(doc_style) = doc_style else {
+ self.lint_unicode_text_flow(start);
+ preceded_by_whitespace = true;
+ continue;
+ };
+
+ // Opening delimiter of the length 3 and closing delimiter of the length 2
+ // are not included into the symbol.
+ let content_start = start + BytePos(3);
+ let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
+ let content = self.str_from_to(content_start, content_end);
+ self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
+ }
+ rustc_lexer::TokenKind::Whitespace => {
+ preceded_by_whitespace = true;
+ continue;
+ }
+ rustc_lexer::TokenKind::Ident => {
+ let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
- return (spacing, Token::new(kind, span));
+ self.sess.symbol_gallery.insert(sym, span);
+ token::Ident(sym, false)
}
- None => spacing = Spacing::Alone,
- }
+ rustc_lexer::TokenKind::RawIdent => {
+ let sym = nfc_normalize(self.str_from(start + BytePos(2)));
+ let span = self.mk_sp(start, self.pos);
+ self.sess.symbol_gallery.insert(sym, span);
+ if !sym.can_be_raw() {
+ self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+ }
+ self.sess.raw_identifier_spans.borrow_mut().push(span);
+ token::Ident(sym, true)
+ }
+ rustc_lexer::TokenKind::UnknownPrefix => {
+ self.report_unknown_prefix(start);
+ let sym = nfc_normalize(self.str_from(start));
+ let span = self.mk_sp(start, self.pos);
+ self.sess.symbol_gallery.insert(sym, span);
+ token::Ident(sym, false)
+ }
+ rustc_lexer::TokenKind::InvalidIdent
+ // Do not recover an identifier with emoji if the codepoint is a confusable
+ // with a recoverable substitution token, like `➖`.
+ if !UNICODE_ARRAY
+ .iter()
+ .any(|&(c, _, _)| {
+ let sym = self.str_from(start);
+ sym.chars().count() == 1 && c == sym.chars().next().unwrap()
+ }) =>
+ {
+ let sym = nfc_normalize(self.str_from(start));
+ let span = self.mk_sp(start, self.pos);
+ self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
+ .push(span);
+ token::Ident(sym, false)
+ }
+ rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
+ let suffix_start = start + BytePos(suffix_start);
+ let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
+ let suffix = if suffix_start < self.pos {
+ let string = self.str_from(suffix_start);
+ if string == "_" {
+ self.sess
+ .span_diagnostic
+ .struct_span_warn(
+ self.mk_sp(suffix_start, self.pos),
+ "underscore literal suffix is not allowed",
+ )
+ .warn(
+ "this was previously accepted by the compiler but is \
+ being phased out; it will become a hard error in \
+ a future release!",
+ )
+ .note(
+ "see issue #42326 \
+ <https://github.com/rust-lang/rust/issues/42326> \
+ for more information",
+ )
+ .emit();
+ None
+ } else {
+ Some(Symbol::intern(string))
+ }
+ } else {
+ None
+ };
+ token::Literal(token::Lit { kind, symbol, suffix })
+ }
+ rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
+ // Include the leading `'` in the real identifier, for macro
+ // expansion purposes. See #12512 for the gory details of why
+ // this is necessary.
+ let lifetime_name = self.str_from(start);
+ if starts_with_number {
+ let span = self.mk_sp(start, self.pos);
+ let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
+ diag.set_span(span);
+ diag.stash(span, StashKey::LifetimeIsChar);
+ }
+ let ident = Symbol::intern(lifetime_name);
+ token::Lifetime(ident)
+ }
+ rustc_lexer::TokenKind::Semi => token::Semi,
+ rustc_lexer::TokenKind::Comma => token::Comma,
+ rustc_lexer::TokenKind::Dot => token::Dot,
+ rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis),
+ rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis),
+ rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace),
+ rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace),
+ rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket),
+ rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket),
+ rustc_lexer::TokenKind::At => token::At,
+ rustc_lexer::TokenKind::Pound => token::Pound,
+ rustc_lexer::TokenKind::Tilde => token::Tilde,
+ rustc_lexer::TokenKind::Question => token::Question,
+ rustc_lexer::TokenKind::Colon => token::Colon,
+ rustc_lexer::TokenKind::Dollar => token::Dollar,
+ rustc_lexer::TokenKind::Eq => token::Eq,
+ rustc_lexer::TokenKind::Bang => token::Not,
+ rustc_lexer::TokenKind::Lt => token::Lt,
+ rustc_lexer::TokenKind::Gt => token::Gt,
+ rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
+ rustc_lexer::TokenKind::And => token::BinOp(token::And),
+ rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
+ rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
+ rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
+ rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
+ rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
+ rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
+
+ rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
+ let c = self.str_from(start).chars().next().unwrap();
+ let mut err =
+ self.struct_err_span_char(start, self.pos, "unknown start of token", c);
+ // FIXME: the lexer could be used to turn the ASCII version of unicode
+ // homoglyphs, instead of keeping a table in `check_for_substitution`into the
+ // token. Ideally, this should be inside `rustc_lexer`. However, we should
+ // first remove compound tokens like `<<` from `rustc_lexer`, and then add
+ // fancier error recovery to it, as there will be less overall work to do this
+ // way.
+ let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
+ if c == '\x00' {
+ err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
+ }
+ err.emit();
+ if let Some(token) = token {
+ token
+ } else {
+ preceded_by_whitespace = true;
+ continue;
+ }
+ }
+ rustc_lexer::TokenKind::Eof => token::Eof,
+ };
+ let span = self.mk_sp(start, self.pos);
+ return (Token::new(kind, span), preceded_by_whitespace);
}
}
@@ -164,171 +332,6 @@ impl<'a> StringReader<'a> {
}
}
- /// Turns simple `rustc_lexer::TokenKind` enum into a rich
- /// `rustc_ast::TokenKind`. This turns strings into interned
- /// symbols and runs additional validation.
- fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> {
- Some(match token {
- rustc_lexer::TokenKind::LineComment { doc_style } => {
- // Skip non-doc comments
- let Some(doc_style) = doc_style else {
- self.lint_unicode_text_flow(start);
- return None;
- };
-
- // Opening delimiter of the length 3 is not included into the symbol.
- let content_start = start + BytePos(3);
- let content = self.str_from(content_start);
- self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
- }
- rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
- if !terminated {
- self.report_unterminated_block_comment(start, doc_style);
- }
-
- // Skip non-doc comments
- let Some(doc_style) = doc_style else {
- self.lint_unicode_text_flow(start);
- return None;
- };
-
- // Opening delimiter of the length 3 and closing delimiter of the length 2
- // are not included into the symbol.
- let content_start = start + BytePos(3);
- let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
- let content = self.str_from_to(content_start, content_end);
- self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
- }
- rustc_lexer::TokenKind::Whitespace => return None,
- rustc_lexer::TokenKind::Ident
- | rustc_lexer::TokenKind::RawIdent
- | rustc_lexer::TokenKind::UnknownPrefix => {
- let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
- let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix;
- let mut ident_start = start;
- if is_raw_ident {
- ident_start = ident_start + BytePos(2);
- }
- if is_unknown_prefix {
- self.report_unknown_prefix(start);
- }
- let sym = nfc_normalize(self.str_from(ident_start));
- let span = self.mk_sp(start, self.pos);
- self.sess.symbol_gallery.insert(sym, span);
- if is_raw_ident {
- if !sym.can_be_raw() {
- self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
- }
- self.sess.raw_identifier_spans.borrow_mut().push(span);
- }
- token::Ident(sym, is_raw_ident)
- }
- rustc_lexer::TokenKind::InvalidIdent
- // Do not recover an identifier with emoji if the codepoint is a confusable
- // with a recoverable substitution token, like `➖`.
- if !UNICODE_ARRAY
- .iter()
- .any(|&(c, _, _)| {
- let sym = self.str_from(start);
- sym.chars().count() == 1 && c == sym.chars().next().unwrap()
- })
- =>
- {
- let sym = nfc_normalize(self.str_from(start));
- let span = self.mk_sp(start, self.pos);
- self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
- token::Ident(sym, false)
- }
- rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
- let suffix_start = start + BytePos(suffix_start);
- let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
- let suffix = if suffix_start < self.pos {
- let string = self.str_from(suffix_start);
- if string == "_" {
- self.sess
- .span_diagnostic
- .struct_span_warn(
- self.mk_sp(suffix_start, self.pos),
- "underscore literal suffix is not allowed",
- )
- .warn(
- "this was previously accepted by the compiler but is \
- being phased out; it will become a hard error in \
- a future release!",
- )
- .note(
- "see issue #42326 \
- <https://github.com/rust-lang/rust/issues/42326> \
- for more information",
- )
- .emit();
- None
- } else {
- Some(Symbol::intern(string))
- }
- } else {
- None
- };
- token::Literal(token::Lit { kind, symbol, suffix })
- }
- rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
- // Include the leading `'` in the real identifier, for macro
- // expansion purposes. See #12512 for the gory details of why
- // this is necessary.
- let lifetime_name = self.str_from(start);
- if starts_with_number {
- self.err_span_(start, self.pos, "lifetimes cannot start with a number");
- }
- let ident = Symbol::intern(lifetime_name);
- token::Lifetime(ident)
- }
- rustc_lexer::TokenKind::Semi => token::Semi,
- rustc_lexer::TokenKind::Comma => token::Comma,
- rustc_lexer::TokenKind::Dot => token::Dot,
- rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis),
- rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis),
- rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace),
- rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace),
- rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket),
- rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket),
- rustc_lexer::TokenKind::At => token::At,
- rustc_lexer::TokenKind::Pound => token::Pound,
- rustc_lexer::TokenKind::Tilde => token::Tilde,
- rustc_lexer::TokenKind::Question => token::Question,
- rustc_lexer::TokenKind::Colon => token::Colon,
- rustc_lexer::TokenKind::Dollar => token::Dollar,
- rustc_lexer::TokenKind::Eq => token::Eq,
- rustc_lexer::TokenKind::Bang => token::Not,
- rustc_lexer::TokenKind::Lt => token::Lt,
- rustc_lexer::TokenKind::Gt => token::Gt,
- rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
- rustc_lexer::TokenKind::And => token::BinOp(token::And),
- rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
- rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
- rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
- rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
- rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
- rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-
- rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
- let c = self.str_from(start).chars().next().unwrap();
- let mut err =
- self.struct_err_span_char(start, self.pos, "unknown start of token", c);
- // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
- // instead of keeping a table in `check_for_substitution`into the token. Ideally,
- // this should be inside `rustc_lexer`. However, we should first remove compound
- // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
- // as there will be less overall work to do this way.
- let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
- if c == '\x00' {
- err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
- }
- err.emit();
- token?
- }
- })
- }
-
fn cook_doc_comment(
&self,
content_start: BytePos,
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index aa70912dc..b2701817d 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -1,31 +1,15 @@
use super::{StringReader, UnmatchedBrace};
-
use rustc_ast::token::{self, Delimiter, Token};
use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast_pretty::pprust::token_to_string;
use rustc_data_structures::fx::FxHashMap;
-use rustc_errors::PResult;
+use rustc_errors::{PErr, PResult};
use rustc_span::Span;
-impl<'a> StringReader<'a> {
- pub(super) fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
- let mut tt_reader = TokenTreesReader {
- string_reader: self,
- token: Token::dummy(),
- open_braces: Vec::new(),
- unmatched_braces: Vec::new(),
- matching_delim_spans: Vec::new(),
- last_unclosed_found_span: None,
- last_delim_empty_block_spans: FxHashMap::default(),
- matching_block_spans: Vec::new(),
- };
- let res = tt_reader.parse_all_token_trees();
- (res, tt_reader.unmatched_braces)
- }
-}
-
-struct TokenTreesReader<'a> {
+pub(super) struct TokenTreesReader<'a> {
string_reader: StringReader<'a>,
+ /// The "next" token, which has been obtained from the `StringReader` but
+ /// not yet handled by the `TokenTreesReader`.
token: Token,
/// Stack of open delimiters and their spans. Used for error message.
open_braces: Vec<(Delimiter, Span)>,
@@ -43,254 +27,232 @@ struct TokenTreesReader<'a> {
}
impl<'a> TokenTreesReader<'a> {
- // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
- fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
- let mut buf = TokenStreamBuilder::default();
-
- self.bump();
- while self.token != token::Eof {
- buf.push(self.parse_token_tree()?);
- }
-
- Ok(buf.into_token_stream())
+ pub(super) fn parse_all_token_trees(
+ string_reader: StringReader<'a>,
+ ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+ let mut tt_reader = TokenTreesReader {
+ string_reader,
+ token: Token::dummy(),
+ open_braces: Vec::new(),
+ unmatched_braces: Vec::new(),
+ matching_delim_spans: Vec::new(),
+ last_unclosed_found_span: None,
+ last_delim_empty_block_spans: FxHashMap::default(),
+ matching_block_spans: Vec::new(),
+ };
+ let res = tt_reader.parse_token_trees(/* is_delimited */ false);
+ (res, tt_reader.unmatched_braces)
}
- // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
- fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
- let mut buf = TokenStreamBuilder::default();
+ // Parse a stream of tokens into a list of `TokenTree`s.
+ fn parse_token_trees(&mut self, is_delimited: bool) -> PResult<'a, TokenStream> {
+ self.token = self.string_reader.next_token().0;
+ let mut buf = Vec::new();
loop {
- if let token::CloseDelim(..) = self.token.kind {
- return buf.into_token_stream();
- }
-
- match self.parse_token_tree() {
- Ok(tree) => buf.push(tree),
- Err(mut e) => {
- e.emit();
- return buf.into_token_stream();
+ match self.token.kind {
+ token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
+ token::CloseDelim(delim) => {
+ return if is_delimited {
+ Ok(TokenStream::new(buf))
+ } else {
+ Err(self.close_delim_err(delim))
+ };
+ }
+ token::Eof => {
+ if is_delimited {
+ self.eof_err().emit();
+ }
+ return Ok(TokenStream::new(buf));
+ }
+ _ => {
+ // Get the next normal token. This might require getting multiple adjacent
+ // single-char tokens and joining them together.
+ let (this_spacing, next_tok) = loop {
+ let (next_tok, is_next_tok_preceded_by_whitespace) =
+ self.string_reader.next_token();
+ if !is_next_tok_preceded_by_whitespace {
+ if let Some(glued) = self.token.glue(&next_tok) {
+ self.token = glued;
+ } else {
+ let this_spacing =
+ if next_tok.is_op() { Spacing::Joint } else { Spacing::Alone };
+ break (this_spacing, next_tok);
+ }
+ } else {
+ break (Spacing::Alone, next_tok);
+ }
+ };
+ let this_tok = std::mem::replace(&mut self.token, next_tok);
+ buf.push(TokenTree::Token(this_tok, this_spacing));
}
}
}
}
- fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> {
- let sm = self.string_reader.sess.source_map();
-
- match self.token.kind {
- token::Eof => {
- let msg = "this file contains an unclosed delimiter";
- let mut err =
- self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
- for &(_, sp) in &self.open_braces {
- err.span_label(sp, "unclosed delimiter");
- self.unmatched_braces.push(UnmatchedBrace {
- expected_delim: Delimiter::Brace,
- found_delim: None,
- found_span: self.token.span,
- unclosed_span: Some(sp),
- candidate_span: None,
- });
- }
+ fn eof_err(&mut self) -> PErr<'a> {
+ let msg = "this file contains an unclosed delimiter";
+ let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
+ for &(_, sp) in &self.open_braces {
+ err.span_label(sp, "unclosed delimiter");
+ self.unmatched_braces.push(UnmatchedBrace {
+ expected_delim: Delimiter::Brace,
+ found_delim: None,
+ found_span: self.token.span,
+ unclosed_span: Some(sp),
+ candidate_span: None,
+ });
+ }
- if let Some((delim, _)) = self.open_braces.last() {
- if let Some((_, open_sp, close_sp)) =
- self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
- if let Some(close_padding) = sm.span_to_margin(*close_sp) {
- if let Some(open_padding) = sm.span_to_margin(*open_sp) {
- return delim == d && close_padding != open_padding;
- }
- }
- false
- })
- // these are in reverse order as they get inserted on close, but
- {
- // we want the last open/first close
- err.span_label(*open_sp, "this delimiter might not be properly closed...");
- err.span_label(
- *close_sp,
- "...as it matches this but it has different indentation",
- );
+ if let Some((delim, _)) = self.open_braces.last() {
+ if let Some((_, open_sp, close_sp)) =
+ self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
+ let sm = self.string_reader.sess.source_map();
+ if let Some(close_padding) = sm.span_to_margin(*close_sp) {
+ if let Some(open_padding) = sm.span_to_margin(*open_sp) {
+ return delim == d && close_padding != open_padding;
+ }
}
- }
- Err(err)
+ false
+ })
+ // these are in reverse order as they get inserted on close, but
+ {
+ // we want the last open/first close
+ err.span_label(*open_sp, "this delimiter might not be properly closed...");
+ err.span_label(*close_sp, "...as it matches this but it has different indentation");
}
- token::OpenDelim(delim) => {
- // The span for beginning of the delimited section
- let pre_span = self.token.span;
-
- // Parse the open delimiter.
- self.open_braces.push((delim, self.token.span));
- self.bump();
+ }
+ err
+ }
- // Parse the token trees within the delimiters.
- // We stop at any delimiter so we can try to recover if the user
- // uses an incorrect delimiter.
- let tts = self.parse_token_trees_until_close_delim();
+ fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> TokenTree {
+ // The span for beginning of the delimited section
+ let pre_span = self.token.span;
- // Expand to cover the entire delimited token tree
- let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
+ self.open_braces.push((open_delim, self.token.span));
- match self.token.kind {
- // Correct delimiter.
- token::CloseDelim(d) if d == delim => {
- let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
- let close_brace_span = self.token.span;
+ // Parse the token trees within the delimiters.
+ // We stop at any delimiter so we can try to recover if the user
+ // uses an incorrect delimiter.
+ let tts = self.parse_token_trees(/* is_delimited */ true).unwrap();
- if tts.is_empty() {
- let empty_block_span = open_brace_span.to(close_brace_span);
- if !sm.is_multiline(empty_block_span) {
- // Only track if the block is in the form of `{}`, otherwise it is
- // likely that it was written on purpose.
- self.last_delim_empty_block_spans.insert(delim, empty_block_span);
- }
- }
+ // Expand to cover the entire delimited token tree
+ let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
- //only add braces
- if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, delim) {
- self.matching_block_spans.push((open_brace_span, close_brace_span));
- }
+ match self.token.kind {
+ // Correct delimiter.
+ token::CloseDelim(close_delim) if close_delim == open_delim => {
+ let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
+ let close_brace_span = self.token.span;
- if self.open_braces.is_empty() {
- // Clear up these spans to avoid suggesting them as we've found
- // properly matched delimiters so far for an entire block.
- self.matching_delim_spans.clear();
- } else {
- self.matching_delim_spans.push((
- open_brace,
- open_brace_span,
- close_brace_span,
- ));
- }
- // Parse the closing delimiter.
- self.bump();
+ if tts.is_empty() {
+ let empty_block_span = open_brace_span.to(close_brace_span);
+ let sm = self.string_reader.sess.source_map();
+ if !sm.is_multiline(empty_block_span) {
+ // Only track if the block is in the form of `{}`, otherwise it is
+ // likely that it was written on purpose.
+ self.last_delim_empty_block_spans.insert(open_delim, empty_block_span);
}
- // Incorrect delimiter.
- token::CloseDelim(other) => {
- let mut unclosed_delimiter = None;
- let mut candidate = None;
-
- if self.last_unclosed_found_span != Some(self.token.span) {
- // do not complain about the same unclosed delimiter multiple times
- self.last_unclosed_found_span = Some(self.token.span);
- // This is a conservative error: only report the last unclosed
- // delimiter. The previous unclosed delimiters could actually be
- // closed! The parser just hasn't gotten to them yet.
- if let Some(&(_, sp)) = self.open_braces.last() {
- unclosed_delimiter = Some(sp);
- };
- if let Some(current_padding) = sm.span_to_margin(self.token.span) {
- for (brace, brace_span) in &self.open_braces {
- if let Some(padding) = sm.span_to_margin(*brace_span) {
- // high likelihood of these two corresponding
- if current_padding == padding && brace == &other {
- candidate = Some(*brace_span);
- }
- }
- }
- }
- let (tok, _) = self.open_braces.pop().unwrap();
- self.unmatched_braces.push(UnmatchedBrace {
- expected_delim: tok,
- found_delim: Some(other),
- found_span: self.token.span,
- unclosed_span: unclosed_delimiter,
- candidate_span: candidate,
- });
- } else {
- self.open_braces.pop();
- }
+ }
- // If the incorrect delimiter matches an earlier opening
- // delimiter, then don't consume it (it can be used to
- // close the earlier one). Otherwise, consume it.
- // E.g., we try to recover from:
- // fn foo() {
- // bar(baz(
- // } // Incorrect delimiter but matches the earlier `{`
- if !self.open_braces.iter().any(|&(b, _)| b == other) {
- self.bump();
- }
- }
- token::Eof => {
- // Silently recover, the EOF token will be seen again
- // and an error emitted then. Thus we don't pop from
- // self.open_braces here.
- }
- _ => {}
+ //only add braces
+ if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) {
+ self.matching_block_spans.push((open_brace_span, close_brace_span));
}
- Ok(TokenTree::Delimited(delim_span, delim, tts))
+ if self.open_braces.is_empty() {
+ // Clear up these spans to avoid suggesting them as we've found
+ // properly matched delimiters so far for an entire block.
+ self.matching_delim_spans.clear();
+ } else {
+ self.matching_delim_spans.push((open_brace, open_brace_span, close_brace_span));
+ }
+ // Move past the closing delimiter.
+ self.token = self.string_reader.next_token().0;
}
- token::CloseDelim(delim) => {
- // An unexpected closing delimiter (i.e., there is no
- // matching opening delimiter).
- let token_str = token_to_string(&self.token);
- let msg = format!("unexpected closing delimiter: `{}`", token_str);
- let mut err =
- self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
+ // Incorrect delimiter.
+ token::CloseDelim(close_delim) => {
+ let mut unclosed_delimiter = None;
+ let mut candidate = None;
- // Braces are added at the end, so the last element is the biggest block
- if let Some(parent) = self.matching_block_spans.last() {
- if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
- // Check if the (empty block) is in the last properly closed block
- if (parent.0.to(parent.1)).contains(span) {
- err.span_label(
- span,
- "block is empty, you might have not meant to close it",
- );
- } else {
- err.span_label(parent.0, "this opening brace...");
-
- err.span_label(parent.1, "...matches this closing brace");
+ if self.last_unclosed_found_span != Some(self.token.span) {
+ // do not complain about the same unclosed delimiter multiple times
+ self.last_unclosed_found_span = Some(self.token.span);
+ // This is a conservative error: only report the last unclosed
+ // delimiter. The previous unclosed delimiters could actually be
+ // closed! The parser just hasn't gotten to them yet.
+ if let Some(&(_, sp)) = self.open_braces.last() {
+ unclosed_delimiter = Some(sp);
+ };
+ let sm = self.string_reader.sess.source_map();
+ if let Some(current_padding) = sm.span_to_margin(self.token.span) {
+ for (brace, brace_span) in &self.open_braces {
+ if let Some(padding) = sm.span_to_margin(*brace_span) {
+ // high likelihood of these two corresponding
+ if current_padding == padding && brace == &close_delim {
+ candidate = Some(*brace_span);
+ }
+ }
}
- } else {
- err.span_label(parent.0, "this opening brace...");
-
- err.span_label(parent.1, "...matches this closing brace");
}
+ let (tok, _) = self.open_braces.pop().unwrap();
+ self.unmatched_braces.push(UnmatchedBrace {
+ expected_delim: tok,
+ found_delim: Some(close_delim),
+ found_span: self.token.span,
+ unclosed_span: unclosed_delimiter,
+ candidate_span: candidate,
+ });
+ } else {
+ self.open_braces.pop();
}
- err.span_label(self.token.span, "unexpected closing delimiter");
- Err(err)
- }
- _ => {
- let tok = self.token.take();
- let mut spacing = self.bump();
- if !self.token.is_op() {
- spacing = Spacing::Alone;
+ // If the incorrect delimiter matches an earlier opening
+ // delimiter, then don't consume it (it can be used to
+ // close the earlier one). Otherwise, consume it.
+ // E.g., we try to recover from:
+ // fn foo() {
+ // bar(baz(
+ // } // Incorrect delimiter but matches the earlier `{`
+ if !self.open_braces.iter().any(|&(b, _)| b == close_delim) {
+ self.token = self.string_reader.next_token().0;
}
- Ok(TokenTree::Token(tok, spacing))
}
+ token::Eof => {
+ // Silently recover, the EOF token will be seen again
+ // and an error emitted then. Thus we don't pop from
+ // self.open_braces here.
+ }
+ _ => unreachable!(),
}
- }
- fn bump(&mut self) -> Spacing {
- let (spacing, token) = self.string_reader.next_token();
- self.token = token;
- spacing
+ TokenTree::Delimited(delim_span, open_delim, tts)
}
-}
-#[derive(Default)]
-struct TokenStreamBuilder {
- buf: Vec<TokenTree>,
-}
+ fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'a> {
+ // An unexpected closing delimiter (i.e., there is no
+ // matching opening delimiter).
+ let token_str = token_to_string(&self.token);
+ let msg = format!("unexpected closing delimiter: `{}`", token_str);
+ let mut err =
+ self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
-impl TokenStreamBuilder {
- #[inline(always)]
- fn push(&mut self, tree: TokenTree) {
- if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last()
- && let TokenTree::Token(token, joint) = &tree
- && let Some(glued) = prev_token.glue(token)
- {
- self.buf.pop();
- self.buf.push(TokenTree::Token(glued, *joint));
- } else {
- self.buf.push(tree)
+ // Braces are added at the end, so the last element is the biggest block
+ if let Some(parent) = self.matching_block_spans.last() {
+ if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
+ // Check if the (empty block) is in the last properly closed block
+ if (parent.0.to(parent.1)).contains(span) {
+ err.span_label(span, "block is empty, you might have not meant to close it");
+ } else {
+ err.span_label(parent.0, "this opening brace...");
+ err.span_label(parent.1, "...matches this closing brace");
+ }
+ } else {
+ err.span_label(parent.0, "this opening brace...");
+ err.span_label(parent.1, "...matches this closing brace");
+ }
}
- }
- fn into_token_stream(self) -> TokenStream {
- TokenStream::new(self.buf)
+ err.span_label(self.token.span, "unexpected closing delimiter");
+ err
}
}
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 273827864..f075de714 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -20,13 +20,9 @@ pub(crate) fn emit_unescape_error(
range: Range<usize>,
error: EscapeError,
) {
- tracing::debug!(
+ debug!(
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
- lit,
- span_with_quotes,
- mode,
- range,
- error
+ lit, span_with_quotes, mode, range, error
);
let last_char = || {
let c = lit[range.clone()].chars().rev().next().unwrap();
@@ -117,11 +113,26 @@ pub(crate) fn emit_unescape_error(
} else {
("", "if you meant to write a `str` literal, use double quotes")
};
-
+ let mut escaped = String::with_capacity(lit.len());
+ let mut chrs = lit.chars().peekable();
+ while let Some(first) = chrs.next() {
+ match (first, chrs.peek()) {
+ ('\\', Some('"')) => {
+ escaped.push('\\');
+ escaped.push('"');
+ chrs.next();
+ }
+ ('"', _) => {
+ escaped.push('\\');
+ escaped.push('"')
+ }
+ (c, _) => escaped.push(c),
+ };
+ }
handler.span_suggestion(
span_with_quotes,
msg,
- format!("{}\"{}\"", prefix, lit),
+ format!("{prefix}\"{escaped}\""),
Applicability::MachineApplicable,
);
}