diff options
Diffstat (limited to '')
-rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 397 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 428 | ||||
-rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 19 |
3 files changed, 413 insertions, 431 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 63819a2f9..462bce16a 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,10 +1,13 @@ use crate::lexer::unicode_chars::UNICODE_ARRAY; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; -use rustc_ast::tokenstream::{Spacing, TokenStream}; +use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; -use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult}; +use rustc_errors::{ + error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey, +}; use rustc_lexer::unescape::{self, Mode}; +use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, @@ -38,11 +41,20 @@ pub struct UnmatchedBrace { pub(crate) fn parse_token_trees<'a>( sess: &'a ParseSess, - src: &'a str, - start_pos: BytePos, + mut src: &'a str, + mut start_pos: BytePos, override_span: Option<Span>, ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { - StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees() + // Skip `#!`, if present. + if let Some(shebang_len) = rustc_lexer::strip_shebang(src) { + src = &src[shebang_len..]; + start_pos = start_pos + BytePos::from_usize(shebang_len); + } + + let cursor = Cursor::new(src); + let string_reader = + StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span }; + tokentrees::TokenTreesReader::parse_all_token_trees(string_reader) } struct StringReader<'a> { @@ -53,6 +65,8 @@ struct StringReader<'a> { pos: BytePos, /// Source text to tokenize. src: &'a str, + /// Cursor for getting lexer tokens. + cursor: Cursor<'a>, override_span: Option<Span>, } @@ -61,42 +75,198 @@ impl<'a> StringReader<'a> { self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) } - /// Returns the next token, and info about preceding whitespace, if any. - fn next_token(&mut self) -> (Spacing, Token) { - let mut spacing = Spacing::Joint; - - // Skip `#!` at the start of the file - if self.pos == self.start_pos - && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src) - { - self.pos = self.pos + BytePos::from_usize(shebang_len); - spacing = Spacing::Alone; - } + /// Returns the next token, paired with a bool indicating if the token was + /// preceded by whitespace. + fn next_token(&mut self) -> (Token, bool) { + let mut preceded_by_whitespace = false; // Skip trivial (whitespace & comments) tokens loop { - let start_src_index = self.src_index(self.pos); - let text: &str = &self.src[start_src_index..]; - - if text.is_empty() { - let span = self.mk_sp(self.pos, self.pos); - return (spacing, Token::new(token::Eof, span)); - } - - let token = rustc_lexer::first_token(text); - + let token = self.cursor.advance_token(); let start = self.pos; self.pos = self.pos + BytePos(token.len); debug!("next_token: {:?}({:?})", token.kind, self.str_from(start)); - match self.cook_lexer_token(token.kind, start) { - Some(kind) => { + // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a + // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs + // additional validation. + let kind = match token.kind { + rustc_lexer::TokenKind::LineComment { doc_style } => { + // Skip non-doc comments + let Some(doc_style) = doc_style else { + self.lint_unicode_text_flow(start); + preceded_by_whitespace = true; + continue; + }; + + // Opening delimiter of the length 3 is not included into the symbol. + let content_start = start + BytePos(3); + let content = self.str_from(content_start); + self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) + } + rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { + if !terminated { + self.report_unterminated_block_comment(start, doc_style); + } + + // Skip non-doc comments + let Some(doc_style) = doc_style else { + self.lint_unicode_text_flow(start); + preceded_by_whitespace = true; + continue; + }; + + // Opening delimiter of the length 3 and closing delimiter of the length 2 + // are not included into the symbol. + let content_start = start + BytePos(3); + let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); + let content = self.str_from_to(content_start, content_end); + self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) + } + rustc_lexer::TokenKind::Whitespace => { + preceded_by_whitespace = true; + continue; + } + rustc_lexer::TokenKind::Ident => { + let sym = nfc_normalize(self.str_from(start)); let span = self.mk_sp(start, self.pos); - return (spacing, Token::new(kind, span)); + self.sess.symbol_gallery.insert(sym, span); + token::Ident(sym, false) } - None => spacing = Spacing::Alone, - } + rustc_lexer::TokenKind::RawIdent => { + let sym = nfc_normalize(self.str_from(start + BytePos(2))); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + self.sess.raw_identifier_spans.borrow_mut().push(span); + token::Ident(sym, true) + } + rustc_lexer::TokenKind::UnknownPrefix => { + self.report_unknown_prefix(start); + let sym = nfc_normalize(self.str_from(start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + token::Ident(sym, false) + } + rustc_lexer::TokenKind::InvalidIdent + // Do not recover an identifier with emoji if the codepoint is a confusable + // with a recoverable substitution token, like `➖`. + if !UNICODE_ARRAY + .iter() + .any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) => + { + let sym = nfc_normalize(self.str_from(start)); + let span = self.mk_sp(start, self.pos); + self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default() + .push(span); + token::Ident(sym, false) + } + rustc_lexer::TokenKind::Literal { kind, suffix_start } => { + let suffix_start = start + BytePos(suffix_start); + let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + let suffix = if suffix_start < self.pos { + let string = self.str_from(suffix_start); + if string == "_" { + self.sess + .span_diagnostic + .struct_span_warn( + self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed", + ) + .warn( + "this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!", + ) + .note( + "see issue #42326 \ + <https://github.com/rust-lang/rust/issues/42326> \ + for more information", + ) + .emit(); + None + } else { + Some(Symbol::intern(string)) + } + } else { + None + }; + token::Literal(token::Lit { kind, symbol, suffix }) + } + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + if starts_with_number { + let span = self.mk_sp(start, self.pos); + let mut diag = self.sess.struct_err("lifetimes cannot start with a number"); + diag.set_span(span); + diag.stash(span, StashKey::LifetimeIsChar); + } + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident) + } + rustc_lexer::TokenKind::Semi => token::Semi, + rustc_lexer::TokenKind::Comma => token::Comma, + rustc_lexer::TokenKind::Dot => token::Dot, + rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis), + rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis), + rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace), + rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace), + rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket), + rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket), + rustc_lexer::TokenKind::At => token::At, + rustc_lexer::TokenKind::Pound => token::Pound, + rustc_lexer::TokenKind::Tilde => token::Tilde, + rustc_lexer::TokenKind::Question => token::Question, + rustc_lexer::TokenKind::Colon => token::Colon, + rustc_lexer::TokenKind::Dollar => token::Dollar, + rustc_lexer::TokenKind::Eq => token::Eq, + rustc_lexer::TokenKind::Bang => token::Not, + rustc_lexer::TokenKind::Lt => token::Lt, + rustc_lexer::TokenKind::Gt => token::Gt, + rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), + rustc_lexer::TokenKind::And => token::BinOp(token::And), + rustc_lexer::TokenKind::Or => token::BinOp(token::Or), + rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), + rustc_lexer::TokenKind::Star => token::BinOp(token::Star), + rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), + rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), + rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), + + rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { + let c = self.str_from(start).chars().next().unwrap(); + let mut err = + self.struct_err_span_char(start, self.pos, "unknown start of token", c); + // FIXME: the lexer could be used to turn the ASCII version of unicode + // homoglyphs, instead of keeping a table in `check_for_substitution`into the + // token. Ideally, this should be inside `rustc_lexer`. However, we should + // first remove compound tokens like `<<` from `rustc_lexer`, and then add + // fancier error recovery to it, as there will be less overall work to do this + // way. + let token = unicode_chars::check_for_substitution(self, start, c, &mut err); + if c == '\x00' { + err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); + } + err.emit(); + if let Some(token) = token { + token + } else { + preceded_by_whitespace = true; + continue; + } + } + rustc_lexer::TokenKind::Eof => token::Eof, + }; + let span = self.mk_sp(start, self.pos); + return (Token::new(kind, span), preceded_by_whitespace); } } @@ -162,171 +332,6 @@ impl<'a> StringReader<'a> { } } - /// Turns simple `rustc_lexer::TokenKind` enum into a rich - /// `rustc_ast::TokenKind`. This turns strings into interned - /// symbols and runs additional validation. - fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> { - Some(match token { - rustc_lexer::TokenKind::LineComment { doc_style } => { - // Skip non-doc comments - let Some(doc_style) = doc_style else { - self.lint_unicode_text_flow(start); - return None; - }; - - // Opening delimiter of the length 3 is not included into the symbol. - let content_start = start + BytePos(3); - let content = self.str_from(content_start); - self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) - } - rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { - if !terminated { - self.report_unterminated_block_comment(start, doc_style); - } - - // Skip non-doc comments - let Some(doc_style) = doc_style else { - self.lint_unicode_text_flow(start); - return None; - }; - - // Opening delimiter of the length 3 and closing delimiter of the length 2 - // are not included into the symbol. - let content_start = start + BytePos(3); - let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); - let content = self.str_from_to(content_start, content_end); - self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) - } - rustc_lexer::TokenKind::Whitespace => return None, - rustc_lexer::TokenKind::Ident - | rustc_lexer::TokenKind::RawIdent - | rustc_lexer::TokenKind::UnknownPrefix => { - let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; - let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix; - let mut ident_start = start; - if is_raw_ident { - ident_start = ident_start + BytePos(2); - } - if is_unknown_prefix { - self.report_unknown_prefix(start); - } - let sym = nfc_normalize(self.str_from(ident_start)); - let span = self.mk_sp(start, self.pos); - self.sess.symbol_gallery.insert(sym, span); - if is_raw_ident { - if !sym.can_be_raw() { - self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); - } - self.sess.raw_identifier_spans.borrow_mut().push(span); - } - token::Ident(sym, is_raw_ident) - } - rustc_lexer::TokenKind::InvalidIdent - // Do not recover an identifier with emoji if the codepoint is a confusable - // with a recoverable substitution token, like `➖`. - if !UNICODE_ARRAY - .iter() - .any(|&(c, _, _)| { - let sym = self.str_from(start); - sym.chars().count() == 1 && c == sym.chars().next().unwrap() - }) - => - { - let sym = nfc_normalize(self.str_from(start)); - let span = self.mk_sp(start, self.pos); - self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span); - token::Ident(sym, false) - } - rustc_lexer::TokenKind::Literal { kind, suffix_start } => { - let suffix_start = start + BytePos(suffix_start); - let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); - let suffix = if suffix_start < self.pos { - let string = self.str_from(suffix_start); - if string == "_" { - self.sess - .span_diagnostic - .struct_span_warn( - self.mk_sp(suffix_start, self.pos), - "underscore literal suffix is not allowed", - ) - .warn( - "this was previously accepted by the compiler but is \ - being phased out; it will become a hard error in \ - a future release!", - ) - .note( - "see issue #42326 \ - <https://github.com/rust-lang/rust/issues/42326> \ - for more information", - ) - .emit(); - None - } else { - Some(Symbol::intern(string)) - } - } else { - None - }; - token::Literal(token::Lit { kind, symbol, suffix }) - } - rustc_lexer::TokenKind::Lifetime { starts_with_number } => { - // Include the leading `'` in the real identifier, for macro - // expansion purposes. See #12512 for the gory details of why - // this is necessary. - let lifetime_name = self.str_from(start); - if starts_with_number { - self.err_span_(start, self.pos, "lifetimes cannot start with a number"); - } - let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) - } - rustc_lexer::TokenKind::Semi => token::Semi, - rustc_lexer::TokenKind::Comma => token::Comma, - rustc_lexer::TokenKind::Dot => token::Dot, - rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis), - rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis), - rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace), - rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace), - rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket), - rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket), - rustc_lexer::TokenKind::At => token::At, - rustc_lexer::TokenKind::Pound => token::Pound, - rustc_lexer::TokenKind::Tilde => token::Tilde, - rustc_lexer::TokenKind::Question => token::Question, - rustc_lexer::TokenKind::Colon => token::Colon, - rustc_lexer::TokenKind::Dollar => token::Dollar, - rustc_lexer::TokenKind::Eq => token::Eq, - rustc_lexer::TokenKind::Bang => token::Not, - rustc_lexer::TokenKind::Lt => token::Lt, - rustc_lexer::TokenKind::Gt => token::Gt, - rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), - rustc_lexer::TokenKind::And => token::BinOp(token::And), - rustc_lexer::TokenKind::Or => token::BinOp(token::Or), - rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), - rustc_lexer::TokenKind::Star => token::BinOp(token::Star), - rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), - rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), - rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), - - rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { - let c = self.str_from(start).chars().next().unwrap(); - let mut err = - self.struct_err_span_char(start, self.pos, "unknown start of token", c); - // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, - // instead of keeping a table in `check_for_substitution`into the token. Ideally, - // this should be inside `rustc_lexer`. However, we should first remove compound - // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, - // as there will be less overall work to do this way. - let token = unicode_chars::check_for_substitution(self, start, c, &mut err); - if c == '\x00' { - err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); - } - err.emit(); - token? - } - }) - } - fn cook_doc_comment( &self, content_start: BytePos, diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index aa70912dc..b2701817d 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -1,31 +1,15 @@ use super::{StringReader, UnmatchedBrace}; - use rustc_ast::token::{self, Delimiter, Token}; use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree}; use rustc_ast_pretty::pprust::token_to_string; use rustc_data_structures::fx::FxHashMap; -use rustc_errors::PResult; +use rustc_errors::{PErr, PResult}; use rustc_span::Span; -impl<'a> StringReader<'a> { - pub(super) fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { - let mut tt_reader = TokenTreesReader { - string_reader: self, - token: Token::dummy(), - open_braces: Vec::new(), - unmatched_braces: Vec::new(), - matching_delim_spans: Vec::new(), - last_unclosed_found_span: None, - last_delim_empty_block_spans: FxHashMap::default(), - matching_block_spans: Vec::new(), - }; - let res = tt_reader.parse_all_token_trees(); - (res, tt_reader.unmatched_braces) - } -} - -struct TokenTreesReader<'a> { +pub(super) struct TokenTreesReader<'a> { string_reader: StringReader<'a>, + /// The "next" token, which has been obtained from the `StringReader` but + /// not yet handled by the `TokenTreesReader`. token: Token, /// Stack of open delimiters and their spans. Used for error message. open_braces: Vec<(Delimiter, Span)>, @@ -43,254 +27,232 @@ struct TokenTreesReader<'a> { } impl<'a> TokenTreesReader<'a> { - // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. - fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { - let mut buf = TokenStreamBuilder::default(); - - self.bump(); - while self.token != token::Eof { - buf.push(self.parse_token_tree()?); - } - - Ok(buf.into_token_stream()) + pub(super) fn parse_all_token_trees( + string_reader: StringReader<'a>, + ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { + let mut tt_reader = TokenTreesReader { + string_reader, + token: Token::dummy(), + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + last_delim_empty_block_spans: FxHashMap::default(), + matching_block_spans: Vec::new(), + }; + let res = tt_reader.parse_token_trees(/* is_delimited */ false); + (res, tt_reader.unmatched_braces) } - // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. - fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { - let mut buf = TokenStreamBuilder::default(); + // Parse a stream of tokens into a list of `TokenTree`s. + fn parse_token_trees(&mut self, is_delimited: bool) -> PResult<'a, TokenStream> { + self.token = self.string_reader.next_token().0; + let mut buf = Vec::new(); loop { - if let token::CloseDelim(..) = self.token.kind { - return buf.into_token_stream(); - } - - match self.parse_token_tree() { - Ok(tree) => buf.push(tree), - Err(mut e) => { - e.emit(); - return buf.into_token_stream(); + match self.token.kind { + token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)), + token::CloseDelim(delim) => { + return if is_delimited { + Ok(TokenStream::new(buf)) + } else { + Err(self.close_delim_err(delim)) + }; + } + token::Eof => { + if is_delimited { + self.eof_err().emit(); + } + return Ok(TokenStream::new(buf)); + } + _ => { + // Get the next normal token. This might require getting multiple adjacent + // single-char tokens and joining them together. + let (this_spacing, next_tok) = loop { + let (next_tok, is_next_tok_preceded_by_whitespace) = + self.string_reader.next_token(); + if !is_next_tok_preceded_by_whitespace { + if let Some(glued) = self.token.glue(&next_tok) { + self.token = glued; + } else { + let this_spacing = + if next_tok.is_op() { Spacing::Joint } else { Spacing::Alone }; + break (this_spacing, next_tok); + } + } else { + break (Spacing::Alone, next_tok); + } + }; + let this_tok = std::mem::replace(&mut self.token, next_tok); + buf.push(TokenTree::Token(this_tok, this_spacing)); } } } } - fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> { - let sm = self.string_reader.sess.source_map(); - - match self.token.kind { - token::Eof => { - let msg = "this file contains an unclosed delimiter"; - let mut err = - self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); - for &(_, sp) in &self.open_braces { - err.span_label(sp, "unclosed delimiter"); - self.unmatched_braces.push(UnmatchedBrace { - expected_delim: Delimiter::Brace, - found_delim: None, - found_span: self.token.span, - unclosed_span: Some(sp), - candidate_span: None, - }); - } + fn eof_err(&mut self) -> PErr<'a> { + let msg = "this file contains an unclosed delimiter"; + let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); + for &(_, sp) in &self.open_braces { + err.span_label(sp, "unclosed delimiter"); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: Delimiter::Brace, + found_delim: None, + found_span: self.token.span, + unclosed_span: Some(sp), + candidate_span: None, + }); + } - if let Some((delim, _)) = self.open_braces.last() { - if let Some((_, open_sp, close_sp)) = - self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| { - if let Some(close_padding) = sm.span_to_margin(*close_sp) { - if let Some(open_padding) = sm.span_to_margin(*open_sp) { - return delim == d && close_padding != open_padding; - } - } - false - }) - // these are in reverse order as they get inserted on close, but - { - // we want the last open/first close - err.span_label(*open_sp, "this delimiter might not be properly closed..."); - err.span_label( - *close_sp, - "...as it matches this but it has different indentation", - ); + if let Some((delim, _)) = self.open_braces.last() { + if let Some((_, open_sp, close_sp)) = + self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| { + let sm = self.string_reader.sess.source_map(); + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } } - } - Err(err) + false + }) + // these are in reverse order as they get inserted on close, but + { + // we want the last open/first close + err.span_label(*open_sp, "this delimiter might not be properly closed..."); + err.span_label(*close_sp, "...as it matches this but it has different indentation"); } - token::OpenDelim(delim) => { - // The span for beginning of the delimited section - let pre_span = self.token.span; - - // Parse the open delimiter. - self.open_braces.push((delim, self.token.span)); - self.bump(); + } + err + } - // Parse the token trees within the delimiters. - // We stop at any delimiter so we can try to recover if the user - // uses an incorrect delimiter. - let tts = self.parse_token_trees_until_close_delim(); + fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> TokenTree { + // The span for beginning of the delimited section + let pre_span = self.token.span; - // Expand to cover the entire delimited token tree - let delim_span = DelimSpan::from_pair(pre_span, self.token.span); + self.open_braces.push((open_delim, self.token.span)); - match self.token.kind { - // Correct delimiter. - token::CloseDelim(d) if d == delim => { - let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); - let close_brace_span = self.token.span; + // Parse the token trees within the delimiters. + // We stop at any delimiter so we can try to recover if the user + // uses an incorrect delimiter. + let tts = self.parse_token_trees(/* is_delimited */ true).unwrap(); - if tts.is_empty() { - let empty_block_span = open_brace_span.to(close_brace_span); - if !sm.is_multiline(empty_block_span) { - // Only track if the block is in the form of `{}`, otherwise it is - // likely that it was written on purpose. - self.last_delim_empty_block_spans.insert(delim, empty_block_span); - } - } + // Expand to cover the entire delimited token tree + let delim_span = DelimSpan::from_pair(pre_span, self.token.span); - //only add braces - if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, delim) { - self.matching_block_spans.push((open_brace_span, close_brace_span)); - } + match self.token.kind { + // Correct delimiter. + token::CloseDelim(close_delim) if close_delim == open_delim => { + let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); + let close_brace_span = self.token.span; - if self.open_braces.is_empty() { - // Clear up these spans to avoid suggesting them as we've found - // properly matched delimiters so far for an entire block. - self.matching_delim_spans.clear(); - } else { - self.matching_delim_spans.push(( - open_brace, - open_brace_span, - close_brace_span, - )); - } - // Parse the closing delimiter. - self.bump(); + if tts.is_empty() { + let empty_block_span = open_brace_span.to(close_brace_span); + let sm = self.string_reader.sess.source_map(); + if !sm.is_multiline(empty_block_span) { + // Only track if the block is in the form of `{}`, otherwise it is + // likely that it was written on purpose. + self.last_delim_empty_block_spans.insert(open_delim, empty_block_span); } - // Incorrect delimiter. - token::CloseDelim(other) => { - let mut unclosed_delimiter = None; - let mut candidate = None; - - if self.last_unclosed_found_span != Some(self.token.span) { - // do not complain about the same unclosed delimiter multiple times - self.last_unclosed_found_span = Some(self.token.span); - // This is a conservative error: only report the last unclosed - // delimiter. The previous unclosed delimiters could actually be - // closed! The parser just hasn't gotten to them yet. - if let Some(&(_, sp)) = self.open_braces.last() { - unclosed_delimiter = Some(sp); - }; - if let Some(current_padding) = sm.span_to_margin(self.token.span) { - for (brace, brace_span) in &self.open_braces { - if let Some(padding) = sm.span_to_margin(*brace_span) { - // high likelihood of these two corresponding - if current_padding == padding && brace == &other { - candidate = Some(*brace_span); - } - } - } - } - let (tok, _) = self.open_braces.pop().unwrap(); - self.unmatched_braces.push(UnmatchedBrace { - expected_delim: tok, - found_delim: Some(other), - found_span: self.token.span, - unclosed_span: unclosed_delimiter, - candidate_span: candidate, - }); - } else { - self.open_braces.pop(); - } + } - // If the incorrect delimiter matches an earlier opening - // delimiter, then don't consume it (it can be used to - // close the earlier one). Otherwise, consume it. - // E.g., we try to recover from: - // fn foo() { - // bar(baz( - // } // Incorrect delimiter but matches the earlier `{` - if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.bump(); - } - } - token::Eof => { - // Silently recover, the EOF token will be seen again - // and an error emitted then. Thus we don't pop from - // self.open_braces here. - } - _ => {} + //only add braces + if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) { + self.matching_block_spans.push((open_brace_span, close_brace_span)); } - Ok(TokenTree::Delimited(delim_span, delim, tts)) + if self.open_braces.is_empty() { + // Clear up these spans to avoid suggesting them as we've found + // properly matched delimiters so far for an entire block. + self.matching_delim_spans.clear(); + } else { + self.matching_delim_spans.push((open_brace, open_brace_span, close_brace_span)); + } + // Move past the closing delimiter. + self.token = self.string_reader.next_token().0; } - token::CloseDelim(delim) => { - // An unexpected closing delimiter (i.e., there is no - // matching opening delimiter). - let token_str = token_to_string(&self.token); - let msg = format!("unexpected closing delimiter: `{}`", token_str); - let mut err = - self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg); + // Incorrect delimiter. + token::CloseDelim(close_delim) => { + let mut unclosed_delimiter = None; + let mut candidate = None; - // Braces are added at the end, so the last element is the biggest block - if let Some(parent) = self.matching_block_spans.last() { - if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) { - // Check if the (empty block) is in the last properly closed block - if (parent.0.to(parent.1)).contains(span) { - err.span_label( - span, - "block is empty, you might have not meant to close it", - ); - } else { - err.span_label(parent.0, "this opening brace..."); - - err.span_label(parent.1, "...matches this closing brace"); + if self.last_unclosed_found_span != Some(self.token.span) { + // do not complain about the same unclosed delimiter multiple times + self.last_unclosed_found_span = Some(self.token.span); + // This is a conservative error: only report the last unclosed + // delimiter. The previous unclosed delimiters could actually be + // closed! The parser just hasn't gotten to them yet. + if let Some(&(_, sp)) = self.open_braces.last() { + unclosed_delimiter = Some(sp); + }; + let sm = self.string_reader.sess.source_map(); + if let Some(current_padding) = sm.span_to_margin(self.token.span) { + for (brace, brace_span) in &self.open_braces { + if let Some(padding) = sm.span_to_margin(*brace_span) { + // high likelihood of these two corresponding + if current_padding == padding && brace == &close_delim { + candidate = Some(*brace_span); + } + } } - } else { - err.span_label(parent.0, "this opening brace..."); - - err.span_label(parent.1, "...matches this closing brace"); } + let (tok, _) = self.open_braces.pop().unwrap(); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: tok, + found_delim: Some(close_delim), + found_span: self.token.span, + unclosed_span: unclosed_delimiter, + candidate_span: candidate, + }); + } else { + self.open_braces.pop(); } - err.span_label(self.token.span, "unexpected closing delimiter"); - Err(err) - } - _ => { - let tok = self.token.take(); - let mut spacing = self.bump(); - if !self.token.is_op() { - spacing = Spacing::Alone; + // If the incorrect delimiter matches an earlier opening + // delimiter, then don't consume it (it can be used to + // close the earlier one). Otherwise, consume it. + // E.g., we try to recover from: + // fn foo() { + // bar(baz( + // } // Incorrect delimiter but matches the earlier `{` + if !self.open_braces.iter().any(|&(b, _)| b == close_delim) { + self.token = self.string_reader.next_token().0; } - Ok(TokenTree::Token(tok, spacing)) } + token::Eof => { + // Silently recover, the EOF token will be seen again + // and an error emitted then. Thus we don't pop from + // self.open_braces here. + } + _ => unreachable!(), } - } - fn bump(&mut self) -> Spacing { - let (spacing, token) = self.string_reader.next_token(); - self.token = token; - spacing + TokenTree::Delimited(delim_span, open_delim, tts) } -} -#[derive(Default)] -struct TokenStreamBuilder { - buf: Vec<TokenTree>, -} + fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'a> { + // An unexpected closing delimiter (i.e., there is no + // matching opening delimiter). + let token_str = token_to_string(&self.token); + let msg = format!("unexpected closing delimiter: `{}`", token_str); + let mut err = + self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg); -impl TokenStreamBuilder { - #[inline(always)] - fn push(&mut self, tree: TokenTree) { - if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last() - && let TokenTree::Token(token, joint) = &tree - && let Some(glued) = prev_token.glue(token) - { - self.buf.pop(); - self.buf.push(TokenTree::Token(glued, *joint)); - } else { - self.buf.push(tree) + // Braces are added at the end, so the last element is the biggest block + if let Some(parent) = self.matching_block_spans.last() { + if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) { + // Check if the (empty block) is in the last properly closed block + if (parent.0.to(parent.1)).contains(span) { + err.span_label(span, "block is empty, you might have not meant to close it"); + } else { + err.span_label(parent.0, "this opening brace..."); + err.span_label(parent.1, "...matches this closing brace"); + } + } else { + err.span_label(parent.0, "this opening brace..."); + err.span_label(parent.1, "...matches this closing brace"); + } } - } - fn into_token_stream(self) -> TokenStream { - TokenStream::new(self.buf) + err.span_label(self.token.span, "unexpected closing delimiter"); + err } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 77c4fadab..f075de714 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -113,11 +113,26 @@ pub(crate) fn emit_unescape_error( } else { ("", "if you meant to write a `str` literal, use double quotes") }; - + let mut escaped = String::with_capacity(lit.len()); + let mut chrs = lit.chars().peekable(); + while let Some(first) = chrs.next() { + match (first, chrs.peek()) { + ('\\', Some('"')) => { + escaped.push('\\'); + escaped.push('"'); + chrs.next(); + } + ('"', _) => { + escaped.push('\\'); + escaped.push('"') + } + (c, _) => escaped.push(c), + }; + } handler.span_suggestion( span_with_quotes, msg, - format!("{}\"{}\"", prefix, lit), + format!("{prefix}\"{escaped}\""), Applicability::MachineApplicable, ); } |